Release 0.2.8 (#685 )

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
feat: support human in the loop for TS (#686 )
2026-07-02 19:14:28 -04:00 · 2025-06-12 18:09:12 +07:00 · 2025-06-12 18:00:10 +07:00 · 2025-06-09 16:53:49 +07:00 · 2025-06-06 17:19:25 +07:00 · 2025-06-06 16:43:45 +07:00
140 changed files with 12964 additions and 3679 deletions
@@ -1,5 +1,24 @@
 # create-llama

+## 0.5.22
+
+### Patch Changes
+
+- e2486eb: feat: support human in the loop for TS
+
+## 0.5.21
+
+### Patch Changes
+
+- af9ad3c: feat: show document artifact after generating report
+- a543a27: feat: bump chat-ui with inline artifact
+
+## 0.5.20
+
+### Patch Changes
+
+- 3ff0a18: fix: default header padding
+
 ## 0.5.19

 ### Patch Changes
@@ -20,6 +20,7 @@ const useCases: TemplateUseCase[] = [
  "financial_report",
  "code_generator",
  "document_generator",
+  "hitl",
 ];
 const dataSource: string = process.env.DATASOURCE
  ? process.env.DATASOURCE
@@ -27,6 +27,7 @@ const templateUseCases = [
  "financial_report",
  "deep_research",
  "code_generator",
+  // "hitl",
 ];
 const ejectDir = "next";

@@ -3,6 +3,7 @@ import { exec } from "child_process";
 import fs from "fs";
 import path from "path";
 import util from "util";
+import { NO_DATA_USE_CASES } from "../../helpers/constant";
 import {
  TemplateFramework,
  TemplateType,
@@ -25,6 +26,7 @@ const useCases: TemplateUseCase[] = [
  "financial_report",
  "code_generator",
  "document_generator",
+  "hitl",
 ];
 const dataSource: string = process.env.DATASOURCE
  ? process.env.DATASOURCE
@@ -83,7 +85,7 @@ test.describe("Test resolve TS dependencies", () => {
          });
        });
        // Skipping llamacloud for the use case doesn't use index.
-        if (useCase !== "code_generator" && useCase !== "document_generator") {
+        if (!useCase || !NO_DATA_USE_CASES.includes(useCase)) {
          test(`llamaParse - ${optionDescription}`, async () => {
            await runTest({
              templateType: templateType,
@@ -1,6 +1,15 @@
+import { TemplateUseCase } from "./types";
+
 export const COMMUNITY_OWNER = "run-llama";
 export const COMMUNITY_REPO = "create_llama_projects";
 export const LLAMA_PACK_OWNER = "run-llama";
 export const LLAMA_PACK_REPO = "llama_index";
 export const LLAMA_PACK_FOLDER = "llama-index-packs";
 export const LLAMA_PACK_FOLDER_PATH = `${LLAMA_PACK_OWNER}/${LLAMA_PACK_REPO}/main/${LLAMA_PACK_FOLDER}`;
+
+// Use cases that don't have a data folder: for these there is no need to run
+// the `generate` script and no need to call getIndex.
+export const NO_DATA_USE_CASES: TemplateUseCase[] = [
+  "code_generator",
+  "document_generator",
+  "hitl",
+];
@@ -4,6 +4,7 @@ import path from "path";
 import picocolors, { cyan } from "picocolors";

 import fsExtra from "fs-extra";
+import { NO_DATA_USE_CASES } from "./constant";
 import { writeLoadersConfig } from "./datasources";
 import { createBackendEnvFile, createFrontendEnvFile } from "./env-variables";
 import { PackageManager } from "./get-pkg-manager";
@@ -98,8 +99,9 @@ async function generateContextData(
        }
      } else {
        console.log(`Running ${runGenerate} to generate the context data.`);
+
        const shouldRunGenerate =
-          useCase !== "code_generator" && useCase !== "document_generator"; // Artifact use case doesn't use index.
+          !useCase || !NO_DATA_USE_CASES.includes(useCase);

        if (shouldRunGenerate) {
          await callPackageManager(packageManager, true, ["run", "generate"]);
@@ -59,7 +59,8 @@ export type TemplateUseCase =
  | "contract_review"
  | "agentic_rag"
  | "code_generator"
-  | "document_generator";
+  | "document_generator"
+  | "hitl";
 // Config for both file and folder
 export type FileSourceConfig =
  | {
@@ -4,6 +4,7 @@ import path from "path";
 import { bold, cyan, red, yellow } from "picocolors";
 import { assetRelocator, copy } from "../helpers/copy";
 import { callPackageManager } from "../helpers/install";
+import { NO_DATA_USE_CASES } from "./constant";
 import { templatesDir } from "./dir";
 import { PackageManager } from "./get-pkg-manager";
 import { InstallTemplateArgs, ModelProvider, TemplateVectorDB } from "./types";
@@ -83,7 +84,7 @@ const installLlamaIndexServerTemplate = async ({
  }

  // Simplify use case code
-  if (useCase === "code_generator" || useCase === "document_generator") {
+  if (useCase && NO_DATA_USE_CASES.includes(useCase)) {
    // Artifact use case doesn't use index.
    // We don't need data.ts, generate.ts
    await fs.rm(path.join(root, "src", "app", "data.ts"));
@@ -1,6 +1,6 @@
 {
  "name": "create-llama",
-  "version": "0.5.19",
+  "version": "0.5.22",
  "description": "Create LlamaIndex-powered apps with one command",
  "keywords": [
    "rag",
@@ -1,4 +1,5 @@
 import prompts from "prompts";
+import { NO_DATA_USE_CASES } from "../helpers/constant";
 import { EXAMPLE_10K_SEC_FILES, EXAMPLE_FILE } from "../helpers/datasources";
 import { askModelConfig } from "../helpers/providers";
 import { getTools } from "../helpers/tools";
@@ -11,7 +12,8 @@ type AppType =
  | "financial_report"
  | "deep_research"
  | "code_generator"
-  | "document_generator";
+  | "document_generator"
+  | "hitl";

 type SimpleAnswers = {
  appType: AppType;
@@ -57,6 +59,12 @@ export const askSimpleQuestions = async (
          value: "document_generator",
          description: "Build a OpenAI canvas-styled document generator.",
        },
+        {
+          title: "Human in the Loop",
+          value: "hitl",
+          description:
+            "Build a CLI command workflow that is reviewed by a human before execution",
+        },
      ],
    },
    questionHandlers,
@@ -81,7 +89,8 @@ export const askSimpleQuestions = async (
  );
  language = newLanguage;

-  if (appType !== "code_generator" && appType !== "document_generator") {
+  const shouldAskLlamaCloud = !NO_DATA_USE_CASES.includes(appType);
+  if (shouldAskLlamaCloud) {
    const { useLlamaCloud: newUseLlamaCloud } = await prompts(
      {
        type: "toggle",
@@ -170,6 +179,12 @@ const convertAnswers = async (
      tools: [],
      modelConfig: MODEL_GPT41,
    },
+    hitl: {
+      template: "llamaindexserver",
+      dataSources: [],
+      tools: [],
+      modelConfig: MODEL_GPT41,
+    },
  };

  const results = lookup[answers.appType];
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";

 export default function Header() {
  return (
-    <div className="flex items-center justify-between px-4 pt-2">
+    <div className="flex items-center justify-between p-2 px-4">
      <div className="flex items-center gap-2">
        <Sparkles className="size-4" />
        <h1 className="font-semibold">LlamaIndex App</h1>
@@ -0,0 +1,95 @@
+import { Button } from "@/components/ui/button";
+import { Card, CardContent, CardFooter } from "@/components/ui/card";
+import { JSONValue, useChatUI } from "@llamaindex/chat-ui";
+import React, { FC, useState } from "react";
+import { z } from "zod";
+
+// This schema is equivalent to the CLICommand model defined in events.py
+const CLIInputEventSchema = z.object({
+  command: z.string(),
+});
+type CLIInputEvent = z.infer<typeof CLIInputEventSchema>;
+
+/**
+ * Custom UI component that shows the CLI command from the most recent valid
+ * input event and asks the user to confirm or reject it.
+ *
+ * Either choice appends a user message carrying a `human_response` annotation
+ * (`execute` flag plus the command) and then hides the Yes/No buttons.
+ *
+ * @param events - Raw events passed to the component; entries that do not
+ *   match `CLIInputEventSchema` are ignored.
+ */
+const CLIHumanInput: FC<{
+  events: JSONValue[];
+}> = ({ events }) => {
+  // Keep only the events that validate against the schema and use the last one.
+  const inputEvent = (events || [])
+    .map((ev) => {
+      const parseResult = CLIInputEventSchema.safeParse(ev);
+      return parseResult.success ? parseResult.data : null;
+    })
+    .filter((ev): ev is CLIInputEvent => ev !== null)
+    .at(-1);
+
+  const { append } = useChatUI();
+  // null until the user has answered; afterwards the footer with the buttons is not rendered.
+  const [confirmedValue, setConfirmedValue] = useState<boolean | null>(null);
+  const [editableCommand, setEditableCommand] = useState<string | undefined>(
+    inputEvent?.command,
+  );
+
+  // Update editableCommand if inputEvent changes (e.g. new event comes in)
+  React.useEffect(() => {
+    setEditableCommand(inputEvent?.command);
+  }, [inputEvent?.command]);
+
+  // Reply "Yes": execute=true together with the command currently held in editableCommand.
+  const handleConfirm = () => {
+    append({
+      content: "Yes",
+      role: "user",
+      annotations: [
+        {
+          type: "human_response",
+          data: {
+            execute: true,
+            command: editableCommand, // Use editable command
+          },
+        },
+      ],
+    });
+    setConfirmedValue(true);
+  };
+
+  // Reply "No": execute=false together with the command from the original input event.
+  const handleCancel = () => {
+    append({
+      content: "No",
+      role: "user",
+      annotations: [
+        {
+          type: "human_response",
+          data: {
+            execute: false,
+            command: inputEvent?.command,
+          },
+        },
+      ],
+    });
+    setConfirmedValue(false);
+  };
+
+  // NOTE(review): the <input> below is rendered with `disabled`, so its onChange
+  // handler is presumably never triggered by the user and editableCommand only
+  // mirrors inputEvent.command — confirm whether editing is meant to be enabled.
+  return (
+    <Card className="my-4">
+      <CardContent className="pt-6">
+        <p className="text-sm text-gray-700">
+          Do you want to execute the following command?
+        </p>
+        <input
+          disabled
+          type="text"
+          value={editableCommand || ""}
+          onChange={(e) => setEditableCommand(e.target.value)}
+          className="my-2 w-full overflow-x-auto rounded border border-gray-300 bg-gray-100 p-3 font-mono text-xs text-gray-800"
+        />
+      </CardContent>
+      {confirmedValue === null ? (
+        <CardFooter className="flex justify-end gap-2">
+          <>
+            <Button onClick={handleConfirm}>Yes</Button>
+            <Button onClick={handleCancel}>No</Button>
+          </>
+        </CardFooter>
+      ) : null}
+    </Card>
+  );
+};
+
+export default CLIHumanInput;
@@ -23,7 +23,18 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.models import ChatRequest, SourceNodesEvent, UIEvent
+from llama_index.server.api.models import (
+    ArtifactEvent,
+    ArtifactType,
+    ChatRequest,
+    SourceNodesEvent,
+    UIEvent,
+    Artifact,
+    DocumentArtifactData,
+    DocumentArtifactSource,
+)
+import time
+from llama_index.server.utils.stream import write_response_to_stream
 from pydantic import BaseModel, Field

 logger = logging.getLogger("uvicorn")
@@ -365,8 +376,31 @@ class DeepResearchWorkflow(Workflow):
            user_request=self.user_request,
            stream=self.stream,
        )
+
+        final_response = await write_response_to_stream(res, ctx)
+
+        ctx.write_event_to_stream(
+            ArtifactEvent(
+                data=Artifact(
+                    type=ArtifactType.DOCUMENT,
+                    created_at=int(time.time()),
+                    data=DocumentArtifactData(
+                        title="DeepResearch Report",
+                        content=final_response,
+                        type="markdown",
+                        sources=[
+                            DocumentArtifactSource(
+                                id=node.id_,
+                            )
+                            for node in self.context_nodes
+                        ],
+                    ),
+                ),
+            )
+        )
+
        return StopEvent(
-            result=res,
+            result="",
        )


@@ -0,0 +1,109 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
+
+## Getting Started
+
+First, setup the environment with uv:
+
+> **_Note:_** This step is not needed if you are using the dev-container.
+
+```shell
+uv sync
+```
+
+Then check the parameters that have been pre-configured in the `.env` file in this directory.
+Make sure you have set the `OPENAI_API_KEY` for the LLM.
+
+Then, run the development server:
+
+```shell
+uv run fastapi dev
+```
+
+Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
+
+To start the app optimized for **production**, run:
+
+```
+uv run fastapi run
+```
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
+
+## Use Case
+
+This example shows how to use the LlamaIndexServer with a human in the loop. It allows you to start CLI commands that are reviewed by a human before execution.
+
+To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
+
+You can start by sending a request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```
+curl --location 'localhost:8000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Show me the files in the current directory" }] }'
+```
+
+## How does HITL work?
+
+### Events
+
+The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
+
+To do this, you will need to implement two custom events:
+
+- [HumanInputEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is used to request input from the user.
+- [HumanResponseEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is sent to the workflow to resume execution with input from the user.
+
+In this example, we have implemented these two custom events in [`events.py`](app/events.py):
+
+- `CLIHumanInputEvent` – to request input from the user for CLI command execution.
+- `CLIHumanResponseEvent` – to resume the workflow with the response from the user.
+
+```python
+class CLIHumanResponseEvent(HumanResponseEvent):
+    execute: bool = Field(
+        description="True if the human wants to execute the command, False otherwise."
+    )
+    command: str = Field(description="The command to execute.")
+
+
+class CLICommand(BaseModel):
+    command: str = Field(description="The command to execute.")
+
+
+class CLIHumanInputEvent(HumanInputEvent):
+    event_type: str = "cli_human_input"
+    response_event_type: Type = CLIHumanResponseEvent
+    data: CLICommand = Field(description="The command to execute.")
+```
+
+### UI Component
+
+HITL also needs a custom UI component that is shown when the LlamaIndexServer receives the `CLIHumanInputEvent`. The name of the component is defined in the `event_type` field of the `CLIHumanInputEvent` - in our case, it is `cli_human_input`, which corresponds to the [cli_human_input.tsx](./components/cli_human_input.tsx) component.
+
+The custom component must use `append` to send a message with a `human_response` annotation. The data of the annotation must be in the format of the response event `CLIHumanResponseEvent`. In our case, to execute the command `ls -l`, we would send:
+
+```tsx
+append({
+  content: "Yes",
+  role: "user",
+  annotations: [
+    {
+      type: "human_response",
+      data: {
+        execute: true,
+        command: "ls -l", // The command to execute
+      },
+    },
+  ],
+});
+```
+
+This component displays the command to execute and the user can choose to execute or cancel the command execution.
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
+- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
+- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
+
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -0,0 +1,34 @@
+from typing import Type
+
+from pydantic import BaseModel, Field
+
+from llama_index.server.models import HumanInputEvent, HumanResponseEvent
+
+
+# Response event carrying the human's decision (execute or not) and the command it applies to.
+class CLIHumanResponseEvent(HumanResponseEvent):
+    execute: bool = Field(
+        description="True if the human wants to execute the command, False otherwise."
+    )
+    command: str = Field(description="The command to execute.")
+
+
+# Payload used as the `data` field of CLIHumanInputEvent: the command awaiting confirmation.
+class CLICommand(BaseModel):
+    command: str = Field(description="The command to execute.")
+
+
+# We need an event that extends from HumanInputEvent for the HITL feature
+class CLIHumanInputEvent(HumanInputEvent):
+    """
+    CLIHumanInputEvent is sent when the agent needs permission from the user to execute the CLI command or not.
+    Render this event by showing the command and a boolean button to execute the command or not.
+    """
+
+    event_type: str = (
+        "cli_human_input"  # used by UI to render with appropriate component
+    )
+    response_event_type: Type = (
+        CLIHumanResponseEvent  # used by workflow to resume with the correct event
+    )
+    data: CLICommand = Field(  # the data that is sent to the UI for rendering
+        description="The command to execute.",
+    )
@@ -0,0 +1,87 @@
+import platform
+import subprocess
+from typing import Any
+
+from app.events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
+
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.settings import Settings
+from llama_index.core.workflow import (
+    Context,
+    StartEvent,
+    StopEvent,
+    Workflow,
+    step,
+)
+
+
+def create_workflow() -> Workflow:
+    """Return a new CLIWorkflow instance."""
+    return CLIWorkflow()
+
+
+class CLIWorkflow(Workflow):
+    """
+    A workflow has ability to execute command line tool with human in the loop for confirmation.
+    """
+
+    default_prompt = PromptTemplate(
+        template="""
+        You are a helpful assistant who can write CLI commands to execute using {cli_language}.
+        Your task is to analyze the user's request and write a CLI command to execute.
+
+        ## User Request
+        {user_request}
+
+        Don't be verbose, only respond with the CLI command without any other text.
+        """
+    )
+
+    def __init__(self, **kwargs: Any) -> None:
+        # HITL Workflow should disable timeout otherwise, we will get a timeout error from callback
+        kwargs["timeout"] = None
+        super().__init__(**kwargs)
+
+    @step
+    async def start(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
+        user_msg = ev.user_msg
+        if user_msg is None:
+            raise ValueError("Missing user_msg in StartEvent")
+        await ctx.set("user_msg", user_msg)
+        # Request LLM to generate a CLI command
+        os_name = platform.system()
+        if os_name == "Linux" or os_name == "Darwin":
+            cli_language = "bash"
+        else:
+            cli_language = "cmd"
+        prompt = self.default_prompt.format(
+            user_request=user_msg, cli_language=cli_language
+        )
+        llm = Settings.llm
+        if llm is None:
+            raise ValueError("Missing LLM in Settings")
+        response = await llm.acomplete(prompt, formatted=True)
+        command = response.text.strip()
+        if command == "":
+            raise ValueError("Couldn't generate a command")
+        # Send the command to the user for confirmation
+        await ctx.set("command", command)
+        return CLIHumanInputEvent(  # type: ignore
+            data=CLICommand(command=command),
+            response_event_type=CLIHumanResponseEvent,
+        )
+
+    @step
+    async def handle_human_response(
+        self,
+        ctx: Context,
+        ev: CLIHumanResponseEvent,  # This event is sent by LlamaIndexServer when user response
+    ) -> StopEvent:
+        # If we have human response, check the confirmation and execute the command
+        if ev.execute:
+            command = ev.command or ""
+            if command == "":
+                raise ValueError("Missing command in CLIExecutionEvent")
+            res = subprocess.run(command, shell=True, capture_output=True, text=True)
+            return StopEvent(result=res.stdout or res.stderr)
+        else:
+            return StopEvent(result=None)
@@ -1,4 +1,4 @@
-import { extractLastArtifact } from "@llamaindex/server";
+import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
 import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";

 import {
@@ -52,19 +52,6 @@ const synthesizeAnswerEvent = workflowEvent<object>();

 const uiEvent = workflowEvent<UIEvent>();

-const artifactEvent = workflowEvent<{
-  type: "artifact";
-  data: {
-    type: "code";
-    created_at: number;
-    data: {
-      language: string;
-      file_name: string;
-      code: string;
-    };
-  };
-}>();
-
 export function workflowFactory(reqBody: any) {
  const llm = Settings.llm;

@@ -1,4 +1,4 @@
-import { toSourceEvent } from "@llamaindex/server";
+import { artifactEvent, toSourceEvent } from "@llamaindex/server";
 import {
  agentStreamEvent,
  createStatefulMiddleware,
@@ -339,6 +339,26 @@ export function getWorkflow(index: VectorStoreIndex | LlamaCloudIndex) {
        }),
      );
    }
+
+    // Open the generated report in Canvas
+    sendEvent(
+      artifactEvent.with({
+        type: "artifact",
+        data: {
+          type: "document",
+          created_at: Date.now(),
+          data: {
+            title: "DeepResearch Report",
+            content: response,
+            type: "markdown",
+            sources: state.contextNodes.map((node) => ({
+              id: node.node.id_,
+            })),
+          },
+        },
+      }),
+    );
+
    return stopAgentEvent.with({
      result: response,
    });
@@ -1,4 +1,4 @@
-import { extractLastArtifact } from "@llamaindex/server";
+import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
 import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";

 import {
@@ -55,19 +55,6 @@ const synthesizeAnswerEvent = workflowEvent<{

 const uiEvent = workflowEvent<UIEvent>();

-const artifactEvent = workflowEvent<{
-  type: "artifact";
-  data: {
-    type: "document";
-    created_at: number;
-    data: {
-      title: string;
-      content: string;
-      type: "markdown" | "html";
-    };
-  };
-}>();
-
 export function workflowFactory(reqBody: any) {
  const llm = Settings.llm;

@@ -0,0 +1,106 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
+
+## Getting Started
+
+First, install the dependencies:
+
+```
+npm install
+```
+
+Second, run the development server:
+
+```
+npm run dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the chat UI.
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://ts.llamaindex.ai/docs/llamaindex/modules/llms) in the [settings file](src/app/settings.ts).
+
+## Use Case
+
+This example shows how to use the LlamaIndexServer with a human in the loop. It allows you to start CLI commands that are reviewed by a human before execution.
+
+To update the workflow, you can modify the code in [`workflow.ts`](src/app/workflow.ts).
+
+You can start by sending a request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```
+curl --location 'localhost:3000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Show me the files in the current directory" }] }'
+```
+
+## How does HITL work?
+
+### Events
+
+The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
+
+To do this, you will need to implement two custom events:
+
+- [HumanInputEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is used to request input from the user.
+- [HumanResponseEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is sent to the workflow to resume execution with input from the user.
+
+In this example, we have implemented these two custom events in [`events.ts`](src/app/events.ts):
+
+- `cliHumanInputEvent` – to request input from the user for CLI command execution.
+- `cliHumanResponseEvent` – to resume the workflow with the response from the user.
+
+```typescript
+export const cliHumanInputEvent = humanInputEvent<{
+  type: "cli_human_input";
+  data: { command: string };
+  response: typeof cliHumanResponseEvent;
+}>();
+
+export const cliHumanResponseEvent = humanResponseEvent<{
+  type: "human_response";
+  data: { execute: boolean; command: string };
+}>();
+```
+
+### UI Component
+
+HITL also needs a custom UI component, that is shown when the LlamaIndexServer receives the `cliHumanInputEvent`. The name of the component is defined in the `type` field of the `cliHumanInputEvent` - in our case, it is `cli_human_input`, which corresponds to the [cli_human_input.tsx](./components/cli_human_input.tsx) component.
+
+The custom component must use `append` to send a message with a `human_response` annotation. The data of the annotation must be in the format of the response event `cliHumanResponseEvent`, in our case, for sending to execute the command `ls -l`, we would send:
+
+```tsx
+append({
+  content: "Yes",
+  role: "user",
+  annotations: [
+    {
+      type: "human_response",
+      data: {
+        execute: true,
+        command: "ls -l", // The command to execute
+      },
+    },
+  ],
+});
+```
+
+This component displays the command to execute and the user can choose to execute or cancel the command execution.
+
+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
+- [LlamaIndexTS Documentation](https://ts.llamaindex.ai/docs/llamaindex) - learn about LlamaIndex (Typescript features).
+- [Workflows Introduction](https://ts.llamaindex.ai/docs/llamaindex/modules/workflows) - learn about LlamaIndexTS workflows.
+
+You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
@@ -0,0 +1,12 @@
+import { humanInputEvent, humanResponseEvent } from "@llamaindex/server";
+
+/**
+ * Event used to request input from the user for a CLI command execution.
+ * Its `type` ("cli_human_input") is the name of the UI component that renders
+ * it, `data` carries the proposed command, and `response` is the event that
+ * resumes the workflow.
+ */
+export const cliHumanInputEvent = humanInputEvent<{
+  type: "cli_human_input";
+  data: { command: string };
+  response: typeof cliHumanResponseEvent;
+}>();
+
+/**
+ * Event sent to the workflow to resume execution with the user's response:
+ * whether to execute, and the command it applies to.
+ */
+export const cliHumanResponseEvent = humanResponseEvent<{
+  type: "human_response";
+  data: { execute: boolean; command: string };
+}>();
@@ -0,0 +1,20 @@
+import { execSync } from "child_process";
+import { tool } from "llamaindex";
+import { z } from "zod";
+
+/**
+ * Tool that runs a command synchronously via `execSync` and returns its
+ * output as a UTF-8 string.
+ *
+ * Failures are not thrown: the error is logged and a string starting with
+ * "Command failed" is returned, now including the failure reason so that the
+ * caller (and the LLM summarizing the result) can see why the command failed.
+ */
+export const cliExecutor = tool({
+  name: "cli_executor",
+  description: "This tool executes a command and returns the output.",
+  parameters: z.object({ command: z.string() }),
+  execute: async ({ command }) => {
+    try {
+      return execSync(command, {
+        encoding: "utf-8",
+      });
+    } catch (error: unknown) {
+      console.error(error);
+      // Keep the historical "Command failed" prefix, but don't drop the reason.
+      const detail = error instanceof Error ? error.message : String(error);
+      return detail.startsWith("Command failed")
+        ? detail
+        : `Command failed: ${detail}`;
+    }
+  },
+});
@@ -0,0 +1,101 @@
+import { toAgentRunEvent, writeResponseToStream } from "@llamaindex/server";
+import { chatWithTools } from "@llamaindex/tools";
+import {
+  createWorkflow,
+  getContext,
+  startAgentEvent,
+  stopAgentEvent,
+  withSnapshot,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import { ChatMessage, Settings, ToolCallLLM } from "llamaindex";
+import { cliHumanInputEvent, cliHumanResponseEvent } from "./events";
+import { cliExecutor } from "./tools";
+
+const summaryEvent = workflowEvent<string>(); // simple event to summarize the result
+
+/**
+ * Creates the human-in-the-loop CLI workflow for one chat request.
+ *
+ * Flow:
+ * 1. `startAgentEvent`: let the LLM decide whether to call the CLI executor
+ *    tool; if it does, emit `cliHumanInputEvent` to ask the user for permission.
+ * 2. `cliHumanResponseEvent`: run the command if the user approved it.
+ * 3. `summaryEvent`: stream the final LLM answer and stop the workflow.
+ *
+ * @param body - The chat request body; expected to contain `messages`.
+ * @returns The configured workflow (wrapped with `withSnapshot`).
+ * @throws Error if the configured LLM does not support tool calls.
+ */
+export const workflowFactory = (body: unknown) => {
+  const llm = Settings.llm as ToolCallLLM;
+
+  if (!llm.supportToolCall) {
+    throw new Error("LLM is not a ToolCallLLM");
+  }
+
+  const { messages } = body as { messages?: ChatMessage[] };
+
+  const workflow = withSnapshot(createWorkflow());
+
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    if (!userInput) {
+      throw new Error("User input is required");
+    }
+
+    // In this example, we use chatWithTools to decide whether to perform a tool call.
+    // If the CLI executor is called, emit a HumanInputEvent to ask the user for permission.
+    const toolCallResponse = await chatWithTools(
+      llm,
+      [cliExecutor],
+      chatHistory.concat({ role: "user", content: userInput }),
+    );
+    const cliExecutorToolCall = toolCallResponse.toolCalls.find(
+      (toolCall) => toolCall.name === cliExecutor.metadata.name,
+    );
+    // The tool call (and therefore the command) may be absent.
+    const command = cliExecutorToolCall?.input?.command as string | undefined;
+    if (command) {
+      return cliHumanInputEvent.with({
+        type: "cli_human_input",
+        data: { command },
+        response: cliHumanResponseEvent,
+      });
+    }
+
+    // If there is no tool call, just respond as normal.
+    return summaryEvent.with("");
+  });
+
+  // Do actions after getting the response from the human.
+  workflow.handle([cliHumanResponseEvent], async ({ data }) => {
+    const { sendEvent } = getContext();
+    const { command, execute } = data.data;
+
+    if (!execute) {
+      // Skip execution if the user rejected the command; go straight to the summary.
+      return summaryEvent.with(`User reject to execute the command ${command}`);
+    }
+
+    sendEvent(
+      toAgentRunEvent({
+        agent: "CLI Executor",
+        text: `Execute the command "${command}" and return the result`,
+        type: "text",
+      }),
+    );
+
+    const result = (await cliExecutor.call({ command })) as string;
+
+    return summaryEvent.with(
+      `Executed the command ${command} and got the result: ${result}`,
+    );
+  });
+
+  workflow.handle([summaryEvent], async ({ data: summaryResult }) => {
+    const { sendEvent } = getContext();
+
+    // Copy the request messages instead of pushing into the caller's array,
+    // so the request body is never mutated.
+    const chatHistory: ChatMessage[] = [...(messages ?? [])];
+    if (summaryResult) {
+      chatHistory.push({ role: "user", content: summaryResult });
+    }
+
+    const stream = await llm.chat({
+      messages: chatHistory,
+      stream: true,
+    });
+
+    const result = await writeResponseToStream(stream, sendEvent);
+
+    return stopAgentEvent.with({ result });
+  });
+
+  return workflow;
+};
@@ -12,7 +12,7 @@
  "dependencies": {
    "@llamaindex/openai": "~0.4.0",
    "@llamaindex/server": "~0.2.1",
-    "@llamaindex/workflow": "~1.1.3",
+    "@llamaindex/workflow": "~1.1.8",
    "@llamaindex/tools": "~0.0.11",
    "llamaindex": "~0.11.0",
    "dotenv": "^16.4.7",
@@ -20,7 +20,7 @@
  },
  "devDependencies": {
    "@types/node": "^20.10.3",
-    "tsx": "^4.7.2",
+    "tsx": "4.7.2",
    "typescript": "^5.3.2",
    "nodemon": "^3.1.10"
  }
@@ -1,5 +1,33 @@
 # @llamaindex/server

+## 0.2.8
+
+### Patch Changes
+
+- e2486eb: feat: support human in the loop for TS
+
+## 0.2.7
+
+### Patch Changes
+
+- af9ad3c: feat: show document artifact after generating report
+- a543a27: feat: bump chat-ui with inline artifact
+- 1ff6eaf: Add support for chat upload file
+
+## 0.2.6
+
+### Patch Changes
+
+- 3ff0a18: fix: default header padding
+- df10474: fix: missing cursor pointer for button
+- 087c961: Support zod and chat-ui hooks for custom components
+
+## 0.2.5
+
+### Patch Changes
+
+- 058b376: Fix generate script for ejected project
+
 ## 0.2.4

 ### Patch Changes
@@ -8,6 +8,7 @@ LlamaIndexServer is a Next.js-based application that allows you to quickly launc
 - Edit code and document artifacts in an OpenAI Canvas-style UI
 - Extendable UI components for events and headers
 - Built on Next.js for high performance and easy API development
+- Human-in-the-loop (HITL) support, check out the [Human-in-the-loop](https://github.com/run-llama/create-llama/blob/main/packages/server/examples/hitl/README.md) documentation for more details.

 ## Installation

@@ -60,6 +61,7 @@ The `LlamaIndexServer` accepts the following configuration options:
 - `workflow`: A callable function that creates a workflow instance for each request. See [Workflow factory contract](#workflow-factory-contract) for more details.
 - `uiConfig`: An object to configure the chat UI containing the following properties:
  - `starterQuestions`: List of starter questions for the chat UI (default: `[]`)
+  - `enableFileUpload`: Whether to enable file upload in the chat UI (default: `false`). See [Upload file example](./examples/private-file/README.md) for more details.
  - `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
  - `layoutDir`: The directory for custom layout sections. The default value is `layout`. See [Custom Layout](#custom-layout) for more details.
  - `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
@@ -71,9 +73,18 @@ See all Nextjs Custom Server options [here](https://nextjs.org/docs/app/building

 ## Workflow factory contract

-The `workflow` provided will be called for each chat request to initialize a new workflow instance. The contract of the generated workflow must be the same as for the [Agent Workflow](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow).
+The `workflow` provided will be called for each chat request to initialize a new workflow instance. For advanced use cases, you can define a `workflowFactory` that accepts a `chatBody` argument, which includes the list of UI messages from the request body.

-This means that the workflow must handle a `startAgentEvent` event, which is the entry point of the workflow and contains the following information in it's `data` property:
+```typescript
+import { type Message } from "ai";
+import { agent } from "@llamaindex/workflow";
+
+const workflowFactory = (chatBody: { messages: Message[] }) => {
+  ...
+};
+```
+
+The contract of the generated workflow must be the same as for the [Agent Workflow](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow). This means that the workflow must handle a `startAgentEvent` event, which is the entry point of the workflow and contains the following information in its `data` property:

 ```typescript
 {
@@ -1,12 +1,38 @@
 # LlamaIndex Server Examples

-This directory contains examples of how to use the LlamaIndex Server.
+This directory provides example projects demonstrating how to use the LlamaIndex Server.

-## Running the examples
+## How to Run the Examples

-```bash
-export OPENAI_API_KEY=your_openai_api_key
-pnpm run dev
-```
+1. **Install dependencies**

-## Open browser at http://localhost:3000
+   In the root of this directory, run:
+
+   ```bash
+   pnpm install
+   ```
+
+2. **Set your OpenAI API key**
+
+   Export your OpenAI API key as an environment variable:
+
+   ```bash
+   export OPENAI_API_KEY=your_openai_api_key
+   ```
+
+3. **Start an example**
+
+   Replace `<example>` with the name of the example you want to run (e.g., `private-file`):
+
+   ```bash
+   pnpm nodemon --exec tsx <example>/index.ts
+   ```
+
+4. **Open the application in your browser**
+
+   Visit [http://localhost:3000](http://localhost:3000) to interact with the running example.
+
+## Notes
+
+- Make sure you have [pnpm](https://pnpm.io/) installed.
+- Each example may have its own specific instructions or requirements; check the individual example's index.ts for details.
@@ -1,12 +1,7 @@
+import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
 import { LlamaIndexServer } from "@llamaindex/server";
 import { agent } from "@llamaindex/workflow";
-import {
-  Document,
-  OpenAI,
-  OpenAIEmbedding,
-  Settings,
-  VectorStoreIndex,
-} from "llamaindex";
+import { Document, Settings, VectorStoreIndex } from "llamaindex";

 Settings.llm = new OpenAI({
  model: "gpt-4o-mini",
@@ -0,0 +1,22 @@
+This example demonstrates how to use the code generation workflow.
+
+```ts
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: {
+    starterQuestions: [
+      "Generate a calculator app",
+      "Create a simple todo list app",
+    ],
+    componentsDir: "components",
+  },
+  port: 3000,
+}).start();
+```
+
+Export OpenAI API key and start the server in dev mode.
+
+```bash
+export OPENAI_API_KEY=<your-openai-api-key>
+npx nodemon --exec tsx index.ts
+```
@@ -0,0 +1,132 @@
+import { Badge } from "@/components/ui/badge";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Progress } from "@/components/ui/progress";
+import { Skeleton } from "@/components/ui/skeleton";
+import { cn } from "@/lib/utils";
+import { Markdown } from "@llamaindex/chat-ui/widgets";
+import { ListChecks, Loader2, Wand2 } from "lucide-react";
+import { useEffect, useState } from "react";
+
+/**
+ * Presentation metadata for each in-progress workflow stage, keyed by the
+ * `state` value of the incoming UI event: the header icon, badge label,
+ * Tailwind class names, and the value passed to the progress bar.
+ * There is no entry for the "completed" state, so a lookup for it yields
+ * `undefined`.
+ */
+const STAGE_META = {
+  plan: {
+    icon: ListChecks,
+    badgeText: "Step 1/2: Planning",
+    gradient: "from-blue-100 via-blue-50 to-white",
+    progress: 33,
+    iconBg: "bg-blue-100 text-blue-600",
+    badge: "bg-blue-100 text-blue-700",
+  },
+  generate: {
+    icon: Wand2,
+    badgeText: "Step 2/2: Generating",
+    gradient: "from-violet-100 via-violet-50 to-white",
+    progress: 66,
+    iconBg: "bg-violet-100 text-violet-600",
+    badge: "bg-violet-100 text-violet-700",
+  },
+};
+
+/**
+ * Card showing the current stage ("plan" or "generate") of the artifact
+ * workflow. Renders nothing when there is no event, when the event's state is
+ * "completed", or when the state has no entry in STAGE_META.
+ *
+ * `event` is expected to carry `state` and, optionally, `requirement`
+ * (rendered as Markdown during the "generate" stage).
+ */
+function ArtifactWorkflowCard({ event }) {
+  // Hidden from the first render if the workflow has already completed.
+  const [visible, setVisible] = useState(event?.state !== "completed");
+  // NOTE(review): `fade` is only ever set to false in this component, so the
+  // fade-out classes applied below never take effect — confirm whether a
+  // fade-out transition was intended.
+  const [fade, setFade] = useState(false);
+
+  // Keep visibility in sync with the event state: hide on completion, show
+  // (and reset the fade flag) for any other state.
+  useEffect(() => {
+    if (event?.state === "completed") {
+      setVisible(false);
+    } else {
+      setVisible(true);
+      setFade(false);
+    }
+  }, [event?.state]);
+
+  if (!event || !visible) return null;
+
+  const { state, requirement } = event;
+  const meta = STAGE_META[state];
+
+  // States without presentation metadata render nothing.
+  if (!meta) return null;
+
+  return (
+    <div className="flex min-h-[180px] w-full items-center justify-center py-2">
+      <Card
+        className={cn(
+          "w-full rounded-xl shadow-md transition-all duration-500",
+          "border-0",
+          fade && "pointer-events-none opacity-0",
+          `bg-gradient-to-br ${meta.gradient}`,
+        )}
+        style={{
+          boxShadow:
+            "0 2px 12px 0 rgba(80, 80, 120, 0.08), 0 1px 3px 0 rgba(80, 80, 120, 0.04)",
+        }}
+      >
+        <CardHeader className="flex flex-row items-center gap-2 px-3 pb-1 pt-2">
+          <div
+            className={cn(
+              "flex items-center justify-center rounded-full p-1",
+              meta.iconBg,
+            )}
+          >
+            <meta.icon className="h-5 w-5" />
+          </div>
+          <CardTitle className="flex items-center gap-2 text-base font-semibold">
+            <Badge className={cn("ml-1", meta.badge, "px-2 py-0.5 text-xs")}>
+              {meta.badgeText}
+            </Badge>
+          </CardTitle>
+        </CardHeader>
+        <CardContent className="px-3 py-1">
+          {state === "plan" && (
+            <div className="flex flex-col items-center gap-2 py-2">
+              <Loader2 className="mb-1 h-6 w-6 animate-spin text-blue-400" />
+              <div className="text-center text-sm font-medium text-blue-900">
+                Analyzing your request...
+              </div>
+              <Skeleton className="mt-1 h-3 w-1/2 rounded-full" />
+            </div>
+          )}
+          {state === "generate" && (
+            <div className="flex flex-col gap-2 py-2">
+              <div className="flex items-center gap-1">
+                <Loader2 className="h-4 w-4 animate-spin text-violet-400" />
+                <span className="text-sm font-medium text-violet-900">
+                  Working on the requirement:
+                </span>
+              </div>
+              <div className="max-h-24 overflow-auto rounded-lg border border-violet-200 bg-violet-50 px-2 py-1 text-xs">
+                {requirement ? (
+                  <Markdown content={requirement} />
+                ) : (
+                  <span className="italic text-violet-400">
+                    No requirements available yet.
+                  </span>
+                )}
+              </div>
+            </div>
+          )}
+        </CardContent>
+        <div className="px-3 pb-2 pt-1">
+          <Progress
+            value={meta.progress}
+            className={cn(
+              "h-1 rounded-full bg-gray-200",
+              state === "plan" && "bg-blue-200",
+              state === "generate" && "bg-violet-200",
+            )}
+          />
+        </div>
+      </Card>
+    </div>
+  );
+}
+
+/**
+ * Entry point of the custom UI component: passes the most recent entry of
+ * `events` to the workflow card, or `null` when `events` is missing or empty.
+ */
+export default function Component({ events }) {
+  const hasEvents = Boolean(events) && events.length !== 0;
+  const latestEvent = hasEvents ? events[events.length - 1] : null;
+
+  return <ArtifactWorkflowCard event={latestEvent} />;
+}
@@ -0,0 +1,20 @@
+import { OpenAI } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { Settings } from "llamaindex";
+import { workflowFactory } from "./src/app/workflow";
+
+// Use OpenAI's gpt-4o-mini as the global LLM for this example.
+Settings.llm = new OpenAI({
+  model: "gpt-4o-mini",
+});
+
+// Start the server on port 3000 with the code generation workflow, two starter
+// questions, and "components" as the custom UI components directory.
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: {
+    starterQuestions: [
+      "Generate a calculator app",
+      "Create a simple todo list app",
+    ],
+    componentsDir: "components",
+  },
+  port: 3000,
+}).start();
@@ -0,0 +1,337 @@
+import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
+import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
+
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+
+import { z } from "zod";
+
+/**
+ * Plan for the next workflow step: which step to take ("answering" or
+ * "coding"), an optional target language and file name, and the requirement
+ * text describing what has to be done.
+ */
+export const RequirementSchema = z.object({
+  next_step: z.enum(["answering", "coding"]),
+  language: z.string().nullable().optional(),
+  file_name: z.string().nullable().optional(),
+  requirement: z.string(),
+});
+
+export type Requirement = z.infer<typeof RequirementSchema>;
+
+/**
+ * Progress event tagged with type "ui_event": carries the current workflow
+ * state and, optionally, the requirement being worked on.
+ */
+export const UIEventSchema = z.object({
+  type: z.literal("ui_event"),
+  data: z.object({
+    state: z
+      .enum(["plan", "generate", "completed"])
+      .describe(
+        "The current state of the workflow: 'plan', 'generate', or 'completed'.",
+      ),
+    requirement: z
+      .string()
+      .optional()
+      .describe(
+        "An optional requirement creating or updating a code, if applicable.",
+      ),
+  }),
+});
+
+export type UIEvent = z.infer<typeof UIEventSchema>;
+// Event carrying the user input plus an optional serialized context string.
+const planEvent = workflowEvent<{
+  userInput: MessageContent;
+  context?: string | undefined;
+}>();
+
+// Event carrying the parsed plan for a code-generation step.
+const generateArtifactEvent = workflowEvent<{
+  requirement: Requirement;
+}>();
+
+// Event with an empty payload.
+const synthesizeAnswerEvent = workflowEvent<object>();
+
+// Event whose payload follows UIEventSchema.
+const uiEvent = workflowEvent<UIEvent>();
+
+export function workflowFactory(reqBody: unknown) {
+  const llm = Settings.llm;
+
+  const { withState, getContext } = createStatefulMiddleware(() => {
+    return {
+      memory: new ChatMemoryBuffer({ llm }),
+      lastArtifact: extractLastArtifact(reqBody),
+    };
+  });
+  const workflow = withState(createWorkflow());
+
+  // Entry step: seed the per-request memory and hand off to the planning step.
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    const { state } = getContext();
+
+    // A run cannot be planned without a user message.
+    if (!userInput) {
+      throw new Error("Missing user input to start the workflow");
+    }
+
+    // Load the prior turns, then record the new user message.
+    state.memory.set(chatHistory);
+    state.memory.put({ role: "user", content: userInput });
+
+    // The latest artifact, when present, is passed along as serialized JSON.
+    const artifact = state.lastArtifact;
+    const context = artifact ? JSON.stringify(artifact) : undefined;
+
+    return planEvent.with({ userInput, context });
+  });
+
+  // Planning step: asks the LLM whether the next step is "coding" or
+  // "answering" and what the requirement is, then routes to the matching event.
+  workflow.handle([planEvent], async ({ data: planData }) => {
+    const { sendEvent } = getContext();
+    const { state } = getContext();
+    // Tell the UI that planning has started.
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "plan",
+        },
+      }),
+    );
+    const user_msg = planData.userInput;
+    // Optional context section; empty when no context was provided.
+    const context = planData.context
+      ? `## The context is: \n${planData.context}\n`
+      : "";
+    const prompt = `
+You are a product analyst responsible for analyzing the user's request and providing the next step for code or document generation.
+You are helping user with their code artifact. To update the code, you need to plan a coding step.
+
+Follow these instructions:
+1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
+2. The next step must be one of the following two options:
+    - "coding": To make the changes to the current code.
+    - "answering": If you don't need to update the current code or need clarification from the user.
+Important: Avoid telling the user to update the code themselves, you are the one who will update the code (by planning a coding step).
+3. If the next step is "coding", you may specify the language ("typescript" or "python") and file_name if known, otherwise set them to null. 
+4. The requirement must be provided clearly what is the user request and what need to be done for the next step in details
+    as precise and specific as possible, don't be stingy with in the requirement.
+5. If the next step is "answering", set language and file_name to null, and the requirement should describe what to answer or explain to the user.
+6. Be concise; only return the requirements for the next step.
+7. The requirements must be in the following format:
+    \`\`\`json
+    {
+        "next_step": "answering" | "coding",
+        "language": "typescript" | "python" | null,
+        "file_name": string | null,
+        "requirement": string
+    }
+    \`\`\`
+
+## Example 1:
+User request: Create a calculator app.
+You should return:
+\`\`\`json
+{
+    "next_step": "coding",
+    "language": "typescript",
+    "file_name": "calculator.tsx",
+    "requirement": "Generate code for a calculator app that has a simple UI with a display and button layout. The display should show the current input and the result. The buttons should include basic operators, numbers, clear, and equals. The calculation should work correctly."
+}
+\`\`\`
+
+## Example 2:
+User request: Explain how the game loop works.
+Context: You have already generated the code for a snake game.
+You should return:
+\`\`\`json
+{
+    "next_step": "answering",
+    "language": null,
+    "file_name": null,
+    "requirement": "The user is asking about the game loop. Explain how the game loop works."
+}
+\`\`\`
+
+${context}
+
+Now, plan the user's next step for this request:
+${user_msg}
+`;
+
+    const response = await llm.complete({
+      prompt,
+    });
+    // parse the response to Requirement
+    // 1. use regex to find the json block
+    const jsonBlock = response.text.match(/```json\s*([\s\S]*?)\s*```/);
+    if (!jsonBlock) {
+      throw new Error("No JSON block found in the response.");
+    }
+    // 2. parse and validate it; throws if the JSON is malformed or does not
+    // match RequirementSchema
+    const requirement = RequirementSchema.parse(JSON.parse(jsonBlock[1]));
+    // Record the raw plan in memory as an assistant message.
+    state.memory.put({
+      role: "assistant",
+      content: `The plan for next step: \n${response.text}`,
+    });
+
+    // Route: code generation for "coding", otherwise go straight to answering.
+    if (requirement.next_step === "coding") {
+      return generateArtifactEvent.with({
+        requirement,
+      });
+    } else {
+      return synthesizeAnswerEvent.with({});
+    }
+  });
+
+  // Code-generation step: asks the LLM to write or update code for the planned
+  // requirement, publishes the result as a code artifact, then moves on to the
+  // answer-synthesis step. If the reply contains no fenced code block, no
+  // artifact is emitted and the workflow goes straight to answer synthesis.
+  workflow.handle([generateArtifactEvent], async ({ data: planData }) => {
+    const { sendEvent, state } = getContext();
+
+    // Tell the UI that generation has started and which requirement is in work.
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "generate",
+          requirement: planData.requirement.requirement,
+        },
+      }),
+    );
+
+    const previousArtifact = state.lastArtifact
+      ? JSON.stringify(state.lastArtifact)
+      : "There is no previous artifact";
+    const requirementText = planData.requirement.requirement;
+
+    // Values substituted into the `{previousArtifact}` and `{requirement}`
+    // placeholders of the prompt template below.
+    const promptValues = {
+      previousArtifact,
+      requirement: requirementText,
+    };
+
+    // The placeholders are filled in a single pass with a function replacer:
+    // - a string replacement would interpret `$&`, `$'`, `$1`, `$$`, ... inside
+    //   the inserted code/requirement as replacement patterns and corrupt it;
+    // - chained replaces could substitute a literal "{requirement}" that
+    //   happens to occur inside the previous artifact instead of the template
+    //   placeholder.
+    const prompt = `
+        You are a skilled developer who can help user with coding.
+        You are given a task to generate or update a code for a given requirement.
+
+        ## Follow these instructions:
+        **1. Carefully read the user's requirements.** 
+           If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
+           If the previous code is provided:
+           + Carefully analyze the code with the request to make the right changes.
+           + Avoid making a lot of changes from the previous code if the request is not to write the code from scratch again.
+        **2. For code requests:**
+           - If the user does not specify a framework or language, default to a React component using the Next.js framework.
+           - For Next.js, use Shadcn UI components, Typescript, @types/node, @types/react, @types/react-dom, PostCSS, and TailwindCSS.
+           The import pattern should be:
+           \`\`\`typescript
+           import { ComponentName } from "@/components/ui/component-name"
+           import { Markdown } from "@llamaindex/chat-ui"
+           import { cn } from "@/lib/utils"
+           \`\`\`
+           - Ensure the code is idiomatic, production-ready, and includes necessary imports.
+           - Only generate code relevant to the user's request—do not add extra boilerplate.
+        **3. Don't be verbose on response**
+           - No other text or comments only return the code which wrapped by \`\`\`language\`\`\` block.
+           - If the user's request is to update the code, only return the updated code.
+        **4. Only the following languages are allowed: "typescript", "python".**
+        **5. If there is no code to update, return the reason without any code block.**
+           
+        ## Example:
+        \`\`\`typescript
+        import React from "react";
+        import { Button } from "@/components/ui/button";
+        import { cn } from "@/lib/utils";
+
+        export default function MyComponent() {
+        return (
+           <div className="flex flex-col items-center justify-center h-screen">
+              <Button>Click me</Button>
+           </div>
+        );
+        }
+        \`\`\`
+
+        The previous code is:
+        {previousArtifact}
+
+        Now, i have to generate the code for the following requirement:
+        {requirement}
+      `.replace(
+      /\{(previousArtifact|requirement)\}/g,
+      (_placeholder, key: keyof typeof promptValues) => promptValues[key],
+    );
+
+    const response = await llm.complete({
+      prompt,
+    });
+
+    // Extract the code from the response: everything between the first fence
+    // that carries a language tag and the last closing fence.
+    const codeMatch = response.text.match(/```(\w+)([\s\S]*)```/);
+    if (!codeMatch) {
+      return synthesizeAnswerEvent.with({});
+    }
+
+    const code = codeMatch[2].trim();
+
+    // Put the generated code to the memory
+    state.memory.put({
+      role: "assistant",
+      content: `Updated the code: \n${response.text}`,
+    });
+
+    // To show the Canvas panel for the artifact
+    sendEvent(
+      artifactEvent.with({
+        type: "artifact",
+        data: {
+          type: "code",
+          created_at: Date.now(),
+          data: {
+            language: planData.requirement.language || "",
+            file_name: planData.requirement.file_name || "",
+            code,
+          },
+        },
+      }),
+    );
+
+    return synthesizeAnswerEvent.with({});
+  });
+
+  // Final step: streams an explanation of the work back to the user and stops
+  // the workflow with the full response text.
+  workflow.handle([synthesizeAnswerEvent], async () => {
+    const { sendEvent, state } = getContext();
+
+    // Conversation so far, followed by the instruction for the explainer.
+    const history = await state.memory.getMessages();
+    const explainerInstruction = {
+      role: "system" as const,
+      content: `
+        You are a helpful assistant who is responsible for explaining the work to the user.
+        Based on the conversation history, provide an answer to the user's question. 
+        The user has access to the code so avoid mentioning the whole code again in your response.
+      `,
+    };
+
+    const responseStream = await llm.chat({
+      messages: [...history, explainerInstruction],
+      stream: true,
+    });
+
+    // Mark the workflow as completed in the UI before streaming the answer.
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { state: "completed" },
+      }),
+    );
+
+    // Forward every chunk to the client while accumulating the full answer.
+    let answer = "";
+    for await (const chunk of responseStream) {
+      const { delta } = chunk;
+      answer += delta;
+      sendEvent(
+        agentStreamEvent.with({
+          delta,
+          response: "",
+          currentAgentName: "assistant",
+          raw: chunk,
+        }),
+      );
+    }
+
+    return stopAgentEvent.with({ result: answer });
+  });
+
+  return workflow;
+}
@@ -1,8 +1,13 @@
+import { OpenAI } from "@llamaindex/openai";
 import { LlamaIndexServer } from "@llamaindex/server";
 import { agent } from "@llamaindex/workflow";
-import { tool } from "llamaindex";
+import { Settings, tool } from "llamaindex";
 import { z } from "zod";

+Settings.llm = new OpenAI({
+  model: "gpt-4o-mini",
+});
+
 const weatherAgent = agent({
  tools: [
    tool({
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";

 export default function Header() {
  return (
-    <div className="flex items-center justify-between px-4 pt-2">
+    <div className="flex items-center justify-between p-2 px-4">
      <div className="flex items-center gap-2">
        <Sparkles className="size-4" />
        <h1 className="font-semibold">LlamaIndex App</h1>
@@ -1,6 +1,12 @@
+import { OpenAI } from "@llamaindex/openai";
 import { LlamaIndexServer } from "@llamaindex/server";
+import { Settings } from "llamaindex";
 import { workflowFactory } from "./src/app/workflow";

+Settings.llm = new OpenAI({
+  model: "gpt-4o-mini",
+});
+
 new LlamaIndexServer({
  workflow: workflowFactory,
  uiConfig: {
@@ -0,0 +1,172 @@
+# Human in the Loop
+
+This example shows how to use the LlamaIndexServer with a human in the loop. It allows you to start CLI commands that are reviewed by a human before execution.
+
+## Getting Started
+
+### Environment Setup
+
+Export your OpenAI API key:
+
+```bash
+export OPENAI_API_KEY=<your-openai-api-key>
+```
+
+### Starting the Server
+
+Run the server in development mode:
+
+```bash
+npx nodemon --exec tsx index.ts --ignore output/*
+```
+
+### Access the Application
+
+Open your browser and go to:
+
+```
+http://localhost:3000
+```
+
+You will see the LlamaIndexServer UI, where you can interact with the HITL agent. Try "List all files in the current directory" and see how the agent pauses and waits for a human response before executing the command.
+
+## How does HITL work?
+
+### Events
+
+The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
+
+To do this, you will need to implement two custom events:
+
+- [HumanInputEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is used to request input from the user.
+- [HumanResponseEvent](https://github.com/run-llama/create-llama/blob/main/packages/server/src/utils/hitl/events.ts): This event is sent to the workflow to resume execution with input from the user.
+
+In this example, we have implemented these two custom events in [`events.ts`](src/app/events.ts):
+
+- `cliHumanInputEvent` – to request input from the user for CLI command execution.
+- `cliHumanResponseEvent` – to resume the workflow with the response from the user.
+
+```typescript
+export const cliHumanInputEvent = humanInputEvent<{
+  type: "cli_human_input";
+  data: { command: string };
+  response: typeof cliHumanResponseEvent;
+}>();
+
+export const cliHumanResponseEvent = humanResponseEvent<{
+  type: "human_response";
+  data: { execute: boolean; command: string };
+}>();
+```
+
+### UI Component
+
+HITL also needs a custom UI component that is shown when the LlamaIndexServer receives the `cliHumanInputEvent`. The name of the component is defined in the `type` field of the `cliHumanInputEvent`. In our case, it is `cli_human_input`, which corresponds to the [cli_human_input.tsx](./components/cli_human_input.tsx) component.
+
+The custom component must use `append` to send a message with a `human_response` annotation. The data of the annotation must match the format of the response event `cliHumanResponseEvent`. In our case, to approve the execution of the command `ls -l`, we would send:
+
+```tsx
+append({
+  content: "Yes",
+  role: "user",
+  annotations: [
+    {
+      type: "human_response",
+      data: {
+        execute: true,
+        command: "ls -l", // The command to execute
+      },
+    },
+  ],
+});
+```
+
+This component displays the command to execute and lets the user choose whether to execute or cancel it.
+
+### Workflow Implementation
+
+The workflow is implemented in [`workflow.ts`](src/app/workflow.ts) using LlamaIndex workflows. The workflow handles three main steps:
+
+1. **Initial Request Handling**: When a user input is received, the workflow uses `chatWithTools` to determine if a CLI command should be executed. If so, it emits a `cliHumanInputEvent` to request user permission.
+
+```typescript
+workflow.handle([startAgentEvent], async ({ data }) => {
+  const { userInput, chatHistory = [] } = data;
+
+  const toolCallResponse = await chatWithTools(
+    llm,
+    [cliExecutor],
+    chatHistory.concat({ role: "user", content: userInput }),
+  );
+
+  const cliExecutorToolCall = toolCallResponse.toolCalls.find(
+    (toolCall) => toolCall.name === cliExecutor.metadata.name,
+  );
+
+  const command = cliExecutorToolCall?.input?.command as string;
+  if (command) {
+    return cliHumanInputEvent.with({
+      type: "cli_human_input",
+      data: { command },
+      response: cliHumanResponseEvent,
+    });
+  }
+
+  return summaryEvent.with("");
+});
+```
+
+2. **Human Response Handling**: After receiving human input, the workflow either executes the command or cancels based on the user's choice.
+
+```typescript
+workflow.handle([cliHumanResponseEvent], async ({ data }) => {
+  const { command, execute } = data.data;
+
+  if (!execute) {
+    return summaryEvent.with(`User reject to execute the command ${command}`);
+  }
+
+  const result = (await cliExecutor.call({ command })) as string;
+
+  return summaryEvent.with(
+    `Executed the command ${command} and got the result: ${result}`,
+  );
+});
+```
+
+3. **Final Response**: The workflow generates a final response based on the execution result and streams it back to the user.
+
+### Tools
+
+The CLI executor tool is defined in [`tools.ts`](src/app/tools.ts):
+
+```typescript
+export const cliExecutor = tool({
+  name: "cli_executor",
+  description: "This tool executes a command and returns the output.",
+  parameters: z.object({ command: z.string() }),
+  execute: async ({ command }) => {
+    try {
+      const output = execSync(command, {
+        encoding: "utf-8",
+      });
+      return output;
+    } catch (error) {
+      console.error(error);
+      return "Command failed";
+    }
+  },
+});
+```
+
+## Architecture
+
+The HITL implementation consists of:
+
+1. **Workflow Factory** (`workflow.ts`): Creates and configures the workflow with event handlers
+2. **Events** (`events.ts`): Defines typed events for human input and response
+3. **Tools** (`tools.ts`): Implements the CLI executor tool
+4. **UI Component** (`components/cli_human_input.tsx`): Provides the user interface for human approval
+5. **Server Entry** (`index.ts`): Configures and starts the LlamaIndexServer
+
+This architecture ensures that dangerous operations like CLI command execution require explicit human approval before proceeding.
@@ -0,0 +1,95 @@
+import { Button } from "@/components/ui/button";
+import { Card, CardContent, CardFooter } from "@/components/ui/card";
+import { JSONValue, useChatUI } from "@llamaindex/chat-ui";
+import React, { FC, useState } from "react";
+import { z } from "zod";
+
+// Shape of the event payload this component accepts: the CLI command that is
+// awaiting approval.
+// NOTE(review): this comment previously pointed at a `CLICommand` model in
+// events.py; in this TypeScript example the matching payload appears to be the
+// `data` field of `cliHumanInputEvent` (src/app/events.ts) — confirm.
+const CLIInputEventSchema = z.object({
+  command: z.string(),
+});
+type CLIInputEvent = z.infer<typeof CLIInputEventSchema>;
+
+/**
+ * Approval card for a CLI command. Shows the command from the most recent
+ * valid event and offers "Yes" / "No" buttons; each button appends a user
+ * message carrying a `human_response` annotation and then hides the buttons.
+ */
+const CLIHumanInput: FC<{
+  events: JSONValue[];
+}> = ({ events }) => {
+  // Keep only events matching the schema and use the last one.
+  const inputEvent = (events || [])
+    .map((ev) => {
+      const parseResult = CLIInputEventSchema.safeParse(ev);
+      return parseResult.success ? parseResult.data : null;
+    })
+    .filter((ev): ev is CLIInputEvent => ev !== null)
+    .at(-1);
+
+  const { append } = useChatUI();
+  // null = no decision yet, true = confirmed, false = cancelled.
+  const [confirmedValue, setConfirmedValue] = useState<boolean | null>(null);
+  const [editableCommand, setEditableCommand] = useState<string | undefined>(
+    inputEvent?.command,
+  );
+
+  // Update editableCommand if inputEvent changes (e.g. new event comes in)
+  React.useEffect(() => {
+    setEditableCommand(inputEvent?.command);
+  }, [inputEvent?.command]);
+
+  // Approve: send the (possibly edited) command with execute = true.
+  const handleConfirm = () => {
+    append({
+      content: "Yes",
+      role: "user",
+      annotations: [
+        {
+          type: "human_response",
+          data: {
+            execute: true,
+            command: editableCommand, // Use editable command
+          },
+        },
+      ],
+    });
+    setConfirmedValue(true);
+  };
+
+  // Reject: send the command from the event with execute = false.
+  const handleCancel = () => {
+    append({
+      content: "No",
+      role: "user",
+      annotations: [
+        {
+          type: "human_response",
+          data: {
+            execute: false,
+            command: inputEvent?.command,
+          },
+        },
+      ],
+    });
+    setConfirmedValue(false);
+  };
+
+  // NOTE(review): the input below is rendered with `disabled`, so its
+  // `onChange` handler is presumably never triggered by user typing — drop
+  // `disabled` if editing the command before approval is intended.
+  return (
+    <Card className="my-4">
+      <CardContent className="pt-6">
+        <p className="text-sm text-gray-700">
+          Do you want to execute the following command?
+        </p>
+        <input
+          disabled
+          type="text"
+          value={editableCommand || ""}
+          onChange={(e) => setEditableCommand(e.target.value)}
+          className="my-2 w-full overflow-x-auto rounded border border-gray-300 bg-gray-100 p-3 font-mono text-xs text-gray-800"
+        />
+      </CardContent>
+      {confirmedValue === null ? (
+        <CardFooter className="flex justify-end gap-2">
+          <>
+            <Button onClick={handleConfirm}>Yes</Button>
+            <Button onClick={handleCancel}>No</Button>
+          </>
+        </CardFooter>
+      ) : null}
+    </Card>
+  );
+};
+
+export default CLIHumanInput;
@@ -0,0 +1,20 @@
+import { OpenAI } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { Settings } from "llamaindex";
+import { workflowFactory } from "./src/app/workflow";
+
+// Use OpenAI's gpt-4o-mini as the global LLM for this example.
+Settings.llm = new OpenAI({
+  model: "gpt-4o-mini",
+});
+
+// Start the server on port 3000 with the HITL workflow, two starter questions,
+// and "components" as the custom UI components directory.
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: {
+    starterQuestions: [
+      "Check status of git in the current directory",
+      "List all files in the current directory",
+    ],
+    componentsDir: "components",
+  },
+  port: 3000,
+}).start();
@@ -0,0 +1,12 @@
+import { humanInputEvent, humanResponseEvent } from "@llamaindex/server";
+
+/**
+ * Human-input event tagged "cli_human_input". Its payload carries the CLI
+ * command and names `cliHumanResponseEvent` as the associated response event.
+ */
+export const cliHumanInputEvent = humanInputEvent<{
+  type: "cli_human_input";
+  data: { command: string };
+  response: typeof cliHumanResponseEvent;
+}>();
+
+/**
+ * Human-response event tagged "human_response". Its payload carries the
+ * command and an `execute` flag.
+ */
+export const cliHumanResponseEvent = humanResponseEvent<{
+  type: "human_response";
+  data: { execute: boolean; command: string };
+}>();
@@ -0,0 +1,20 @@
+import { execSync } from "child_process";
+import { tool } from "llamaindex";
+import { z } from "zod";
+
+/**
+ * Tool that runs a shell command synchronously and returns its output as a
+ * UTF-8 string. When the command throws, the error is logged and the fixed
+ * string "Command failed" is returned instead.
+ */
+export const cliExecutor = tool({
+  name: "cli_executor",
+  description: "This tool executes a command and returns the output.",
+  parameters: z.object({ command: z.string() }),
+  execute: async ({ command }) => {
+    try {
+      return execSync(command, { encoding: "utf-8" });
+    } catch (err) {
+      console.error(err);
+      return "Command failed";
+    }
+  },
+});
@@ -0,0 +1,106 @@
+import { OpenAI } from "@llamaindex/openai";
+import { toAgentRunEvent, writeResponseToStream } from "@llamaindex/server";
+import { chatWithTools } from "@llamaindex/tools";
+import {
+  createWorkflow,
+  getContext,
+  startAgentEvent,
+  stopAgentEvent,
+  withSnapshot,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import { ChatMessage, Settings, ToolCallLLM } from "llamaindex";
+import { cliHumanInputEvent, cliHumanResponseEvent } from "./events";
+import { cliExecutor } from "./tools";
+
+// Configure the global LLM; workflowFactory below reads it from Settings.llm.
+Settings.llm = new OpenAI({
+  model: "gpt-4o-mini",
+});
+
+// Carries the text that the final step appends to the chat history before
+// asking the LLM for the answer; an empty string means nothing is appended.
+const summaryEvent = workflowEvent<string>(); // simple event to summarize the result
+
+/**
+ * Builds the human-in-the-loop CLI workflow for one chat request.
+ *
+ * Steps:
+ * 1. `startAgentEvent` - ask the LLM (with the `cli_executor` tool available)
+ *    how to answer. If it calls the tool, emit `cliHumanInputEvent` to ask the
+ *    user for permission; otherwise go straight to the summary step.
+ * 2. `cliHumanResponseEvent` - run the command if the user approved it, or
+ *    record the rejection, then go to the summary step.
+ * 3. `summaryEvent` - stream the final LLM answer and stop the workflow.
+ *
+ * @param body - The request body; expected to contain `messages`.
+ * @returns The snapshot-enabled workflow.
+ * @throws Error if the configured LLM does not support tool calls.
+ */
+export const workflowFactory = (body: unknown) => {
+  const llm = Settings.llm as ToolCallLLM;
+
+  if (!llm.supportToolCall) {
+    throw new Error("LLM is not a ToolCallLLM");
+  }
+
+  const { messages } = body as { messages: ChatMessage[] };
+
+  const workflow = withSnapshot(createWorkflow());
+
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    if (!userInput) {
+      throw new Error("User input is required");
+    }
+
+    // in this example, we use chatWithTools to decide whether to perform a tool call or not
+    // if the cli executor is called, emit HumanInputEvent to ask the user for permission
+    const toolCallResponse = await chatWithTools(
+      llm,
+      [cliExecutor],
+      chatHistory.concat({ role: "user", content: userInput }),
+    );
+    const cliExecutorToolCall = toolCallResponse.toolCalls.find(
+      (toolCall) => toolCall.name === cliExecutor.metadata.name,
+    );
+    const command = cliExecutorToolCall?.input?.command as string;
+    if (command) {
+      return cliHumanInputEvent.with({
+        type: "cli_human_input",
+        data: { command },
+        response: cliHumanResponseEvent,
+      });
+    }
+
+    // if there is no tool call, just respond as normal
+    return summaryEvent.with("");
+  });
+
+  // do actions after getting the response from the human
+  workflow.handle([cliHumanResponseEvent], async ({ data }) => {
+    const { sendEvent } = getContext();
+    const { command, execute } = data.data;
+
+    if (!execute) {
+      // skip execution if the user rejected the command and go to the summary
+      return summaryEvent.with(`User reject to execute the command ${command}`);
+    }
+
+    sendEvent(
+      toAgentRunEvent({
+        agent: "CLI Executor",
+        text: `Execute the command "${command}" and return the result`,
+        type: "text",
+      }),
+    );
+
+    const result = (await cliExecutor.call({ command })) as string;
+
+    return summaryEvent.with(
+      `Executed the command ${command} and got the result: ${result}`,
+    );
+  });
+
+  workflow.handle([summaryEvent], async ({ data: summaryResult }) => {
+    const { sendEvent } = getContext();
+
+    // Work on a copy so the messages array of the request body is never mutated
+    const chatHistory = [...messages];
+    if (summaryResult) {
+      chatHistory.push({ role: "user", content: summaryResult });
+    }
+
+    const stream = await llm.chat({
+      messages: chatHistory,
+      stream: true,
+    });
+
+    const result = await writeResponseToStream(stream, sendEvent);
+
+    return stopAgentEvent.with({ result });
+  });
+
+  return workflow;
+};
@@ -7,19 +7,18 @@
    "dev": "nodemon --exec tsx simple-workflow/calculator.ts"
  },
  "dependencies": {
-    "@llamaindex/openai": "^0.2.0",
-    "@llamaindex/readers": "^3.0.0",
+    "@llamaindex/openai": "~0.4.0",
+    "@llamaindex/readers": "~3.1.4",
    "@llamaindex/server": "workspace:*",
-    "@llamaindex/tools": "0.0.4",
-    "@llamaindex/workflow": "1.1.0",
+    "@llamaindex/tools": "~0.0.11",
    "dotenv": "^16.4.7",
-    "llamaindex": "0.10.2",
-    "zod": "^3.23.8"
+    "llamaindex": "~0.11.0",
+    "zod": "^3.24.2"
  },
  "devDependencies": {
    "@types/node": "^20.10.3",
    "nodemon": "^3.1.10",
-    "tsx": "^4.7.2",
+    "tsx": "4.7.2",
    "typescript": "^5.3.2"
  }
 }
@@ -0,0 +1,68 @@
+# Upload File Example
+
+This example shows how to use the uploaded file (private file) from the user in the workflow.
+
+## Prerequisites
+
+Please follow the setup instructions in the [examples README](../README.md).
+
+You will also need:
+
+- An OpenAI API key
+- The `enableFileUpload` option in the `uiConfig` is set to `true`.
+
+```typescript
+new LlamaIndexServer({
+  // ... other options
+  uiConfig: { enableFileUpload: true },
+}).start();
+```
+
+## How to get the uploaded files in your workflow:
+
+In LlamaIndexServer, the uploaded file is included in chat message annotations. You can easily get the uploaded files from chat messages using the [extractFileAttachments](https://github.com/llamaindex/llamaindex/blob/main/packages/server/src/utils/events.ts) function.
+
+```typescript
+import { type Message } from "ai";
+import { extractFileAttachments } from "@llamaindex/server";
+
+async function workflowFactory(reqBody: { messages: Message[] }) {
+  const attachments = extractFileAttachments(reqBody.messages);
+  // ...
+}
+```
+
+### AgentWorkflow
+
+If you are using AgentWorkflow, you can give the agent access to the files by creating a tool that reads the file content. We recommend using the `fileId` as the tool parameter instead of the `filePath`, to avoid exposing internal file paths to the user. You can use the `getStoredFilePath` helper function to get the file path from the file id.
+
+```typescript
+import { getStoredFilePath, extractFileAttachments } from "@llamaindex/server";
+
+const readFileTool = tool(
+  ({ fileId }) => {
+    // Get the file path from the file id
+    const filePath = getStoredFilePath({ id: fileId });
+    return fsPromises.readFile(filePath, "utf8");
+  },
+  {
+    name: "read_file",
+    description: `Use this tool with the file id to read the file content. The available file are: [${attachments.map((file) => file.id).join(", ")}]`,
+    parameters: z.object({
+      fileId: z.string(),
+    }),
+  },
+);
+```
+
+**Tip:** You can put the attachment file information in either the tool description or the agent's system prompt.
+
+Check: [agent-workflow.ts](./agent-workflow.ts) for the full example.
+
+### Custom Workflow
+
+In a custom workflow, instead of defining a tool, you can use the helper functions (`extractFileAttachments` and `getStoredFilePath`) to work with file attachments directly.
+
+Check: [custom-workflow.ts](./custom-workflow.ts) for the full example.
+
+> To run the custom workflow example, update the `index.ts` file to use the `workflowFactory` from `custom-workflow.ts` instead of `agent-workflow.ts`.
@@ -0,0 +1,39 @@
+import { extractFileAttachments, getStoredFilePath } from "@llamaindex/server";
+import { agent } from "@llamaindex/workflow";
+import { type Message } from "ai";
+import { tool } from "llamaindex";
+import { promises as fsPromises } from "node:fs";
+import { z } from "zod";
+
+/**
+ * Builds an agent that can read the files attached to the conversation.
+ *
+ * The agent gets a single `read_file` tool that accepts a file id; only ids
+ * found in the request's messages are accepted.
+ */
+export const workflowFactory = async (reqBody: { messages: Message[] }) => {
+  // Ids of every file attached to the chat messages
+  const fileIds = extractFileAttachments(reqBody.messages).map(
+    (attachment) => attachment.id,
+  );
+
+  // Tool that resolves a file id to its stored path and returns the content
+  const readFileTool = tool(
+    ({ fileId }) => {
+      const isKnownFile = fileIds.includes(fileId);
+      if (!isKnownFile) {
+        throw new Error(`File with id ${fileId} not found`);
+      }
+
+      return fsPromises.readFile(getStoredFilePath({ id: fileId }), "utf8");
+    },
+    {
+      name: "read_file",
+      description: `Use this tool with the id of the file to read the file content. Here are the available file ids: [${fileIds.join(", ")}]`,
+      parameters: z.object({
+        fileId: z.string(),
+      }),
+    },
+  );
+
+  return agent({
+    tools: [readFileTool],
+    systemPrompt: `
+      You are a helpful assistant that can help the user with their file.
+      You can use the read_file tool to read the file content.
+    `,
+  });
+};
@@ -0,0 +1,98 @@
+import { extractFileAttachments } from "@llamaindex/server";
+import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
+
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import { Message } from "ai";
+import { promises as fsPromises } from "node:fs";
+
+const fileHelperEvent = workflowEvent<{
+  userInput: MessageContent;
+  fileContent: string;
+}>();
+
+/**
+ * A simple workflow that demonstrates how to use an uploaded file:
+ * it reads the most recently attached file and asks the LLM to answer the
+ * user's request with the file content included in the prompt.
+ *
+ * Throws if the request's messages contain no file attachment.
+ */
+export function workflowFactory(reqBody: { messages: Message[] }) {
+  const llm = Settings.llm;
+
+  // Collect the files attached to the conversation; at least one is required
+  const attachments = extractFileAttachments(reqBody.messages);
+  if (attachments.length === 0) {
+    throw new Error("Please upload a file to start");
+  }
+  const latestAttachment = attachments[attachments.length - 1];
+
+  // Workflow state: chat memory plus the last attachment
+  const { withState, getContext } = createStatefulMiddleware(() => ({
+    memory: new ChatMemoryBuffer({ llm }),
+    uploadedFile: latestAttachment,
+  }));
+  const workflow = withState(createWorkflow());
+
+  // Step 1: remember the user input and load the file content
+  workflow.handle([startAgentEvent], async ({ data: { userInput } }) => {
+    const { state } = getContext();
+    if (!userInput) {
+      throw new Error("Missing user input to start the workflow");
+    }
+    state.memory.put({ role: "user", content: userInput });
+
+    const fileContent = await fsPromises.readFile(
+      state.uploadedFile.path,
+      "utf8",
+    );
+
+    return fileHelperEvent.with({ userInput, fileContent });
+  });
+
+  // Step 2: ask the LLM about the file and stream its answer back
+  workflow.handle([fileHelperEvent], async ({ data }) => {
+    const { sendEvent } = getContext();
+    const { fileContent, userInput } = data;
+
+    const prompt = `
+You are a helpful assistant that can help the user with their file.
+
+Here is the provided file content:
+${fileContent}
+
+Now, let help the user with this request:
+${userInput}
+`;
+
+    const completionStream = await llm.complete({ prompt, stream: true });
+
+    // Forward every chunk as an agent stream event
+    for await (const part of completionStream) {
+      const streamEvent = agentStreamEvent.with({
+        delta: part.text,
+        response: part.text,
+        currentAgentName: "agent",
+        raw: part.raw,
+      });
+      sendEvent(streamEvent);
+    }
+
+    // Signal the end of the run
+    sendEvent(stopAgentEvent.with({ result: "" }));
+  });
+
+  return workflow;
+}
@@ -0,0 +1,23 @@
+import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { Settings } from "llamaindex";
+import { workflowFactory } from "./agent-workflow";
+// Uncomment this to use a custom workflow
+// import { workflowFactory } from "./custom-workflow";
+
+// Global model configuration for this example.
+Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
+Settings.embedModel = new OpenAIEmbedding({ model: "text-embedding-3-small" });
+
+// Serve the selected workflow on port 3000 with file upload enabled and
+// next-question suggestions turned off.
+const server = new LlamaIndexServer({
+  workflow: workflowFactory,
+  suggestNextQuestions: false,
+  uiConfig: { enableFileUpload: true },
+  port: 3000,
+});
+server.start();
@@ -1,8 +1,13 @@
+import { OpenAI } from "@llamaindex/openai";
 import { LlamaIndexServer } from "@llamaindex/server";
 import { agent } from "@llamaindex/workflow";
-import { tool } from "llamaindex";
+import { Settings, tool } from "llamaindex";
 import { z } from "zod";

+Settings.llm = new OpenAI({
+  model: "gpt-4o-mini",
+});
+
 const calculatorAgent = agent({
  tools: [
    tool({
@@ -10,5 +10,5 @@
    "outDir": "dist"
  },
  "include": ["**/*"],
-  "exclude": ["node_modules", "dist", "custom-layout/layout"]
+  "exclude": ["node_modules", "dist", "custom-layout/layout", "hitl/components"]
 }
@@ -1,16 +1,19 @@
-import { type AgentInputData } from "@llamaindex/workflow";
 import { type Message } from "ai";
 import { type MessageType } from "llamaindex";
 import { NextRequest, NextResponse } from "next/server";

 // import chat utils
 import {
+  getHumanResponsesFromMessage,
+  pauseForHumanInput,
+  processWorkflowStream,
  runWorkflow,
  sendSuggestedQuestionsEvent,
  toDataStream,
 } from "./utils";

 // import workflow factory and settings from local file
+import { stopAgentEvent } from "@llamaindex/workflow";
 import { initSettings } from "./app/settings";
 import { workflowFactory } from "./app/workflow";

@@ -21,7 +24,10 @@ export async function POST(req: NextRequest) {
    const reqBody = await req.json();
    const suggestNextQuestions = process.env.SUGGEST_NEXT_QUESTIONS === "true";

-    const { messages } = reqBody as { messages: Message[] };
+    const { messages, id: requestId } = reqBody as {
+      messages: Message[];
+      id?: string;
+    };
    const chatHistory = messages.map((message) => ({
      role: message.role as MessageType,
      content: message.content,
@@ -36,25 +42,31 @@ export async function POST(req: NextRequest) {
        { status: 400 },
      );
    }
-    const workflowInput: AgentInputData = {
-      userInput: lastMessage.content,
-      chatHistory,
-    };

    const abortController = new AbortController();
    req.signal.addEventListener("abort", () =>
      abortController.abort("Connection closed"),
    );

-    const workflow = await workflowFactory(reqBody);
-    const workflowEventStream = await runWorkflow(
-      workflow,
-      workflowInput,
-      abortController.signal,
+    const context = await runWorkflow({
+      workflow: await workflowFactory(reqBody),
+      input: { userInput: lastMessage.content, chatHistory },
+      human: {
+        snapshotId: requestId, // use requestId to restore snapshot
+        responses: getHumanResponsesFromMessage(lastMessage),
+      },
+    });
+
+    const stream = processWorkflowStream(context.stream).until(
+      (event) =>
+        abortController.signal.aborted || stopAgentEvent.include(event),
    );

-    const dataStream = toDataStream(workflowEventStream, {
+    const dataStream = toDataStream(stream, {
      callbacks: {
+        onPauseForHumanInput: async (responseEvent) => {
+          await pauseForHumanInput(context, responseEvent, requestId); // use requestId to save snapshot
+        },
        onFinal: async (completion, dataStreamWriter) => {
          chatHistory.push({
            role: "assistant" as MessageType,
@@ -66,7 +78,6 @@ export async function POST(req: NextRequest) {
        },
      },
    });
-
    return new Response(dataStream, {
      status: 200,
      headers: {
@@ -0,0 +1,57 @@
+import crypto from "node:crypto";
+import fs from "node:fs";
+import path from "node:path";
+
+import { type ServerFile } from "@llamaindex/server";
+
+export const UPLOADED_FOLDER = "output/uploaded";
+
+/**
+ * Store an uploaded file under `UPLOADED_FOLDER` and describe where it went.
+ *
+ * The stored file id is `<sanitized name>_<uuid>.<sanitized extension>`.
+ *
+ * @param name - Original file name; must contain a non-empty base name and a
+ *   non-empty extension (the part after the last dot).
+ * @param fileBuffer - Raw file content.
+ * @returns The stored file's id, size in bytes, type (extension), url and path.
+ * @throws Error when the name has no base name or no extension.
+ */
+export async function storeFile(
+  name: string,
+  fileBuffer: Buffer,
+): Promise<ServerFile> {
+  // Split on the LAST dot so that "my.report.pdf" yields "my.report" + "pdf"
+  // instead of treating "report" as the extension.
+  const dotIndex = name.lastIndexOf(".");
+  const fileName = dotIndex === -1 ? name : name.slice(0, dotIndex);
+  const rawExt = dotIndex === -1 ? "" : name.slice(dotIndex + 1);
+  if (!fileName) {
+    throw new Error("File name is required");
+  }
+  if (!rawExt) {
+    throw new Error("File extension is required");
+  }
+
+  // The extension comes from user input and ends up in the file path, so it is
+  // sanitized the same way as the base name (no path separators).
+  const fileExt = sanitizeFileName(rawExt);
+
+  const id = crypto.randomUUID();
+  const fileId = `${sanitizeFileName(fileName)}_${id}.${fileExt}`;
+  const filepath = path.join(UPLOADED_FOLDER, fileId);
+  const fileUrl = await saveFile(filepath, fileBuffer);
+  return {
+    id: fileId,
+    size: fileBuffer.length,
+    type: fileExt,
+    url: fileUrl,
+    path: filepath,
+  };
+}
+
+/**
+ * Write `content` to the relative path `filepath`, creating missing parent
+ * directories, and return the URL `/api/files/<filepath>`.
+ *
+ * Throws if `filepath` is absolute.
+ */
+async function saveFile(filepath: string, content: string | Buffer) {
+  if (path.isAbsolute(filepath)) {
+    throw new Error("Absolute file paths are not allowed.");
+  }
+
+  // Make sure the target directory exists before writing
+  await fs.promises.mkdir(path.dirname(filepath), { recursive: true });
+
+  if (typeof content !== "string") {
+    await fs.promises.writeFile(filepath, content);
+  } else {
+    // Text content is written as UTF-8
+    await fs.promises.writeFile(filepath, content, "utf-8");
+  }
+
+  return `/api/files/${filepath}`;
+}
+
+/** Replace every character other than ASCII letters, digits, `_` and `-` with `_`. */
+function sanitizeFileName(fileName: string) {
+  const unsafeChars = /[^\w-]/g;
+  return fileName.replace(unsafeChars, "_");
+}
@@ -0,0 +1,49 @@
+import { type FileAnnotation } from "@llamaindex/server";
+import { NextRequest, NextResponse } from "next/server";
+import { storeFile } from "./helpers";
+
+/**
+ * Upload endpoint: expects a JSON body `{ name, base64 }` where `base64` has
+ * the form `<header>,<content>`, stores the decoded content via `storeFile`
+ * and responds with the stored file description.
+ *
+ * Responds with 400 when a field is missing or the base64 value is malformed,
+ * and with 500 (plus the error message) when anything throws.
+ */
+export async function POST(request: NextRequest) {
+  // Shared 400 response for a malformed base64 value
+  const invalidFormat = () =>
+    NextResponse.json({ error: "Invalid base64 format" }, { status: 400 });
+
+  try {
+    const body: { name: string; base64: string } = await request.json();
+    const { name, base64 } = body;
+    if (!base64 || !name) {
+      return NextResponse.json(
+        { error: "base64 and name is required in the request body" },
+        { status: 400 },
+      );
+    }
+
+    // Exactly one comma must separate the header from the encoded content
+    const segments = base64.split(",");
+    if (segments.length !== 2) {
+      return invalidFormat();
+    }
+
+    const [header, content] = segments;
+    if (!header || !content) {
+      return invalidFormat();
+    }
+
+    const file = await storeFile(name, Buffer.from(content, "base64"));
+
+    return NextResponse.json(file as FileAnnotation);
+  } catch (error) {
+    console.error("[Upload API]", error);
+    return NextResponse.json(
+      { error: (error as Error).message },
+      { status: 500 },
+    );
+  }
+}
@@ -19,7 +19,6 @@ export function ChatMessageContent({
      <ToolAnnotations />
      <ChatMessage.Content.Image />
      <DynamicEvents componentDefs={componentDefs} appendError={appendError} />
-      <ChatMessage.Content.Artifact />
      <ChatMessage.Content.Markdown />
      <ChatMessage.Content.DocumentFile />
      <ChatMessage.Content.Source />
@@ -32,7 +32,10 @@ export default function CustomChatMessages({
            <ChatMessage.Actions />
          </ChatMessage>
        ))}
-        <ChatMessages.Empty />
+        <ChatMessages.Empty
+          heading="Hello there!"
+          subheading="I'm here to help you with your questions."
+        />
        <ChatMessages.Loading />
      </ChatMessages.List>
      <ChatStarter />
@@ -1,7 +1,7 @@
 "use client";

 import {
-  getChatUIAnnotation,
+  getAnnotationData,
  JSONValue,
  MessageAnnotation,
  MessageAnnotationType,
@@ -25,9 +25,8 @@ export const DynamicEvents = ({
  componentDefs: ComponentDef[];
  appendError: (error: string) => void;
 }) => {
-  const {
-    message: { annotations },
-  } = useChatMessage();
+  const { message } = useChatMessage();
+  const annotations = message.annotations;

  const shownWarningsRef = useRef<Set<string>>(new Set()); // track warnings
  const [hasErrors, setHasErrors] = useState(false);
@@ -43,15 +42,16 @@ export const DynamicEvents = ({

    const availableComponents = new Set(componentDefs.map((comp) => comp.type));

-    annotations.forEach((annotation: MessageAnnotation) => {
+    annotations.forEach((item: JSONValue) => {
+      const annotation = item as MessageAnnotation;
      const type = annotation.type;
-      if (!type) return; // skip if annotation doesn't have a type
+      if (!type) return; // Skip if annotation doesn't have a type

-      const events = getChatUIAnnotation(annotations, type);
+      const events = getAnnotationData<JSONValue>(message, type);

      // Skip if it's a built-in component or if we've already shown the warning
      if (
-        BUILT_IN_CHATUI_COMPONENTS.includes(type) ||
+        BUILT_IN_CHATUI_COMPONENTS.includes(type as MessageAnnotationType) ||
        shownWarningsRef.current.has(type)
      ) {
        return;
@@ -69,7 +69,7 @@ export const DynamicEvents = ({

  const components: EventComponent[] = componentDefs
    .map((comp) => {
-      const events = getChatUIAnnotation(annotations, comp.type) as JSONValue[]; // get all event data by type
+      const events = getAnnotationData<JSONValue>(message, comp.type);
      if (!events?.length) return null;
      return { ...comp, events };
    })
@@ -67,6 +67,9 @@ export const SOURCE_MAP: Record<string, () => Promise<any>> = {
    import("../../../toggle-group"),
  [`${SHADCN_IMPORT_PREFIX}/tooltip`]: () => import("../../../tooltip"),

+  ///// CHAT_UI GENERAL  /////
+  [`@llamaindex/chat-ui`]: () => import("@llamaindex/chat-ui"),
+
  ///// WIDGETS FROM CHAT_UI /////
  [`@llamaindex/chat-ui/widgets`]: () => import("@llamaindex/chat-ui/widgets"),

@@ -76,6 +79,9 @@ export const SOURCE_MAP: Record<string, () => Promise<any>> = {
  ///// UTILS /////
  [`@/components/lib/utils`]: () => import("../../../lib/utils"),
  [`@/lib/utils`]: () => import("../../../lib/utils"), // for v0 compatibility
+
+  ///// ZOD /////
+  [`zod`]: () => import("zod"),
 };

 // parse imports from code to get Function constructor arguments and component name
@@ -122,7 +128,7 @@ export async function parseImports(code: string) {
  const importPromises = imports.map(async ({ name, source }) => {
    if (!(source in SOURCE_MAP)) {
      throw new Error(
-        `Fail to import ${name} from ${source}. Reason: Module not found. \nCurrently we only support importing UI components from Shadcn components, widgets from "llamaindex/chat-ui/widgets" and icons from "lucide-react"`,
+        `Fail to import ${name} from ${source}. Reason: Module not found. \nCurrently we only support importing UI components from Shadcn components, widgets and hooks from "llamaindex/chat-ui", icons from "lucide-react" and zod for data validation.`,
      );
    }
    try {
@@ -1,7 +1,9 @@
 "use client";

-import { SourceData } from "@llamaindex/chat-ui";
-import { Markdown as MarkdownUI } from "@llamaindex/chat-ui/widgets";
+import {
+  Markdown as MarkdownUI,
+  SourceData,
+} from "@llamaindex/chat-ui/widgets";
 import { getConfig } from "../../lib/utils";
 const preprocessMedia = (content: string) => {
  // Remove `sandbox:` from the beginning of the URL before rendering markdown
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";

 export function DefaultHeader() {
  return (
-    <div className="flex items-center justify-between px-4 pt-2">
+    <div className="flex items-center justify-between p-2 px-4">
      <div className="flex items-center gap-2">
        <Sparkles className="size-4" />
        <h1 className="font-semibold">LlamaIndex App</h1>
@@ -2,8 +2,7 @@

 import {
  Message,
-  MessageAnnotation,
-  getChatUIAnnotation,
+  getAnnotationData,
  useChatMessage,
  useChatUI,
 } from "@llamaindex/chat-ui";
@@ -21,13 +20,10 @@ export function ToolAnnotations() {
    [messages, message],
  );
  // Get the tool data from the message annotations
-  const annotations = message.annotations as MessageAnnotation[] | undefined;
-  const toolData = annotations
-    ? (getChatUIAnnotation(annotations, "tools") as unknown as ToolData[])
-    : null;
-  return toolData?.[0] ? (
-    <ChatTools data={toolData[0]} artifactVersion={artifactVersion} />
-  ) : null;
+  const toolData = getAnnotationData<ToolData>(message, "tools");
+  if (toolData.length === 0) return null;
+
+  return <ChatTools data={toolData[0]} artifactVersion={artifactVersion} />;
 }

 // TODO: Used to render outputs of tools. If needed, add more renderers here.
@@ -83,9 +79,7 @@ function getArtifactVersion(
  if (!messageId) return undefined;
  let versionIndex = 1;
  for (const m of messages) {
-    const toolData = m.annotations
-      ? (getChatUIAnnotation(m.annotations, "tools") as unknown as ToolData[])
-      : null;
+    const toolData = getAnnotationData<ToolData>(m, "tools");

    if (toolData?.some((t) => t.toolCall.name === "artifact")) {
      if ("id" in m && m.id === messageId) {
@@ -91,6 +91,13 @@
  ::file-selector-button {
    border-color: var(--color-gray-200, currentColor);
  }
+
+  /* Tailwind v4 removed cursor pointer of button and use default cursor */
+  /* https://github.com/shadcn-ui/ui/issues/6843#issuecomment-2696947980 */
+  button:not([disabled]),
+  [role="button"]:not([disabled]) {
+    cursor: pointer;
+  }
 }

@layer base {
@@ -1,6 +1,7 @@
 import type { Metadata } from "next";
 import { Inter } from "next/font/google";

+import "@llamaindex/chat-ui/styles/editor.css";
 import "@llamaindex/chat-ui/styles/markdown.css";
 import "@llamaindex/chat-ui/styles/pdf.css";
 import "./globals.css";
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/server",
  "description": "LlamaIndex Server",
-  "version": "0.2.4",
+  "version": "0.2.8",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -65,7 +65,7 @@
    "@babel/traverse": "^7.27.0",
    "@babel/types": "^7.27.0",
    "@hookform/resolvers": "^5.0.1",
-    "@llamaindex/chat-ui": "0.4.6",
+    "@llamaindex/chat-ui": "0.5.6",
    "@radix-ui/react-accordion": "^1.2.3",
    "@radix-ui/react-alert-dialog": "^1.1.7",
    "@radix-ui/react-aspect-ratio": "^1.1.3",
@@ -114,7 +114,7 @@
  },
  "peerDependencies": {
    "@llamaindex/env": "~0.1.30",
-    "@llamaindex/workflow": "~1.1.3",
+    "@llamaindex/workflow": "~1.1.8",
    "llamaindex": "~0.11.0",
    "zod": "^3.24.2",
    "zod-to-json-schema": "^3.23.3"
@@ -18,6 +18,7 @@ const eslintConfig = [
      "react-hooks/exhaustive-deps": "off",
      "@next/next/no-img-element": "off",
      "@next/next/no-assign-module-variable": "off",
+      "@typescript-eslint/no-empty-object-type": "off",
    },
  },
  {
@@ -12,7 +12,7 @@
    "format": "prettier --ignore-unknown --cache --check .",
    "format:write": "prettier --ignore-unknown --write .",
    "typecheck": "tsc --noEmit",
-    "generate": "tsx app\\api\\chat\\generate.ts"
+    "generate": "tsx app/api/chat/generate.ts"
  },
  "devDependencies": {
    "@eslint/eslintrc": "^3",
@@ -31,7 +31,7 @@
    "prettier-plugin-organize-imports": "^4.1.0",
    "prettier-plugin-tailwindcss": "^0.6.11",
    "tailwindcss": "^4",
-    "tsx": "^4.19.3",
+    "tsx": "4.7.2",
    "tw-animate-css": "1.2.5",
    "typescript": "^5"
  },
@@ -41,12 +41,12 @@
    "@babel/traverse": "^7.27.0",
    "@babel/types": "^7.27.0",
    "@hookform/resolvers": "^5.0.1",
-    "@llamaindex/chat-ui": "0.4.5",
+    "@llamaindex/chat-ui": "0.5.6",
    "@llamaindex/env": "~0.1.30",
    "@llamaindex/openai": "~0.4.0",
    "@llamaindex/readers": "~3.1.4",
    "@llamaindex/tools": "~0.0.11",
-    "@llamaindex/workflow": "~1.1.3",
+    "@llamaindex/workflow": "~1.1.8",
    "@radix-ui/react-accordion": "^1.2.3",
    "@radix-ui/react-alert-dialog": "^1.1.7",
    "@radix-ui/react-aspect-ratio": "^1.1.3",
@@ -1,16 +1,20 @@
-import type { AgentInputData } from "@llamaindex/workflow";
+import { stopAgentEvent } from "@llamaindex/workflow";
 import { type Message } from "ai";
 import { IncomingMessage, ServerResponse } from "http";
 import type { MessageType } from "llamaindex";
 import { type WorkflowFactory } from "../types";
+import { sendSuggestedQuestionsEvent } from "../utils";
+import {
+  getHumanResponsesFromMessage,
+  pauseForHumanInput,
+} from "../utils/hitl";
 import {
  parseRequestBody,
  pipeStreamToResponse,
  sendJSONResponse,
 } from "../utils/request";
 import { toDataStream } from "../utils/stream";
-import { sendSuggestedQuestionsEvent } from "../utils/suggestion";
-import { runWorkflow } from "../utils/workflow";
+import { processWorkflowStream, runWorkflow } from "../utils/workflow";

 export const handleChat = async (
  req: IncomingMessage,
@@ -18,37 +22,47 @@ export const handleChat = async (
  workflowFactory: WorkflowFactory,
  suggestNextQuestions: boolean,
 ) => {
+  const abortController = new AbortController();
+  res.on("close", () => abortController.abort("Connection closed"));
+
  try {
    const body = await parseRequestBody(req);
-    const { messages } = body as { messages: Message[] };
+    const { messages, id: requestId } = body as {
+      messages: Message[];
+      id?: string;
+    };
+
+    const lastMessage = messages[messages.length - 1];
+    if (lastMessage?.role !== "user" || !lastMessage.content) {
+      return sendJSONResponse(res, 400, {
+        error: "Messages cannot be empty and last message must be from user",
+      });
+    }
+
    const chatHistory = messages.map((message) => ({
      role: message.role as MessageType,
      content: message.content,
    }));

-    const lastMessage = messages[messages.length - 1];
-    if (lastMessage?.role !== "user") {
-      return sendJSONResponse(res, 400, {
-        error: "Messages cannot be empty and last message must be from user",
-      });
-    }
-    const workflowInput: AgentInputData = {
-      userInput: lastMessage.content,
-      chatHistory,
-    };
+    const context = await runWorkflow({
+      workflow: await workflowFactory(body),
+      input: { userInput: lastMessage.content, chatHistory },
+      human: {
+        snapshotId: requestId, // use requestId to restore snapshot
+        responses: getHumanResponsesFromMessage(lastMessage),
+      },
+    });

-    const abortController = new AbortController();
-    res.on("close", () => abortController.abort("Connection closed"));
-
-    const workflow = await workflowFactory(body);
-    const workflowEventStream = await runWorkflow(
-      workflow,
-      workflowInput,
-      abortController.signal,
+    const stream = processWorkflowStream(context.stream).until(
+      (event) =>
+        abortController.signal.aborted || stopAgentEvent.include(event),
    );

-    const dataStream = toDataStream(workflowEventStream, {
+    const dataStream = toDataStream(stream, {
      callbacks: {
+        onPauseForHumanInput: async (responseEvent) => {
+          await pauseForHumanInput(context, responseEvent, requestId); // use requestId to save snapshot
+        },
        onFinal: async (completion, dataStreamWriter) => {
          chatHistory.push({
            role: "assistant" as MessageType,
@@ -1,5 +1,9 @@
 export * from "./server";
 export * from "./types";
 export * from "./utils/events";
+export { getStoredFilePath } from "./utils/file";
 export { generateEventComponent } from "./utils/gen-ui";
+export * from "./utils/hitl";
+export * from "./utils/inline";
 export * from "./utils/prompts";
+export * from "./utils/stream";
@@ -47,7 +47,7 @@ export class LlamaIndexServer {
    const componentsApi = this.componentsDir ? "/api/components" : undefined;
    const layoutApi = this.layoutDir ? "/api/layout" : undefined;
    const devMode = uiConfig?.devMode ?? false;
-
+    const enableFileUpload = uiConfig?.enableFileUpload ?? false;
    // content in javascript format
    const content = `
      window.LLAMAINDEX = {
@@ -57,7 +57,8 @@ export class LlamaIndexServer {
        COMPONENTS_API: ${JSON.stringify(componentsApi)},
        LAYOUT_API: ${JSON.stringify(layoutApi)},
        DEV_MODE: ${JSON.stringify(devMode)},
-        SUGGEST_NEXT_QUESTIONS: ${JSON.stringify(this.suggestNextQuestions)}
+        SUGGEST_NEXT_QUESTIONS: ${JSON.stringify(this.suggestNextQuestions)},
+        UPLOAD_API: ${JSON.stringify(enableFileUpload ? "/api/files" : undefined)}
      }
    `;
    fs.writeFileSync(configFile, content);
@@ -18,6 +18,7 @@ export type UIConfig = {
  layoutDir?: string;
  llamaCloudIndexSelector?: boolean;
  devMode?: boolean;
+  enableFileUpload?: boolean;
 };

 export type LlamaIndexServerOptions = NextAppOptions & {
@@ -1,8 +1,15 @@
 import { randomUUID } from "@llamaindex/env";
 import { workflowEvent } from "@llamaindex/workflow";
 import type { Message } from "ai";
-import { MetadataMode, type Metadata, type NodeWithScore } from "llamaindex";
+import {
+  MetadataMode,
+  type ChatMessage,
+  type Metadata,
+  type NodeWithScore,
+} from "llamaindex";
 import { z } from "zod";
+import { getStoredFilePath } from "./file";
+import { getInlineAnnotations } from "./inline";

 // Events that appended to stream as annotations
 export type SourceEventNode = {
@@ -103,6 +110,7 @@ export type DocumentArtifactData = {
  title: string;
  content: string;
  type: string; // markdown, html,...
+  sources?: { id: string }[]; // sources that are used to render citation numbers in the document
 };

 export type CodeArtifact = Artifact<CodeArtifactData> & {
@@ -148,24 +156,22 @@ export const artifactAnnotationSchema = z.object({
  data: artifactSchema,
 });

-export function extractAllArtifacts(messages: Message[]): Artifact[] {
-  const allArtifacts: Artifact[] = [];
+export function extractArtifactsFromMessage(message: ChatMessage): Artifact[] {
+  const inlineAnnotations = getInlineAnnotations(message);
+  const artifacts = inlineAnnotations.filter(
+    (annotation): annotation is z.infer<typeof artifactAnnotationSchema> => {
+      return artifactAnnotationSchema.safeParse(annotation).success;
+    },
+  );
+  return artifacts.map((artifact) => artifact.data);
+}

-  for (const message of messages) {
-    const artifacts =
-      message.annotations
-        ?.filter(
-          (
-            annotation,
-          ): annotation is z.infer<typeof artifactAnnotationSchema> =>
-            artifactAnnotationSchema.safeParse(annotation).success,
-        )
-        .map((annotation) => annotation.data as Artifact) ?? [];
-
-    allArtifacts.push(...artifacts);
-  }
-
-  return allArtifacts;
+export function extractArtifactsFromAllMessages(
+  messages: ChatMessage[],
+): Artifact[] {
+  return messages
+    .flatMap((message) => extractArtifactsFromMessage(message))
+    .sort((a, b) => a.created_at - b.created_at);
 }

 export function extractLastArtifact(
@@ -187,10 +193,10 @@ export function extractLastArtifact(
  requestBody: unknown,
  type?: ArtifactType,
 ): CodeArtifact | DocumentArtifact | Artifact | undefined {
-  const { messages } = (requestBody as { messages?: Message[] }) ?? {};
+  const { messages } = (requestBody as { messages?: ChatMessage[] }) ?? {};
  if (!messages) return undefined;

-  const artifacts = extractAllArtifacts(messages);
+  const artifacts = extractArtifactsFromAllMessages(messages);
  if (!artifacts.length) return undefined;

  if (type) {
@@ -211,3 +217,64 @@ export function extractLastArtifact(

  return artifacts[artifacts.length - 1];
 }
+
+// Schema of a single file entry inside a `document_file` annotation
+export const fileAnnotationSchema = z.object({
+  id: z.string(),
+  size: z.number(),
+  type: z.string(),
+  url: z.string(),
+});
+
+// Schema of a message annotation that carries a list of attached files
+export const documentFileAnnotationSchema = z.object({
+  type: z.literal("document_file"),
+  data: z.object({
+    files: z.array(fileAnnotationSchema),
+  }),
+});
+type DocumentFileAnnotation = z.infer<typeof documentFileAnnotationSchema>;
+
+export type FileAnnotation = z.infer<typeof fileAnnotationSchema>;
+
+// A file annotation extended with the path built by `getStoredFilePath`
+export type ServerFile = FileAnnotation & {
+  path: string;
+};
+
+/**
+ * Extract file attachments from a user message.
+ *
+ * Only messages with the `user` role are inspected. Each annotation is validated
+ * against `documentFileAnnotationSchema`; annotations that do not match are ignored.
+ *
+ * @param message - The message to extract file attachments from.
+ * @returns The attached files, each extended with the path returned by `getStoredFilePath`.
+ * @throws If `getStoredFilePath` rejects one of the file ids.
+ */
+export function extractFileAttachmentsFromMessage(
+  message: Message,
+): ServerFile[] {
+  if (message.role !== "user" || !message.annotations) {
+    return [];
+  }
+
+  const fileAttachments: ServerFile[] = [];
+  for (const annotation of message.annotations) {
+    // Validate once and read from the parsed result instead of casting the raw
+    // annotation, so only data that passed the schema is used.
+    const parsed = documentFileAnnotationSchema.safeParse(annotation);
+    if (!parsed.success) continue;
+
+    const { data }: DocumentFileAnnotation = parsed.data;
+    for (const file of data.files) {
+      fileAttachments.push({
+        ...file,
+        path: getStoredFilePath({ id: file.id }),
+      });
+    }
+  }
+  return fileAttachments;
+}
+
+/**
+ * Collect the file attachments of every message in a conversation.
+ * @param messages - The messages to scan, in order.
+ * @returns All file attachments found, in message order.
+ */
+export function extractFileAttachments(messages: Message[]): ServerFile[] {
+  return messages.flatMap((message) =>
+    extractFileAttachmentsFromMessage(message),
+  );
+}
@@ -1,5 +1,6 @@
 import fs from "node:fs";
 import https from "node:https";
+import path from "node:path";

 export async function downloadFile(
  urlToDownload: string,
@@ -29,3 +30,41 @@ export async function downloadFile(
    throw new Error(`Error downloading file: ${error}`);
  }
 }
+
+/**
+ * Builds the path of a stored file from its id and an optional directory.
+ *
+ * The id is validated first so that it always names an entry directly inside
+ * the directory: ids that are empty, equal to ".", or that contain a path
+ * separator or ".." are rejected. The path is built with path.join for
+ * cross-platform compatibility.
+ *
+ * @param params - The parameters object.
+ * @param params.id - The file identifier (must not contain path separators).
+ * @param params.saveDir - Optional directory holding the file. Defaults to "output/uploaded".
+ * @returns The directory joined with the id.
+ * @throws {Error} If the id is empty, is ".", or contains invalid path characters.
+ */
+export function getStoredFilePath({
+  id,
+  saveDir,
+}: {
+  id: string;
+  saveDir?: string;
+}): string {
+  // An empty id or "." would make path.join return the directory itself
+  if (id === "" || id === ".") {
+    throw new Error("Invalid file id: id must name a file.");
+  }
+  // Validate id to prevent path traversal and invalid characters
+  if (id.includes("/") || id.includes("\\") || id.includes("..")) {
+    throw new Error(
+      "Invalid file id: path traversal or separators are not allowed.",
+    );
+  }
+  const directory = saveDir ?? path.join("output", "uploaded");
+  return path.join(directory, id);
+}
@@ -0,0 +1,64 @@
+import {
+  type WorkflowEvent,
+  type WorkflowEventData,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import type { Message } from "ai";
+import type { JSONValue } from "llamaindex";
+import z from "zod";
+
+// Payload of an event that asks a human for input
+export type HumanInputEventData = {
+  type: string;
+  data?: JSONValue;
+  // workflow event associated with the human's answer
+  response: WorkflowEvent<HumanResponseEventData>;
+};
+
+// Base event for human input requests, created with workflowBaseEvent
+export const humanInputEvent = workflowBaseEvent<HumanInputEventData>();
+
+// Payload of the event carrying the human's answer
+export type HumanResponseEventData = {
+  type: "human_response";
+  data?: JSONValue;
+};
+
+// Base event for human responses, created with workflowBaseEvent
+export const humanResponseEvent = workflowBaseEvent<HumanResponseEventData>();
+
+// Collect the annotations of a message that are shaped like a `human_response`
+// (an empty array when the message has no annotations)
+export const getHumanResponsesFromMessage = (message: Message) => {
+  const humanResponseSchema = z.object({
+    type: z.literal("human_response"),
+    data: z.any(),
+  });
+
+  const annotations = message.annotations ?? [];
+  return annotations.filter(
+    (annotation): annotation is z.infer<typeof humanResponseSchema> =>
+      humanResponseSchema.safeParse(annotation).success,
+  );
+};
+// TODO: move to llama-flow package
+export type BaseEvent<K> = (<T extends K>() => WorkflowEvent<T>) &
+  WorkflowEvent<K>;
+
+/**
+ * Create a "base" workflow event that also acts as a factory for derived events.
+ *
+ * The returned value is callable: each call creates a new workflow event and
+ * registers it as derived from the base event. The base event's `include` is
+ * replaced so that it returns true for its own instances and for instances of
+ * any derived event created through that call.
+ *
+ * @returns The factory function merged with the enhanced base event.
+ */
+export function workflowBaseEvent<K = unknown>(): BaseEvent<K> {
+  const baseEvent = workflowEvent<K>();
+  // events created through the factory; consulted by the enhanced `include` below
+  const derivedEvents = new Set<WorkflowEvent<unknown>>();
+
+  // factory: create a new event and remember it as derived from the base event
+  function eventFn<T>(): WorkflowEvent<T> {
+    const event = workflowEvent<T>();
+    derivedEvents.add(event);
+    return event;
+  }
+
+  // keep the original check before Object.assign overwrites `include` on baseEvent
+  const originalInclude = baseEvent.include;
+  const enhancedBaseEvent = Object.assign(baseEvent, {
+    // NOTE(review): the predicate narrows to WorkflowEventData<void>; confirm whether
+    // WorkflowEventData<K> was intended
+    include: (
+      instance: WorkflowEventData<unknown>,
+    ): instance is WorkflowEventData<void> => {
+      // Base event accepts its own instances OR instances from any derived events
+      return (
+        originalInclude(instance) ||
+        Array.from(derivedEvents).some((e) => e.include(instance))
+      );
+    },
+  });
+
+  // copy the enhanced base event's own enumerable properties onto the factory function
+  return Object.assign(eventFn, enhancedBaseEvent) as typeof eventFn &
+    typeof baseEvent;
+}
@@ -0,0 +1,4 @@
+export * from "./events";
+export * from "./pause";
+export * from "./resume";
+export * from "./snapshot";
@@ -0,0 +1,25 @@
+import {
+  request,
+  type WorkflowContext,
+  type WorkflowEvent,
+} from "@llamaindex/workflow";
+import { randomUUID } from "node:crypto";
+import type { HumanResponseEventData } from "./events";
+import { ensureSnapshotWorkflowContext, saveSnapshot } from "./snapshot";
+
+/**
+ * Pause a workflow for human input and persist its state.
+ *
+ * A request for `responseEvent` is sent first so that the pending step is part
+ * of the snapshot; the snapshot is then taken and saved under `snapshotId`.
+ *
+ * @param context - Context of a workflow made snapshotable with withSnapshot().
+ * @param responseEvent - The event the workflow is waiting for.
+ * @param snapshotId - Id to save the snapshot under; a random UUID when omitted.
+ *   The id is not returned, so pass one explicitly if it is needed later.
+ */
+export const pauseForHumanInput = async (
+  context: WorkflowContext,
+  responseEvent: WorkflowEvent<HumanResponseEventData>,
+  snapshotId: string = randomUUID(),
+) => {
+  const { snapshot, sendEvent } = ensureSnapshotWorkflowContext(context);
+
+  // request the response event so the missing step is recorded in the snapshot
+  const pendingRequest = request(responseEvent);
+  sendEvent(pendingRequest);
+
+  // only the second element of the snapshot result is persisted
+  const [, snapshotData] = await snapshot();
+  await saveSnapshot(snapshotId, snapshotData);
+};
@@ -0,0 +1,28 @@
+import { type Workflow } from "@llamaindex/workflow";
+import type { HumanResponseEventData } from "./events";
+import {
+  ensureSnapshotWorkflow,
+  loadSnapshot,
+  type SnapshotWorkflowContext,
+} from "./snapshot";
+
+/**
+ * Resume a workflow from a saved snapshot, feeding it the human responses.
+ *
+ * @param workflow - The workflow to resume; must have been made snapshotable with withSnapshot().
+ * @param humanResponses - One or more responses sent by the human.
+ * @param snapshotId - Id of the snapshot to load.
+ * @returns The workflow context created by resuming from the snapshot.
+ * @throws {Error} If the workflow is not snapshotable or no snapshot exists for the id.
+ */
+export const resumeWorkflowFromHumanResponses = async (
+  workflow: Workflow,
+  humanResponses: Array<HumanResponseEventData>,
+  snapshotId: string,
+): Promise<SnapshotWorkflowContext> => {
+  // fails early when the workflow cannot be resumed at all
+  const resumable = ensureSnapshotWorkflow(workflow);
+
+  const storedSnapshot = await loadSnapshot(snapshotId);
+  if (!storedSnapshot) {
+    // without a snapshot there is nothing to resume from
+    throw new Error("No snapshot found for request id: " + snapshotId);
+  }
+
+  return resumable.resume(humanResponses, storedSnapshot);
+};
@@ -0,0 +1,78 @@
+import {
+  withSnapshot,
+  type Workflow,
+  type WorkflowContext,
+} from "@llamaindex/workflow";
+import { promises as fs } from "fs";
+import path from "path";
+
+// @llama-flow doesn't export snapshot types, we need to infer them from the functions
+export type SnapshotWorkflow = ReturnType<typeof withSnapshot<Workflow>>;
+export type SnapshotWorkflowContext = ReturnType<
+  SnapshotWorkflow["createContext"]
+>;
+export type SnapshotData = Awaited<
+  ReturnType<SnapshotWorkflowContext["snapshot"]>
+>[1];
+
+const SNAPSHOTS_DIR = path.join("output", "snapshots");
+
+// Create the snapshots directory if it does not exist yet.
+// Errors are propagated so the caller reports the real cause instead of a
+// follow-up write failure.
+const ensureCheckpointsDir = async () => {
+  await fs.mkdir(SNAPSHOTS_DIR, { recursive: true });
+};
+
+// Build the snapshot file path for a request id. The id may come from a client
+// request, so ids that could point outside SNAPSHOTS_DIR are rejected.
+const getSnapshotFilePath = (requestId: string): string => {
+  if (
+    requestId === "" ||
+    requestId.includes("/") ||
+    requestId.includes("\\") ||
+    requestId.includes("..")
+  ) {
+    throw new Error(
+      "Invalid snapshot id: path traversal or separators are not allowed.",
+    );
+  }
+  return path.join(SNAPSHOTS_DIR, `${requestId}.json`);
+};
+
+/**
+ * Save a snapshot as JSON under the snapshots directory.
+ *
+ * @param requestId - Id used as the file name (without extension).
+ * @param snapshot - The snapshot data to serialize.
+ * @throws If the id is invalid or the file cannot be written.
+ */
+export const saveSnapshot = async (
+  requestId: string,
+  snapshot: SnapshotData,
+) => {
+  const filePath = getSnapshotFilePath(requestId);
+  try {
+    await ensureCheckpointsDir();
+    await fs.writeFile(filePath, JSON.stringify(snapshot, null, 2), "utf8");
+    console.log(`Snapshot saved to: ${filePath}`);
+  } catch (error) {
+    console.error("Failed to save snapshot:", error);
+    throw error;
+  }
+};
+
+/**
+ * Load a previously saved snapshot.
+ *
+ * @param requestId - Id the snapshot was saved under.
+ * @returns The parsed snapshot, or undefined when no file exists for the id.
+ * @throws If the id is invalid, or the file cannot be read or parsed.
+ */
+export const loadSnapshot = async (
+  requestId: string,
+): Promise<SnapshotData | undefined> => {
+  const filePath = getSnapshotFilePath(requestId);
+  try {
+    const data = await fs.readFile(filePath, "utf8");
+    return JSON.parse(data);
+  } catch (error) {
+    if ((error as NodeJS.ErrnoException).code === "ENOENT") {
+      return undefined; // File doesn't exist
+    }
+    console.error("Failed to load snapshot:", error);
+    throw error;
+  }
+};
+
+// Return the workflow typed as a snapshot workflow, or throw when it has no `resume` member
+export function ensureSnapshotWorkflow(workflow: Workflow): SnapshotWorkflow {
+  if ("resume" in workflow) {
+    return workflow as SnapshotWorkflow;
+  }
+  throw new Error(
+    "Workflow is not a snapshot workflow. Please use withSnapshot() to make it snapshotable.",
+  );
+}
+
+// Return the context typed as a snapshot context, or throw when it has no `snapshot` member
+export function ensureSnapshotWorkflowContext(
+  context: WorkflowContext,
+): SnapshotWorkflowContext {
+  if ("snapshot" in context) {
+    return context as SnapshotWorkflowContext;
+  }
+  throw new Error(
+    "Cannot get snapshot of the workflow. Please use withSnapshot() to make workflow snapshotable.",
+  );
+}
@@ -1,6 +1,8 @@
 export * from "./events";
 export * from "./file";
 export * from "./gen-ui";
+export * from "./hitl";
+export * from "./inline";
 export * from "./prompts";
 export * from "./request";
 export * from "./stream";
@@ -0,0 +1,90 @@
+import { agentStreamEvent, type WorkflowEventData } from "@llamaindex/workflow";
+import { type ChatMessage } from "llamaindex";
+import { z } from "zod";
+
+const INLINE_ANNOTATION_KEY = "annotation"; // the language key to detect inline annotation code in markdown
+
+export const AnnotationSchema = z.object({
+  type: z.string(),
+  data: z.any(),
+});
+
+export type Annotation = z.infer<typeof AnnotationSchema>;
+
+/**
+ * Parse the inline annotations embedded in a message's markdown content.
+ *
+ * Every code block whose language is the inline annotation key is read as JSON
+ * and validated against `AnnotationSchema`. Blocks that are empty, are not
+ * valid JSON, or fail validation are skipped (the latter two with a warning).
+ *
+ * @param message - The chat message to scan.
+ * @returns The valid annotations, in the order they appear.
+ */
+export function getInlineAnnotations(message: ChatMessage): Annotation[] {
+  const markdown = getMessageMarkdownContent(message);
+
+  // Matches ```annotation followed by content until closing ```
+  const annotationPattern = new RegExp(
+    `\`\`\`${INLINE_ANNOTATION_KEY}\\s*\\n([\\s\\S]*?)\\n\`\`\``,
+    "g",
+  );
+
+  const annotations: Annotation[] = [];
+  for (const [, rawBlock] of markdown.matchAll(annotationPattern)) {
+    const json = rawBlock?.trim();
+    if (!json) continue;
+
+    try {
+      // parse the JSON, then validate it against the annotation schema
+      annotations.push(AnnotationSchema.parse(JSON.parse(json)));
+    } catch (error) {
+      // Skip invalid annotations - they might be malformed JSON or invalid schema
+      console.warn("Failed to parse annotation:", error);
+    }
+  }
+
+  return annotations;
+}
+
+/**
+ * Serialize an item as an inline annotation: a markdown code block whose
+ * language key is `annotation` and whose body is the JSON of the item,
+ * preceded and followed by a newline.
+ *
+ * \`\`\`annotation
+ * \{
+ *   "type": "artifact",
+ *   "data": \{...\}
+ * \}
+ * \`\`\`
+ */
+export function toInlineAnnotation(item: unknown) {
+  const openingFence = "```" + INLINE_ANNOTATION_KEY;
+  const body = `${JSON.stringify(item)}`;
+  return ["", openingFence, body, "```", ""].join("\n");
+}
+
+// Wrap an event's data as an inline annotation carried by an agent stream event
+export function toInlineAnnotationEvent(event: WorkflowEventData<unknown>) {
+  const { data } = event;
+  const delta = toInlineAnnotation(data);
+  return agentStreamEvent.with({
+    delta,
+    response: "",
+    currentAgentName: "assistant",
+    raw: data,
+  });
+}
+
+// Return the textual content of a message: the string itself, or the
+// concatenation of all `text` parts when the content is a list of parts
+function getMessageMarkdownContent(message: ChatMessage): string {
+  const { content } = message;
+  if (typeof content === "string") {
+    return content;
+  }
+
+  return content.reduce(
+    (text, part) => (part.type === "text" ? text + part.text : text),
+    "",
+  );
+}
@@ -1,10 +1,16 @@
-import { agentStreamEvent, type WorkflowEventData } from "@llamaindex/workflow";
+import {
+  agentStreamEvent,
+  type WorkflowEvent,
+  type WorkflowEventData,
+} from "@llamaindex/workflow";
 import {
  createDataStream,
  formatDataStreamPart,
  type DataStreamWriter,
  type JSONValue,
 } from "ai";
+import type { ChatResponseChunk } from "llamaindex";
+import { humanInputEvent, type HumanResponseEventData } from "./hitl";

 /**
 * Configuration options and helper callback methods for stream lifecycle events.
@@ -24,6 +30,11 @@ export interface StreamCallbacks {
    text: string,
    dataStreamWriter: DataStreamWriter,
  ) => Promise<void> | void;
+
+  /** `onPauseForHumanInput`: Called when human input event is emitted. */
+  onPauseForHumanInput?:
+    | ((event: WorkflowEvent<HumanResponseEventData>) => Promise<void> | void)
+    | undefined;
 }

 /**
@@ -61,6 +72,14 @@ export function toDataStream(
              await callbacks.onText(content, dataStreamWriter);
            }
          }
+        } else if (humanInputEvent.include(event)) {
+          const { response, ...rest } = event.data;
+          dataStreamWriter.writeMessageAnnotation(rest); // show human input in UI
+
+          if (callbacks?.onPauseForHumanInput) {
+            await callbacks.onPauseForHumanInput(response);
+            return; // stop the stream
+          }
        } else {
          dataStreamWriter.writeMessageAnnotation(event.data as JSONValue);
        }
@@ -78,3 +97,24 @@ export function toDataStream(
    },
  });
 }
+
+/**
+ * Forward the chunks of a streamed chat response as agent stream events.
+ *
+ * @param generator - The streamed response chunks.
+ * @param sendEvent - Called with one agent stream event per chunk.
+ * @returns The concatenation of all chunk deltas (empty when there is no generator).
+ */
+export async function writeResponseToStream(
+  generator: AsyncIterable<ChatResponseChunk<object>>,
+  sendEvent: (event: WorkflowEventData<unknown>) => void,
+) {
+  if (!generator) {
+    return "";
+  }
+
+  let accumulated = "";
+  for await (const { delta, raw } of generator) {
+    accumulated += delta;
+    // each event carries the new delta and the response accumulated so far
+    const streamEvent = agentStreamEvent.with({
+      delta,
+      response: accumulated,
+      currentAgentName: "LLM",
+      raw,
+    });
+    sendEvent(streamEvent);
+  }
+  return accumulated;
+}
@@ -1,12 +1,11 @@
 import {
  agentToolCallEvent,
  agentToolCallResultEvent,
-  run,
  startAgentEvent,
-  stopAgentEvent,
  WorkflowStream,
  type AgentInputData,
  type Workflow,
+  type WorkflowContext,
  type WorkflowEventData,
 } from "@llamaindex/workflow";
 import {
@@ -15,35 +14,55 @@ import {
  type NodeWithScore,
 } from "llamaindex";
 import {
+  artifactEvent,
  sourceEvent,
  toAgentRunEvent,
  toSourceEvent,
  type SourceEventNode,
 } from "./events";
 import { downloadFile } from "./file";
+import {
+  resumeWorkflowFromHumanResponses,
+  type HumanResponseEventData,
+} from "./hitl/index";
+import { toInlineAnnotationEvent } from "./inline";

-export async function runWorkflow(
-  workflow: Workflow,
-  input: AgentInputData,
-  abortSignal?: AbortSignal,
-): Promise<WorkflowStream<WorkflowEventData<unknown>>> {
-  if (!input.userInput) {
-    throw new Error("Missing user input to start the workflow");
+export async function runWorkflow({
+  workflow,
+  input,
+  human,
+}: {
+  workflow: Workflow;
+  input: AgentInputData;
+  human?: {
+    snapshotId?: string | undefined; // the snapshot id to restore workflow
+    responses?: HumanResponseEventData[]; // the data from human to trigger events after restoring
+  };
+}): Promise<WorkflowContext> {
+  let context: WorkflowContext;
+
+  if (human?.responses?.length && human?.snapshotId) {
+    // resume the workflow if there is human response
+    context = await resumeWorkflowFromHumanResponses(
+      workflow,
+      human.responses,
+      human.snapshotId,
+    );
+  } else {
+    // otherwise, create a new empty context and run the workflow with startAgentEvent
+    context = workflow.createContext();
+    context.sendEvent(
+      startAgentEvent.with({
+        userInput: input.userInput,
+        chatHistory: input.chatHistory,
+      }),
+    );
  }
-  const workflowStream = run(workflow, [
-    startAgentEvent.with({
-      userInput: input.userInput,
-      chatHistory: input.chatHistory,
-    }),
-  ]);

-  // Transform the stream to handle annotations
-  return processWorkflowStream(workflowStream).until(
-    (event) => abortSignal?.aborted || stopAgentEvent.include(event),
-  );
+  return context;
 }

-function processWorkflowStream(
+export function processWorkflowStream(
  stream: WorkflowStream<WorkflowEventData<unknown>>,
 ) {
  return stream.pipeThrough(
@@ -74,6 +93,10 @@ function processWorkflowStream(
              transformedEvent = toSourceEvent(sourceNodes);
            }
          }
+          // Handle artifact events, transform to agentStreamEvent
+          else if (artifactEvent.include(event)) {
+            transformedEvent = toInlineAnnotationEvent(event);
+          }
          // Post-process for llama-cloud files
          if (sourceEvent.include(transformedEvent)) {
            const sourceNodesForDownload = transformedEvent.data.data.nodes; // These are SourceEventNode[]
@@ -1,5 +1,43 @@
 # @create-llama/llama-index-server

+## 0.1.22
+
+### Patch Changes
+
+- 66b81e5: fix cannot catch the error raised from the workflow
+- Updated dependencies [e2486eb]
+  - @llamaindex/server@0.2.8
+
+## 0.1.21
+
+### Patch Changes
+
+- 1ff6eaf: Add support for upload file
+- af9ad3c: feat: show document artifact after generating report
+- a543a27: feat: bump chat-ui with inline artifact
+- Updated dependencies [af9ad3c]
+- Updated dependencies [a543a27]
+- Updated dependencies [1ff6eaf]
+  - @llamaindex/server@0.2.7
+
+## 0.1.20
+
+### Patch Changes
+
+- 087c961: Add support for human-in-the-loop
+- 087c961: Refactor models.py into a separate module
+- Updated dependencies [3ff0a18]
+- Updated dependencies [df10474]
+- Updated dependencies [087c961]
+  - @llamaindex/server@0.2.6
+
+## 0.1.19
+
+### Patch Changes
+
+- Updated dependencies [058b376]
+  - @llamaindex/server@0.2.5
+
 ## 0.1.18

 ### Patch Changes
@@ -8,6 +8,7 @@ LlamaIndexServer is a FastAPI-based application that allows you to quickly launc
 - Built on FastAPI for high performance and easy API development
 - Optional built-in chat UI with extendable UI components
 - Prebuilt development code
+- Human-in-the-loop (HITL) support. See the [Human-in-the-loop](https://github.com/run-llama/create-llama/blob/main/python/llama-index-server/examples/hitl/README.md) documentation for more details.

 ## Installation

@@ -77,6 +78,7 @@ The LlamaIndexServer accepts the following configuration parameters:
 - `env`: Environment setting ('dev' enables CORS and UI by default)
 - `ui_config`: UI configuration as a dictionary or UIConfig object with options:
  - `enabled`: Whether to enable the chat UI (default: True)
+  - `enable_file_upload`: Whether to enable file upload in the chat UI (default: False). Check [How to get the uploaded files in your workflow](https://github.com/run-llama/create-llama/blob/main/python/llama-index-server/examples/private_file/README.md#how-to-get-the-uploaded-files-in-your-workflow) for more details.
  - `starter_questions`: List of starter questions for the chat UI (default: None)
  - `ui_path`: Path for downloaded UI static files (default: ".ui")
  - `component_dir`: The directory for custom UI components rendering events emitted by the workflow. The default is None, which does not render custom UI components.
@@ -160,6 +162,7 @@ app = LlamaIndexServer(
 The server provides the following default endpoints:

 - `/api/chat`: Chat interaction endpoint
+- `/api/chat/file`: File upload endpoint (only available when `enable_file_upload` in `ui_config` is True)
 - `/api/files/data/*`: Access to data directory files
 - `/api/files/output/*`: Access to output directory files

@@ -0,0 +1,43 @@
+# Examples for llama-index-server
+
+This directory contains examples for llama-index-server.
+
+## How to run the examples
+
+1. Make sure you have [uv](https://docs.astral.sh/uv/) installed.
+
+2. Install the dependencies (with published packages) by running the following command:
+
+    ```bash
+    uv sync
+    ```
+
+3. Navigate to one of the example folders and follow the instructions in the example's README.md file:
+
+- [Simple Agent](./simple-agent/README.md)
+- [HITL](./hitl/README.md)
+- [Artifact](./artifact/README.md)
+- [LlamaCloud](./llamacloud/README.md)
+
+## Local Development
+
+1. For local development, you first need to build the UI resources for the server. At the root of the project, run the following command:
+
+    ```bash
+    pnpm install
+    pnpm build
+    ```
+
+2. Configure the examples to use the local llama-index-server package:
+
+    To run the examples with the local llama-index-server package, you need to tell uv to use the virtual environment of the root project
+    by setting the `UV_PROJECT` environment variable.
+
+    ```bash
+    export UV_PROJECT=<absolute path of the root project>
+    ```
+
+    Then continue with step 3 above.
+
+    > You can also use `--project <path to the root project>` instead of setting the `UV_PROJECT` environment variable.
+
@@ -4,7 +4,9 @@ This guide explains how to set up and use the LlamaIndex server with the artifac

 ## Prerequisites

- [uv](https://github.com/astral-sh/uv) installed (a fast Python package manager and runner)
+Please follow the setup instructions in the [examples README](../README.md).
+
+You will also need:
 - An OpenAI API key

 ## Steps
@@ -16,7 +16,8 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.models import (
+from llama_index.server.api.utils import get_last_artifact
+from llama_index.server.models import (
    Artifact,
    ArtifactEvent,
    ArtifactType,
@@ -24,7 +25,6 @@ from llama_index.server.api.models import (
    CodeArtifactData,
    UIEvent,
 )
-from llama_index.server.api.utils import get_last_artifact


 class Requirement(BaseModel):
@@ -16,7 +16,8 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.models import (
+from llama_index.server.api.utils import get_last_artifact
+from llama_index.server.models import (
    Artifact,
    ArtifactEvent,
    ArtifactType,
@@ -24,7 +25,6 @@ from llama_index.server.api.models import (
    DocumentArtifactData,
    UIEvent,
 )
-from llama_index.server.api.utils import get_last_artifact


 class DocumentRequirement(BaseModel):
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";

 export default function Header() {
  return (
-    <div className="flex items-center justify-between px-4 pt-2">
+    <div className="flex items-center justify-between p-2 px-4">
      <div className="flex items-center gap-2">
        <Sparkles className="size-4" />
        <h1 className="font-semibold">Artifact Workflow</h1>
@@ -1,13 +1,12 @@
+from code_workflow import ArtifactWorkflow
 from fastapi import FastAPI

-from examples.artifact.code_workflow import ArtifactWorkflow
-
 # To use document artifact workflow, uncomment the following line
-# from examples.artifact.document_workflow import ArtifactWorkflow
+# from document_workflow import ArtifactWorkflow
 from llama_index.core.workflow import Workflow
 from llama_index.llms.openai import OpenAI
 from llama_index.server import LlamaIndexServer, UIConfig
-from llama_index.server.api.models import ChatRequest
+from llama_index.server.models import ChatRequest


 def create_workflow(chat_request: ChatRequest) -> Workflow:
@@ -0,0 +1,121 @@
+# Human in the Loop
+
+This example shows how to use the LlamaIndexServer with a human in the loop. It allows you to start CLI commands that are reviewed by a human before execution.
+
+## Prerequisites
+
+Please follow the setup instructions in the [examples README](../README.md).
+
+## Getting Started
+
+### AgentWorkflow
+
+Using AgentWorkflow, you need to run the following command:
+
+```bash
+uv run -- agent_workflow.py
+```
+
+### Custom Workflow
+
+```bash
+uv run -- custom_workflow.py
+```
+
+### Access the Application
+
+Open your browser and go to:
+
+```
+http://localhost:8000
+```
+
+You will see the LlamaIndexServer UI, where you can interact with the HITL agent. Try "List all files in the current directory" and see how the agent pauses and waits for a human response before executing the command.
+
+## How does HITL work?
+
+### Events
+
+The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
+
+To do this, you will need to implement two custom events:
+
+- [HumanInputEvent](../../llama_index/server/models/hitl.py#L21): This event is used to request input from the user.
+- [HumanResponseEvent](../../llama_index/server/models/hitl.py#L10): This event is sent to the workflow to resume execution with input from the user.
+
+In this example, we have implemented these two custom events:  
+
+- [CLIHumanInputEvent](events.py#L20) – to request input from the user for CLI command execution.
+- [CLIHumanResponseEvent](events.py#L8) – to resume the workflow with the response from the user.
+
+### UI Component
+
+HITL also needs a custom UI component that is shown when the LlamaIndexServer receives the `CLIHumanInputEvent`. The name of the component is defined in the `event_type` field of the `CLIHumanInputEvent` - in our case, it is `cli_human_input`, which corresponds to the [cli_human_input.tsx](./components/cli_human_input.tsx) component.
+
+The custom component must use `append` to send a message with a `human_response` annotation. The data of the annotation must match the format of the response event `CLIHumanResponseEvent`. For example, to approve the execution of the command `ls -l`, we would send:
+
+```tsx
+append({
+    content: "Yes",
+    role: "user",
+    annotations: [
+    {
+        type: "human_response",
+        data: {
+            execute: true,
+            command: "ls -l" // The command to execute
+        },
+    },
+    ],
+});
+```
+
+This component displays the command to execute and the user can choose to execute or cancel the command execution.
+
+### AgentWorkflow
+
+To make the [AgentWorkflow](agent_workflow.py) work, we use the `wait_for_event()` method to wait for the human response when a tool is called.
+
+Example:
+```python
+async def cli_executor(ctx: Context, command: str) -> str:
+    """
+    This tool carefully waits for user confirmation before executing a command.
+    """
+    confirmation = await ctx.wait_for_event(
+        CLIHumanResponseEvent,
+        waiter_event=CLIHumanInputEvent(
+            data=CLICommand(command=command),
+        ),
+    )
+    if confirmation.execute:
+        # Execute the command
+        ...
+    else:
+        # Cancel the command
+        ...
+
+```
+
+### LlamaIndex Workflows
+
+For the [Custom Workflow](custom_workflow.py), we define one step that sends the `CLIHumanInputEvent` and another step that waits for the `CLIHumanResponseEvent`.
+
+Example:
+```python
+@step
+async def request_input(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
+    ...
+    return CLIHumanInputEvent(
+        data=CLICommand(command=command),
+        response_event_type=CLIHumanResponseEvent,
+    )
+
+@step
+async def handle_human_response(self, ctx: Context, ev: CLIHumanResponseEvent) -> StopEvent:
+    if ev.execute:
+        # Execute the command
+        ...
+    else:
+        # Cancel the command
+        ...
+```
@@ -0,0 +1,60 @@
+import subprocess
+
+from events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
+from fastapi import FastAPI
+
+from llama_index.core.agent.workflow import AgentWorkflow
+from llama_index.core.workflow import Context
+from llama_index.llms.openai import OpenAI
+from llama_index.server import LlamaIndexServer, UIConfig
+
+
+async def cli_executor(ctx: Context, command: str) -> str:
+    """
+    This tool carefully waits for user confirmation before executing a command.
+    """
+    confirmation = await ctx.wait_for_event(
+        CLIHumanResponseEvent,
+        waiter_event=CLIHumanInputEvent(
+            data=CLICommand(command=command),
+        ),
+    )
+    if confirmation.execute:
+        return subprocess.check_output(confirmation.command, shell=True).decode("utf-8")
+    else:
+        return "Command execution cancelled."
+
+
+def create_workflow() -> AgentWorkflow:
+    return AgentWorkflow.from_tools_or_functions(
+        tools_or_functions=[cli_executor],
+        llm=OpenAI(model="gpt-4.1-mini"),
+        system_prompt="""
+        You are a helpful assistant that help the user execute commands.
+        You can execute commands using the cli_executor tool, don't need to ask for confirmation for triggering the tool.
+        """,
+    )
+
+
+def create_app() -> FastAPI:
+    app = LlamaIndexServer(
+        workflow_factory=create_workflow,
+        suggest_next_questions=False,
+        ui_config=UIConfig(
+            starter_questions=[
+                "List all files in the current directory",
+                "Fetch changes from the remote repository",
+            ],
+            component_dir="components",
+        ),
+    )
+    return app
+
+
+app = create_app()
+
+
+# When run as a script, serve the module-level `app` with uvicorn
+if __name__ == "__main__":
+    import uvicorn
+
+    # port 8000 with auto-reload enabled
+    uvicorn.run("agent_workflow:app", port=8000, reload=True)
@@ -0,0 +1,96 @@
+import { JSONValue, useChatUI } from "@llamaindex/chat-ui";
+import React, { FC, useState } from "react";
+import { Button } from "@/components/ui/button";
+import { Card, CardContent, CardFooter } from "@/components/ui/card";
+import { z } from "zod";
+
+// Shape of an event this component can render: an object with a string
+// `command`. This schema is equivalent to the CLICommand model defined in
+// events.py.
+const CLIInputEventSchema = z.object({
+  command: z.string(),
+});
+// Static type derived from the schema above.
+type CLIInputEvent = z.infer<typeof CLIInputEventSchema>;
+
+
+/**
+ * Confirmation card for a CLI command proposed by the workflow.
+ *
+ * Uses the last entry of `events` that matches CLIInputEventSchema, shows its
+ * command in a text input and offers Yes/No buttons. The answer is appended to
+ * the chat as a user message carrying a `human_response` annotation with
+ * `execute` and `command`. Once an answer was sent the buttons are hidden and
+ * the input is locked.
+ */
+const CLIHumanInput: FC<{
+  events: JSONValue[];
+}> = ({ events }) => {
+  // Keep only the events that parse as a CLI command and take the latest one.
+  const inputEvent = (events || [])
+    .map((ev) => {
+      const parseResult = CLIInputEventSchema.safeParse(ev);
+      return parseResult.success ? parseResult.data : null;
+    })
+    .filter((ev): ev is CLIInputEvent => ev !== null)
+    .at(-1);
+
+  const { append } = useChatUI();
+  // null = not answered yet, true = confirmed, false = cancelled
+  const [confirmedValue, setConfirmedValue] = useState<boolean | null>(null);
+  const [editableCommand, setEditableCommand] = useState<string | undefined>(
+    inputEvent?.command,
+  );
+
+  // Update editableCommand if inputEvent changes (e.g. new event comes in)
+  React.useEffect(() => {
+    setEditableCommand(inputEvent?.command);
+  }, [inputEvent?.command]);
+
+  const handleConfirm = () => {
+    append({
+      content: "Yes",
+      role: "user",
+      annotations: [
+        {
+          type: "human_response",
+          data: {
+            execute: true,
+            command: editableCommand, // Use editable command
+          },
+        },
+      ],
+    });
+    setConfirmedValue(true);
+  };
+
+  const handleCancel = () => {
+    append({
+      content: "No",
+      role: "user",
+      annotations: [
+        {
+          type: "human_response",
+          data: {
+            execute: false,
+            // On cancel the originally proposed command is reported back.
+            command: inputEvent?.command,
+          },
+        },
+      ],
+    });
+    setConfirmedValue(false);
+  };
+
+  return (
+    <Card className="my-4">
+      <CardContent className="pt-6">
+        <p className="text-sm text-gray-700">
+          Do you want to execute the following command?
+        </p>
+        {/* Editable until the user has answered; "Yes" sends the edited command. */}
+        <input
+          disabled={confirmedValue !== null}
+          type="text"
+          value={editableCommand || ""}
+          onChange={(e) => setEditableCommand(e.target.value)}
+          className="bg-gray-100 rounded p-3 my-2 text-xs font-mono text-gray-800 overflow-x-auto w-full border border-gray-300"
+        />
+      </CardContent>
+      {confirmedValue === null ? (
+        <CardFooter className="flex justify-end gap-2">
+          <Button onClick={handleConfirm}>Yes</Button>
+          <Button onClick={handleCancel}>No</Button>
+        </CardFooter>
+      ) : null}
+    </Card>
+  );
+};
+
+export default CLIHumanInput;
@@ -0,0 +1,109 @@
+import platform
+import subprocess
+from typing import Any
+
+from events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
+from fastapi import FastAPI
+
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.settings import Settings
+from llama_index.core.workflow import (
+    Context,
+    StartEvent,
+    StopEvent,
+    Workflow,
+    step,
+)
+from llama_index.server import LlamaIndexServer, UIConfig
+
+
+class CLIWorkflow(Workflow):
+    """
+    A workflow has ability to execute command line tool with human in the loop for confirmation.
+    """
+
+    default_prompt = PromptTemplate(
+        template="""
+        You are a helpful assistant who can write CLI commands to execute using {cli_language}.
+        Your task is to analyze the user's request and write a CLI command to execute.
+
+        ## User Request
+        {user_request}
+
+        Don't be verbose, only respond with the CLI command without any other text.
+        """
+    )
+
+    def __init__(self, **kwargs: Any) -> None:
+        # HITL Workflow should disable timeout otherwise, we will get a timeout error from callback
+        kwargs["timeout"] = None
+        super().__init__(**kwargs)
+
+    @step
+    async def start(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
+        user_msg = ev.user_msg
+        if user_msg is None:
+            raise ValueError("Missing user_msg in StartEvent")
+        await ctx.set("user_msg", user_msg)
+        # Request LLM to generate a CLI command
+        os_name = platform.system()
+        if os_name == "Linux" or os_name == "Darwin":
+            cli_language = "bash"
+        else:
+            cli_language = "cmd"
+        prompt = self.default_prompt.format(
+            user_request=user_msg, cli_language=cli_language
+        )
+        llm = Settings.llm
+        if llm is None:
+            raise ValueError("Missing LLM in Settings")
+        response = await llm.acomplete(prompt, formatted=True)
+        command = response.text.strip()
+        if command == "":
+            raise ValueError("Couldn't generate a command")
+        # Send the command to the user for confirmation
+        await ctx.set("command", command)
+        return CLIHumanInputEvent(  # type: ignore
+            data=CLICommand(command=command),
+            response_event_type=CLIHumanResponseEvent,
+        )
+
+    @step
+    async def handle_human_response(
+        self,
+        ctx: Context,
+        ev: CLIHumanResponseEvent,  # This event is sent by LlamaIndexServer when user response
+    ) -> StopEvent:
+        # If we have human response, check the confirmation and execute the command
+        if ev.execute:
+            command = ev.command or ""
+            if command == "":
+                raise ValueError("Missing command in CLIExecutionEvent")
+            res = subprocess.run(command, shell=True, capture_output=True, text=True)
+            return StopEvent(result=res.stdout or res.stderr)
+        else:
+            return StopEvent(result=None)
+
+
+def create_app() -> FastAPI:
+    app = LlamaIndexServer(
+        workflow_factory=lambda: CLIWorkflow(),
+        suggest_next_questions=False,
+        ui_config=UIConfig(
+            starter_questions=[
+                "List all files in the current directory",
+                "Fetch changes from the remote repository",
+            ],
+            component_dir="components",
+        ),
+    )
+    return app
+
+
+# Module-level application object; the uvicorn call below refers to it by the
+# import string "custom_workflow:app".
+app = create_app()
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # When run as a script, start uvicorn on port 8000 with reload enabled.
+    uvicorn.run("custom_workflow:app", port=8000, reload=True)
@@ -0,0 +1,34 @@
+from typing import Type
+
+from pydantic import BaseModel, Field
+
+from llama_index.server.models import HumanInputEvent, HumanResponseEvent
+
+
+# The human's answer to a CLIHumanInputEvent: whether to run the command, and
+# which command string to run.
+class CLIHumanResponseEvent(HumanResponseEvent):
+    execute: bool = Field(
+        description="True if the human wants to execute the command, False otherwise."
+    )
+    command: str = Field(description="The command to execute.")
+
+
+# Payload carried in CLIHumanInputEvent.data: the command awaiting confirmation.
+class CLICommand(BaseModel):
+    command: str = Field(description="The command to execute.")
+
+
+# The HITL feature needs an event that extends HumanInputEvent
+class CLIHumanInputEvent(HumanInputEvent):
+    """
+    CLIHumanInputEvent is sent when the agent needs the user's permission to execute a CLI command.
+    Render this event by showing the command together with buttons to confirm or cancel its execution.
+    """
+
+    event_type: str = (
+        "cli_human_input"  # used by the UI to pick the component that renders this event
+    )
+    response_event_type: Type = (
+        CLIHumanResponseEvent  # used by the workflow to resume with the correct event
+    )
+    data: CLICommand = Field(  # the data sent to the UI for rendering
+        description="The command to execute.",
+    )
@@ -0,0 +1,68 @@
+# LlamaCloud Integration
+
+This guide explains how to set up and use the LlamaIndex server with LlamaCloud for retrieval-augmented generation (RAG) with citation support.
+
+## Prerequisites
+
+Please follow the setup instructions in the [examples README](../README.md).
+
+You will also need:
+- An OpenAI API key
+- A LlamaCloud account and API key
+- A LlamaCloud project with indexed documents
+
+## Steps
+
+1. **Set the Required Environment Variables**
+
+   Export your API keys and LlamaCloud configuration:
+
+   ```sh
+   export OPENAI_API_KEY=your_openai_api_key_here
+   export LLAMA_CLOUD_API_KEY=your_llamacloud_api_key_here
+   export LLAMA_CLOUD_PROJECT_NAME=your_project_name
+   export LLAMA_CLOUD_INDEX_NAME=your_index_name
+   ```
+
+2. **Run the Server Using uv**
+
+   Start the server with the following command:
+
+   ```sh
+   uv run main.py
+   ```
+
+   This will launch the FastAPI server using the LlamaCloud workflow defined in `main.py`.
+
+3. **Access the Application**
+
+   Open your browser and go to:
+
+   ```
+   http://localhost:8000
+   ```
+
+   You will see the LlamaIndex chat app UI with LlamaCloud integration, where you can query your indexed documents.
+
+## Features
+
+- **Document Retrieval**: Query your LlamaCloud indexed documents with two retrieval modes:
+  - **Chunk-level retrieval**: Best for specific, detailed questions requiring precise information
+  - **Document-level retrieval**: Best for high-level summarization and broader context questions
+- **Citation Support**: All responses include citations to the source documents, helping you verify and trace the information back to its origin.
+- **Index Selection**: The UI includes an index selector, allowing you to switch between LlamaCloud indexes if you have more than one.
+
+## How it Works
+
+The workflow uses two specialized query engines:
+
+1. **Chunk Query Engine**: Retrieves specific chunks of documents for detailed, targeted questions
+2. **File Query Engine**: Retrieves entire documents as context for broader, summarization-type questions
+
+Both engines are enhanced with citation capabilities, ensuring transparency and traceability in the responses.
+
+## Notes
+
+- Make sure your LlamaCloud project has documents indexed before running the example
+- The server uses GPT-4.1 by default for optimal performance with citations
+- The workflow automatically selects the appropriate retrieval strategy based on your query type 
@@ -2,13 +2,14 @@ import os
 from typing import List, Optional

 from fastapi import FastAPI
+
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
 from llama_index.core.settings import Settings
 from llama_index.core.tools import QueryEngineTool, ToolMetadata
 from llama_index.llms.openai import OpenAI
 from llama_index.server import LlamaIndexServer, UIConfig
-from llama_index.server.api.models import ChatRequest
+from llama_index.server.models import ChatRequest
 from llama_index.server.services.llamacloud import LlamaCloudIndex, get_index
 from llama_index.server.tools.index.citation import (
    CITATION_SYSTEM_PROMPT,
@@ -0,0 +1,102 @@
+# Uploaded File
+
+This example shows how to use the uploaded file (private file) from the user in the workflow.
+
+## Prerequisites
+
+Please follow the setup instructions in the [examples README](../README.md).
+
+You will also need:
+- An OpenAI API key
+- Text files for processing (the examples are optimized for smaller text files)
+
+## How to get the uploaded files in your workflow:
+
+The uploaded file information is included in the annotations of a [ChatAPIMessage](../../llama_index/server/models/chat.py#66). You can access it manually through the `chat_request` parameter of the workflow factory. We also provide a [get_file_attachments](../../llama_index/server/utils/chat_attachments.py) helper function that makes it easier to get the uploaded files from the chat request.
+
+```python
+from llama_index.server.api.utils.chat_attachments import get_file_attachments
+
+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    uploaded_files = get_file_attachments(chat_request.messages)
+    ...
+```
+
+Each uploaded file item is a [ServerFile](../../llama_index/server/models/chat.py#9) object, which includes the file id, type, size, and url of the uploaded file. The `url` is an access url to the uploaded file that can be used to download or display the file from the browser, the `id` is used to manage the file in the server through the [FileService](../../llama_index/server/services/file.py).
+
+
+## Examples:
+
+### For agent workflow:
+   - We create a simple file reader tool that can read the uploaded file content.
+
+   ```python
+   def create_file_tool(chat_request: ChatRequest) -> Optional[FunctionTool]:
+      """
+      Create a tool to read file if the user uploads a file.
+      """
+      file_ids = []
+      # Get the uploaded file ids from the chat messages
+      for file in get_file_attachments(chat_request.messages):
+         file_ids.append(file.id)
+      if len(file_ids) == 0:
+         return None
+
+      # Create a tool description that includes the file ids so the LLM knows which file it can access
+      file_tool_description = (
+         "Use this tool with a file id to read the content of the file."
+         f"\nYou only have access to the following file ids: {json.dumps(file_ids)}"
+      )
+
+      def read_file(file_id: str) -> str:
+         file_path = FileService.get_file_path(file_id)
+         try:
+               with open(file_path, "r") as file:
+                  return file.read()
+         except Exception as e:
+               return f"Error reading file {file_path}: {e}"
+      
+      # Create the tool
+      return FunctionTool.from_defaults(
+         fn=read_file,
+         name="read_file",
+         description=file_tool_description,
+      )
+   ```
+   - Check out the [agent-workflow.py](agent-workflow.py) for more details.
+
+   - You can run the agent workflow with file tool by running the following command:
+     ```bash
+     export OPENAI_API_KEY=your_openai_api_key_here
+     uv run agent-workflow.py
+     ```
+     then go to the UI at `http://localhost:8000` and upload the [example.txt](example.txt) file.
+
+### For custom workflow:
+   - The attachments are included in the `attachments` parameter of the `StartEvent` so you can easily access them in the workflow.
+
+   ```python
+   class MyWorkflow(Workflow):
+      @step
+      async def start_event_handler(self, ctx: Context, ev: StartEvent) -> StopEvent:
+         # Get attachments from the start event
+         attachments = ev.attachments
+         # Do something with the attachments
+         # e.g. read the file content
+         last_file = attachments[-1]
+         if last_file:
+            with open(last_file.path, "r") as f:
+               file_content = f.read()
+            ...
+         # or save it to the context for later use
+         await ctx.set("file_content", file_content)
+         return StopEvent()
+   ```
+   - Check out the [custom-workflow.py](custom-workflow.py) for more details.
+
+   - You can run the custom workflow by running the following command:
+     ```bash
+     export OPENAI_API_KEY=your_openai_api_key_here
+     uv run custom-workflow.py
+     ```
+     then go to the UI at `http://localhost:8000` and upload the [example.txt](example.txt) file.
@@ -0,0 +1,78 @@
+import json
+from typing import List, Optional
+
+from fastapi import FastAPI
+
+from llama_index.core.agent.workflow import AgentWorkflow
+from llama_index.core.tools import FunctionTool
+from llama_index.llms.openai import OpenAI
+from llama_index.server import LlamaIndexServer, UIConfig
+from llama_index.server.api.utils.chat_attachments import get_file_attachments
+from llama_index.server.models.chat import ChatRequest
+from llama_index.server.models.file import ServerFile
+from llama_index.server.services.file import FileService
+
+
+def create_file_tool(file_attachments: List[ServerFile]) -> Optional[FunctionTool]:
+    """
+    Create a tool to read file if the user uploads a file.
+    """
+    file_ids = []
+    for file in file_attachments:
+        file_ids.append(file.id)
+    if len(file_ids) == 0:
+        return None
+
+    file_tool_description = (
+        "Use this tool with a file id to read the content of the file."
+        f"\nYou only have access to the following file ids: {json.dumps(file_ids)}"
+    )
+
+    def read_file(file_id: str) -> str:
+        # Validate if the file id is in the list of file ids
+        if file_id not in file_ids:
+            raise ValueError(f"I don't have access to file id {file_id}")
+
+        file_path = FileService.get_file_path(file_id)
+        try:
+            with open(file_path, "r") as file:
+                return file.read()
+        except Exception as e:
+            return f"Error reading file {file_path}: {e}"
+
+    return FunctionTool.from_defaults(
+        fn=read_file,
+        name="read_file",
+        description=file_tool_description,
+    )
+
+
+def create_workflow(chat_request: ChatRequest) -> AgentWorkflow:
+    file_attachments = get_file_attachments(chat_request.messages)
+    file_tool = create_file_tool(file_attachments)
+    return AgentWorkflow.from_tools_or_functions(
+        tools_or_functions=[file_tool] if file_tool else [],
+        llm=OpenAI(model="gpt-4.1-mini"),
+        system_prompt="You are a helpful assistant that can help users with their uploaded files.",
+    )
+
+
+def create_app() -> FastAPI:
+    app = LlamaIndexServer(
+        workflow_factory=create_workflow,
+        suggest_next_questions=False,
+        ui_config=UIConfig(
+            enable_file_upload=True,
+            component_dir="components",
+        ),
+    )
+    return app
+
+
+# Module-level application object; the uvicorn call below refers to it by the
+# import string "agent-workflow:app".
+app = create_app()
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # When run as a script, start uvicorn on host 0.0.0.0, port 8000, with reload enabled.
+    uvicorn.run("agent-workflow:app", host="0.0.0.0", port=8000, reload=True)
@@ -0,0 +1,126 @@
+from typing import Any, List
+
+from fastapi import FastAPI
+
+from llama_index.core.agent.workflow.workflow_events import AgentStream
+from llama_index.core.llms import LLM
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.workflow import (
+    Context,
+    Event,
+    StartEvent,
+    StopEvent,
+    Workflow,
+    WorkflowRuntimeError,
+    step,
+)
+from llama_index.llms.openai import OpenAI
+from llama_index.server import LlamaIndexServer, UIConfig
+from llama_index.server.api.utils.chat_attachments import get_file_attachments
+from llama_index.server.models.chat import ChatRequest
+from llama_index.server.models.file import ServerFile
+
+
+class FileHelpEvent(Event):
+    """
+    The event for helping the user with an uploaded file.
+    """
+
+    # Text read from the uploaded file
+    file_content: str
+    # The user's message, taken from the start event
+    user_request: str
+
+
+class FileHelpWorkflow(Workflow):
+    """
+    A simple workflow that helps the user with the an uploaded file.
+    Note: The workflow just simply feed all the file content to the LLM so it won't work for large files.
+    The purpose is just for demo how a workflow can work with the uploaded file from the user.
+    """
+
+    def __init__(
+        self,
+        llm: LLM,
+        file_attachments: List[ServerFile],
+        **kwargs: Any,
+    ):
+        super().__init__(**kwargs)
+        self.llm = llm
+        self.file_attachments = file_attachments
+
+    @step
+    async def read_files(self, ctx: Context, ev: StartEvent) -> FileHelpEvent:
+        user_msg = ev.user_msg
+        if len(self.file_attachments) == 0:
+            raise WorkflowRuntimeError("Please upload one file to start")
+
+        # Read the file content
+        last_file = self.file_attachments[-1]
+        with open(last_file.path, "r") as f:
+            file_content = f.read()
+
+        return FileHelpEvent(
+            file_content=file_content,
+            user_request=user_msg,
+        )
+
+    @step
+    async def help_user(self, ctx: Context, ev: FileHelpEvent) -> StopEvent:
+        default_prompt = PromptTemplate("""
+        You are a writing assistant.
+        You are given a file content and a user request.
+        Your task is to help the user with the file content.
+        
+        User request: {user_msg}
+
+        File content:
+        {file_content}
+        """)
+        prompt = default_prompt.format(
+            user_msg=ev.user_request,
+            file_content=ev.file_content,
+        )
+        stream = await self.llm.astream_complete(prompt)
+        async for chunk in stream:
+            ctx.write_event_to_stream(
+                AgentStream(
+                    response=chunk.text,
+                    delta=chunk.delta or "",
+                    current_agent_name="agent",
+                    tool_calls=[],
+                    raw=chunk.raw,
+                )
+            )
+
+        return StopEvent(
+            content=True,
+        )
+
+
+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    # Use get_file_attachments to get the file attachments from the chat messages
+    file_attachments = get_file_attachments(chat_request.messages)
+    return FileHelpWorkflow(
+        llm=OpenAI(model="gpt-4.1-mini"),
+        file_attachments=file_attachments,
+    )
+
+
+def create_app() -> FastAPI:
+    app = LlamaIndexServer(
+        workflow_factory=create_workflow,
+        suggest_next_questions=False,
+        ui_config=UIConfig(
+            enable_file_upload=True,
+            component_dir="components",
+        ),
+    )
+    return app
+
+
+# Module-level application object; the uvicorn call below refers to it by the
+# import string "custom-workflow:app".
+app = create_app()
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    # When run as a script, start uvicorn on host 0.0.0.0, port 8000, with reload enabled.
+    uvicorn.run("custom-workflow:app", host="0.0.0.0", port=8000, reload=True)
@@ -0,0 +1,142 @@
+# ACME Vendor Agreement
+
+**Effective Date:** January 1, 2024
+
+## Parties:
+
+- **Client:** LlamaCo ("Client")
+- **Vendor:** ACME Office Supply, Inc. ("Vendor")
+
+## 1. Overview
+
+This Vendor Agreement ("Agreement") sets forth the terms and conditions under which ACME Office Supply, Inc. will provide office supplies, consumables, related goods ("Products"), and associated data processing services to LlamaCo.
+
+## 2. Definitions
+
+- **Personal Data:** Any information relating to an identified or identifiable natural person ('data subject').
+- **Processing:** Any operation performed on Personal Data, including collection, storage, modification, transfer, or deletion.
+- **Data Controller:** LlamaCo, who determines the purposes and means of processing Personal Data.
+- **Data Processor:** ACME Office Supply, Inc., who processes Personal Data on behalf of the Controller.
+
+## 3. Data Protection and Privacy
+
+### 3.1 Scope of Processing
+
+Vendor shall process Personal Data only:
+
+- To fulfill orders and manage deliveries
+- To provide customer support services
+- To maintain business records
+- To comply with legal obligations
+
+### 3.2 Data Subject Rights
+
+Vendor shall:
+
+- Respond to data subject requests within 30 days
+- Provide data in a structured, commonly used format
+- Implement measures to facilitate data portability
+- Assist with data subject rights requests at no additional cost
+
+### 3.3 Data Transfers and Storage
+
+- Vendor maintains primary data centers in the United States
+- Vendor may transfer data to any country where it maintains operations
+- No prior notification required for new data storage locations
+- Vendor will rely on its standard data transfer mechanisms
+- Data may be processed by staff operating outside the EEA
+
+### 3.4 Subprocessors
+
+- Vendor may engage subprocessors without prior Client approval
+- Subprocessors may be located in any jurisdiction globally
+- Notice of new subprocessors provided within 30 days of engagement
+- Client has no right to object to new subprocessors
+
+## 4. Security Measures
+
+### 4.1 Technical and Organizational Measures
+
+Vendor shall implement appropriate measures including:
+
+- Encryption of Personal Data in transit and at rest
+- Access controls and authentication
+- Regular security testing and assessments
+- Employee training on data protection
+- Incident response procedures
+
+### 4.2 Data Breaches
+
+Vendor shall:
+
+- Notify Client of any Personal Data breach within 72 hours
+- Provide details necessary to meet regulatory requirements
+- Cooperate with Client's breach investigation
+- Maintain records of all data breaches
+
+## 5. Data Retention
+
+### 5.1 Retention Period
+
+- Personal Data retained only as long as necessary
+- Standard retention period of 3 years after last transaction
+- Deletion of Personal Data upon written request
+- Backup copies retained for maximum of 6 months
+
+### 5.2 Termination
+
+Upon termination of services:
+
+- Return all Personal Data in standard format
+- Delete existing copies within 30 days
+- Provide written confirmation of deletion
+- Cease all processing activities
+
+## 6. Compliance and Audit
+
+### 6.1 Documentation
+
+Vendor shall maintain:
+
+- Records of all processing activities
+- Security measure documentation
+- Data transfer mechanisms
+- Subprocessor agreements
+
+### 6.2 Audits
+
+- Annual compliance audits permitted
+- 30 days notice required for audits
+- Vendor to provide necessary documentation
+- Client bears reasonable audit costs
+
+## 7. Liability and Indemnification
+
+### 7.1 Liability
+
+- Vendor liable for data protection violations
+- Reasonable compensation for damages
+- Coverage for regulatory fines where applicable
+- Joint liability as required by law
+
+## 8. Governing Law
+
+This Agreement shall be governed by the laws of Ireland, without regard to its conflict of laws principles.
+
+---
+
+IN WITNESS WHEREOF, the parties have executed this Agreement as of the Effective Date.
+
+**LlamaCo**
+
+By: \_\_\_\_\_\_\_\_\_\_  
+Name: [Authorized Representative]  
+Title: [Title]  
+Date: \_\_\_\_\_\_\_\_\_\_
+
+**ACME Office Supply, Inc.**
+
+By: \_\_\_\_\_\_\_\_\_\_  
+Name: [Authorized Representative]  
+Title: [Title]  
+Date: \_\_\_\_\_\_\_\_\_\_
@@ -0,0 +1,12 @@
+[project]
+name = "llama-index-server-examples"
+version = "0.1.0"
+requires-python = ">=3.10"
+dependencies = [
+    "llama-index-server",
+    "llama-index-llms-openai>=0.4.2",
+    "e2b-code-interpreter>=1.1.1,<2.0.0",
+    "llama-cloud>=0.1.17,<1.0.0",
+    "markdown>=3.7,<4.0",
+    "xhtml2pdf>=0.2.17,<1.0.0",
+]
@@ -4,7 +4,9 @@ This guide explains how to set up and use the LlamaIndex server with a simple ch

 ## Prerequisites

- [uv](https://github.com/astral-sh/uv) installed (a fast Python package manager and runner)
+Please follow the setup instructions in the [examples README](../README.md).
+
+You will also need:
 - An OpenAI API key

 ## Steps
@@ -22,10 +24,10 @@ This guide explains how to set up and use the LlamaIndex server with a simple ch
   Start the server with the following command:

   ```sh
-   uv run workflow.py
+   uv run main.py
   ```

-   This will launch the FastAPI server using the workflow defined in `main.py`.
+   This will launch the FastAPI server using the workflow defined in `app/workflow.py`.

 3. **Access the Application**

--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
github-actions[bot]	3589f946a9	Release 0.2.8 (#685 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-06-12 18:09:12 +07:00
Thuc Pham	e2486eb080	feat: support human in the loop for TS (#686 ) * feat: support human in the loop for TS * add example for custom workflow * fix: need to request humanResponseEvent to save missing step to snapshot * refactor: human response data should be any * refactor runWorkflow function to support resume stream * refactor: hitl * fix: workflow * add summary event * send tool event * use requestId from Vercel * update chat route.ts * fix copy utils/* * refactor: workflow and stream * Create eight-moons-perform.md * update typo * make schema simple * fix typo * use messages in startAgentEvent * save to snapshots folder * fix lint * feat: workflowBaseEvent * include response event in input event * simplify type * update readme * update document * fix typecheck * bump: "@llamaindex/workflow": "~1.1.8" * remove any * use fixed tsx version to fix e2e * fix wrong copy * add cli hitl examples as a use case for both Python and TS * update changeset to release create-llama also * fix e2e * fix e2e * hitl frontend chat * try disable hitl test	2025-06-12 18:00:10 +07:00
Huu Le	66b81e5323	fix cannot catch the error raised from the workflow (#684 )	2025-06-09 16:53:49 +07:00
github-actions[bot]	924649c025	Release 0.1.21 (#680 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-06-06 17:19:25 +07:00
Thuc Pham	1b04db917b	fix lint for release (#682 )	2025-06-06 16:43:45 +07:00
Thuc Pham	af9ad3c42d	feat: show document artifact after generating report (#658 ) * feat: show document artifact after generating report * keep chat message content as it is * use artifactEvent from server * add deep research example * bump chat-ui for new editor * import editor css * hide warning for workflowEvent<{}>() in eject mode * fix format * use CL for better testing * generate artifact after streaming report in Python * bump chat-ui to support citations * use isinstance to check stream * fix document editor spacing * Create tame-wolves-obey.md * add sources to document artifact * add sources to document artifact in python * type cast * no need score * fix lint * move handle stream logic to server * refactor: use chunk.text and chunk.raw * bump chat-ui 0.5.6 to fix citations * update changset * fix lock	2025-06-06 16:34:52 +07:00
Huu Le	1ff6eaf3e1	feat: Support upload private file (#674 ) * init private support for python BE * feat: Add private file handling and upload support in FastAPI - Introduced `main.py` to set up the FastAPI application with file upload capabilities. - Created `workflow.py` to manage file reading and tool creation for uploaded files. - Updated `server.py` to include upload API configuration. - Modified chat router to handle file uploads and return server file metadata. - Refactored chat models to support new file handling structure. - Enhanced file service to manage private file storage and retrieval. * add process base64 and update examples * add readme example * fix test * feat: Add file upload support to LlamaIndexServer TS * add get_file to fileservice * refactor: Simplify file storage logic in helpers.ts * update example * attach file to user message * fix example, improve model * feat: Add file upload support and enhance chat workflow in LlamaIndexServer * remove redundant change * support agent workflow for ts * Enhance README and add file upload examples for LlamaIndex Server. Updated instructions for running examples and added new workflows for handling uploaded files. Included detailed notes on using file attachments in workflows. * update doc * update example * Enhance README with detailed instructions for file upload in chat UI. Update custom workflow to handle file attachments and modify chat router to remove unused attachment handling. Refactor create_workflow to pass attachments from chat request. * Refactor file handling in workflows by updating the create_file_tool function to accept file attachments directly. Introduce a new ServerFileResponse model for better file response handling. Update chat router to utilize the new FileUpload model for file uploads. Clean up imports and ensure consistent file attachment processing across workflows. * Enhance file handling in workflows by updating README and example files. 
Introduce a new `workflowFactory` structure to support file attachments, and improve the `extractFileAttachments` function for better clarity and usability. Update descriptions in tools to reflect changes in file ID handling. * fix unstoppable * chore: fix issues * add changeset * bump chat-ui * bump chat-ui for eject project --------- Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-06-06 15:58:56 +07:00
Thuc Pham	a543a27faf	feat: bump chat-ui with inline artifact (#675 ) * feat: bump chat-ui with inline artifact * bump chat-ui 0.5.0 * update extractLastArtifact * fix: imports * fix: circle import * missing export * update document gen workflow * remove artifactEvent for annotations * update document * bump chat-ui 0.5.1 to fix parsing $ * bump chat-ui 0.5.2 * toArtifactEvent internal * update doc to use toArtifactEvent * do workflow transformmation internal * revert doc * keep contract * fix format * update get_last_artifact to extract inline annotations in Python * fix imports * Transforms ArtifactEvent to AgentStream with inline annotation format * Create thick-turtles-deny.md * donot use relative imports * toInlineAnnotationEvent * to_inline_annotation_event in python * refactor: move toInlineAnnotationEvent to inline.ts * update comment * rename ArtifactTransform to InlineAnnotationTransformer * add codegen example --------- Co-authored-by: leehuwuj <leehuwuj@gmail.com>	2025-06-05 10:20:21 +07:00
Thuc Pham	63edd74ba1	fix: conflict package versions in ts examples (#678 )	2025-06-05 09:25:54 +07:00
Marcus Schiesser	13a967b2a2	docs: improved python readmes	2025-06-03 14:57:57 +07:00
Huu Le	2ac4d92493	chore: update examples (#677 )	2025-06-03 14:33:27 +07:00
Marcus Schiesser	7e47cba4ba	docs: clarify HITL example	2025-06-03 08:52:45 +07:00
github-actions[bot]	bc56fa3c5f	Release 0.5.20 (#671 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-06-02 18:02:05 +07:00
Huu Le	087c96164d	feat: [server] Add Human in the Loop example with FastAPI integration (#630 )	2025-06-02 17:47:04 +07:00
Thuc Pham	3ff0a18876	fix: default header padding (#672 )	2025-05-31 14:08:29 +07:00
Thuc Pham	df1047480a	fix: missing cursor pointer for button (#670 )	2025-05-30 09:52:17 +07:00
Marcus Schiesser	8d89223a08	chore: fill empty chat message default	2025-05-29 21:05:53 +07:00
github-actions[bot]	49a944182f	Release 0.2.5 (#669 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-29 13:06:58 +07:00
Marcus Schiesser	058b3762c1	fix: update generate script path for ejected project (#668 )	2025-05-29 12:21:17 +07:00