remove bin from git

2026-07-02 19:14:28 -04:00 · 2025-05-28 18:38:28 +07:00
57 changed files with 292 additions and 1198 deletions
@@ -1,11 +1,5 @@
 # create-llama

-## 0.5.20
-
-### Patch Changes
-
- 3ff0a18: fix: default header padding
-
 ## 0.5.19

 ### Patch Changes
@@ -1,6 +1,6 @@
 {
  "name": "create-llama",
-  "version": "0.5.20",
+  "version": "0.5.19",
  "description": "Create LlamaIndex-powered apps with one command",
  "keywords": [
    "rag",
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";

 export default function Header() {
  return (
-    <div className="flex items-center justify-between p-2 px-4">
+    <div className="flex items-center justify-between px-4 pt-2">
      <div className="flex items-center gap-2">
        <Sparkles className="size-4" />
        <h1 className="font-semibold">LlamaIndex App</h1>
@@ -1,6 +1,9 @@
 # server contains Nextjs frontend code (not compiled)
 server/

+# bin will be created automatically when running `pnpm install`
+bin/
+
 # the ejected nextjs project
 project/

@@ -1,19 +1,5 @@
 # @llamaindex/server

-## 0.2.6
-
-### Patch Changes
-
- 3ff0a18: fix: default header padding
- df10474: fix: missing cursor pointer for button
- 087c961: Support zod and chat-ui hooks for custom components
-
-## 0.2.5
-
-### Patch Changes
-
- 058b376: Fix generate script for ejected project
-
 ## 0.2.4

 ### Patch Changes
@@ -1,172 +0,0 @@
-#!/usr/bin/env node
-
-const fs = require("fs").promises;
-const path = require("path");
-
-// Resolve the project directory in node_modules/@llamaindex/server/project
-// This is the template that is used to construct the nextjs project
-const projectDir = path.resolve(__dirname, "../project");
-
-// Resolve the src directory that contains workflow & setting files
-const srcDir = path.join(process.cwd(), "src");
-const srcAppDir = path.join(srcDir, "app");
-const generateFile = path.join(srcDir, "generate.ts");
-const envFile = path.join(process.cwd(), ".env");
-
-// The environment variables that are used as LlamaIndexServer configs
-const SERVER_CONFIG_VARS = [
-  {
-    key: "OPENAI_API_KEY",
-    defaultValue: "<your-openai-api-key>",
-    description: "OpenAI API key",
-  },
-  {
-    key: "SUGGEST_NEXT_QUESTIONS",
-    defaultValue: "true",
-    description: "Whether to suggest next questions (`suggestNextQuestions`)",
-  },
-  {
-    key: "COMPONENTS_DIR",
-    defaultValue: "components",
-    description: "Directory for custom components (`componentsDir`)",
-  },
-  {
-    key: "WORKFLOW_FILE_PATH",
-    defaultValue: "app/api/chat/app/workflow.ts",
-    description: "The path to the workflow file (will be updated in dev mode)",
-  },
-  {
-    key: "NEXT_PUBLIC_USE_COMPONENTS_DIR",
-    defaultValue: "true",
-    description: "Whether to enable components directory feature on frontend",
-  },
-  {
-    key: "NEXT_PUBLIC_DEV_MODE",
-    defaultValue: "true",
-    description: "Whether to enable dev mode (`devMode`)",
-  },
-  {
-    key: "NEXT_PUBLIC_STARTER_QUESTIONS",
-    defaultValue: '["Summarize the document", "What are the key points?"]',
-    description:
-      "Initial questions to display in the chat (`starterQuestions`)",
-  },
-  {
-    key: "NEXT_PUBLIC_SHOW_LLAMACLOUD_SELECTOR",
-    defaultValue: "false",
-    description:
-      "Whether to show LlamaCloud selector for frontend (`llamaCloudIndexSelector`)",
-  },
-];
-
-async function eject() {
-  try {
-    // validate required directories (nextjs project template, src directory, src/app directory)
-    const requiredDirs = [projectDir, srcDir, srcAppDir];
-    for (const dir of requiredDirs) {
-      const exists = await fs
-        .access(dir)
-        .then(() => true)
-        .catch(() => false);
-      if (!exists) {
-        console.error("Error: directory does not exist at", dir);
-        process.exit(1);
-      }
-    }
-
-    // Get destination directory from command line arguments (pnpm eject <path>)
-    const args = process.argv;
-    const outputIndex = args.indexOf("eject");
-    const destDir =
-      outputIndex !== -1 && args[outputIndex + 1]
-        ? path.resolve(args[outputIndex + 1]) // Use provided path after eject
-        : path.join(process.cwd(), "next"); // Default to "next" folder in the current working directory
-
-    // remove destination directory if it exists
-    await fs.rm(destDir, { recursive: true, force: true });
-
-    // create destination directory
-    await fs.mkdir(destDir, { recursive: true });
-
-    // Copy the nextjs project template to the destination directory
-    await fs.cp(projectDir, destDir, { recursive: true });
-
-    // copy src/app/* to destDir/app/api/chat
-    const chatRouteDir = path.join(destDir, "app", "api", "chat");
-    await fs.cp(srcAppDir, path.join(chatRouteDir, "app"), { recursive: true });
-
-    // nextjs project doesn't depend on @llamaindex/server anymore, we need to update the imports in workflow file
-    const workflowFile = path.join(chatRouteDir, "app", "workflow.ts");
-    let workflowContent = await fs.readFile(workflowFile, "utf-8");
-    workflowContent = workflowContent.replace("@llamaindex/server", "../utils");
-    await fs.writeFile(workflowFile, workflowContent);
-
-    // copy generate.ts if it exists
-    const genFilePath = path.join(chatRouteDir, "generate.ts");
-    const genFileExists = await copy(generateFile, genFilePath);
-    if (genFileExists) {
-      // update the import @llamaindex/server in generate.ts
-      let genContent = await fs.readFile(genFilePath, "utf-8");
-      genContent = genContent.replace("@llamaindex/server", "./utils");
-      await fs.writeFile(genFilePath, genContent);
-    }
-
-    // copy folders in root directory if exists
-    const rootFolders = ["components", "data", "output", "storage"];
-    for (const folder of rootFolders) {
-      await copy(path.join(process.cwd(), folder), path.join(destDir, folder));
-    }
-
-    // copy .env if it exists or create a new one
-    const envFileExists = await copy(envFile, path.join(destDir, ".env"));
-    if (!envFileExists) {
-      await fs.writeFile(path.join(destDir, ".env"), "");
-    }
-
-    // update .env file with more server configs
-    let envFileContent = await fs.readFile(path.join(destDir, ".env"), "utf-8");
-    for (const envVar of SERVER_CONFIG_VARS) {
-      const { key, defaultValue, description } = envVar;
-      if (!envFileContent.includes(key)) {
-        // if the key does not exist in the env file, add it
-        envFileContent += `\n# ${description}\n${key}=${defaultValue}\n`;
-      }
-    }
-    await fs.writeFile(path.join(destDir, ".env"), envFileContent);
-
-    // rename gitignore -> .gitignore
-    await fs.rename(
-      path.join(destDir, "gitignore"),
-      path.join(destDir, ".gitignore"),
-    );
-
-    // user can customize layout directory in nextjs project, remove layout api
-    await fs.rm(path.join(destDir, "app", "api", "layout"), {
-      recursive: true,
-      force: true,
-    });
-
-    // remove unneeded files
-    await fs.unlink(path.join(destDir, "public", "config.js"));
-    await fs.unlink(path.join(destDir, "next-build.config.ts"));
-
-    console.log("Successfully ejected @llamaindex/server to", destDir);
-  } catch (error) {
-    console.error("Error during eject:", error.message);
-    process.exit(1);
-  }
-}
-
-// copy src to dest if src exists, return true if src exists
-async function copy(src, dest) {
-  const srcExists = await fs
-    .access(src)
-    .then(() => true)
-    .catch(() => false);
-  if (srcExists) {
-    await fs.cp(src, dest, { recursive: true });
-  }
-  return srcExists;
-}
-
-eject();
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";

 export default function Header() {
  return (
-    <div className="flex items-center justify-between p-2 px-4">
+    <div className="flex items-center justify-between px-4 pt-2">
      <div className="flex items-center gap-2">
        <Sparkles className="size-4" />
        <h1 className="font-semibold">LlamaIndex App</h1>
@@ -32,10 +32,7 @@ export default function CustomChatMessages({
            <ChatMessage.Actions />
          </ChatMessage>
        ))}
-        <ChatMessages.Empty
-          heading="Hello there!"
-          subheading="I'm here to help you with your questions."
-        />
+        <ChatMessages.Empty />
        <ChatMessages.Loading />
      </ChatMessages.List>
      <ChatStarter />
@@ -1,7 +1,7 @@
 "use client";

 import {
-  getAnnotationData,
+  getChatUIAnnotation,
  JSONValue,
  MessageAnnotation,
  MessageAnnotationType,
@@ -25,8 +25,9 @@ export const DynamicEvents = ({
  componentDefs: ComponentDef[];
  appendError: (error: string) => void;
 }) => {
-  const { message } = useChatMessage();
-  const annotations = message.annotations;
+  const {
+    message: { annotations },
+  } = useChatMessage();

  const shownWarningsRef = useRef<Set<string>>(new Set()); // track warnings
  const [hasErrors, setHasErrors] = useState(false);
@@ -42,16 +43,15 @@ export const DynamicEvents = ({

    const availableComponents = new Set(componentDefs.map((comp) => comp.type));

-    annotations.forEach((item: JSONValue) => {
-      const annotation = item as MessageAnnotation;
+    annotations.forEach((annotation: MessageAnnotation) => {
      const type = annotation.type;
-      if (!type) return; // Skip if annotation doesn't have a type
+      if (!type) return; // skip if annotation doesn't have a type

-      const events = getAnnotationData<JSONValue>(message, type);
+      const events = getChatUIAnnotation(annotations, type);

      // Skip if it's a built-in component or if we've already shown the warning
      if (
-        BUILT_IN_CHATUI_COMPONENTS.includes(type as MessageAnnotationType) ||
+        BUILT_IN_CHATUI_COMPONENTS.includes(type) ||
        shownWarningsRef.current.has(type)
      ) {
        return;
@@ -69,7 +69,7 @@ export const DynamicEvents = ({

  const components: EventComponent[] = componentDefs
    .map((comp) => {
-      const events = getAnnotationData<JSONValue>(message, comp.type);
+      const events = getChatUIAnnotation(annotations, comp.type) as JSONValue[]; // get all event data by type
      if (!events?.length) return null;
      return { ...comp, events };
    })
@@ -67,9 +67,6 @@ export const SOURCE_MAP: Record<string, () => Promise<any>> = {
    import("../../../toggle-group"),
  [`${SHADCN_IMPORT_PREFIX}/tooltip`]: () => import("../../../tooltip"),

-  ///// CHAT_UI GENERAL  /////
-  [`@llamaindex/chat-ui`]: () => import("@llamaindex/chat-ui"),
-
  ///// WIDGETS FROM CHAT_UI /////
  [`@llamaindex/chat-ui/widgets`]: () => import("@llamaindex/chat-ui/widgets"),

@@ -79,9 +76,6 @@ export const SOURCE_MAP: Record<string, () => Promise<any>> = {
  ///// UTILS /////
  [`@/components/lib/utils`]: () => import("../../../lib/utils"),
  [`@/lib/utils`]: () => import("../../../lib/utils"), // for v0 compatibility
-
-  ///// ZOD /////
-  [`zod`]: () => import("zod"),
 };

 // parse imports from code to get Function constructor arguments and component name
@@ -128,7 +122,7 @@ export async function parseImports(code: string) {
  const importPromises = imports.map(async ({ name, source }) => {
    if (!(source in SOURCE_MAP)) {
      throw new Error(
-        `Fail to import ${name} from ${source}. Reason: Module not found. \nCurrently we only support importing UI components from Shadcn components, widgets and hooks from "llamaindex/chat-ui", icons from "lucide-react" and zod for data validation.`,
+        `Fail to import ${name} from ${source}. Reason: Module not found. \nCurrently we only support importing UI components from Shadcn components, widgets from "llamaindex/chat-ui/widgets" and icons from "lucide-react"`,
      );
    }
    try {
@@ -1,9 +1,7 @@
 "use client";

-import {
-  Markdown as MarkdownUI,
-  SourceData,
-} from "@llamaindex/chat-ui/widgets";
+import { SourceData } from "@llamaindex/chat-ui";
+import { Markdown as MarkdownUI } from "@llamaindex/chat-ui/widgets";
 import { getConfig } from "../../lib/utils";
 const preprocessMedia = (content: string) => {
  // Remove `sandbox:` from the beginning of the URL before rendering markdown
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";

 export function DefaultHeader() {
  return (
-    <div className="flex items-center justify-between p-2 px-4">
+    <div className="flex items-center justify-between px-4 pt-2">
      <div className="flex items-center gap-2">
        <Sparkles className="size-4" />
        <h1 className="font-semibold">LlamaIndex App</h1>
@@ -2,7 +2,8 @@

 import {
  Message,
-  getAnnotationData,
+  MessageAnnotation,
+  getChatUIAnnotation,
  useChatMessage,
  useChatUI,
 } from "@llamaindex/chat-ui";
@@ -20,10 +21,13 @@ export function ToolAnnotations() {
    [messages, message],
  );
  // Get the tool data from the message annotations
-  const toolData = getAnnotationData<ToolData>(message, "tools");
-  if (toolData.length === 0) return null;
-
-  return <ChatTools data={toolData[0]} artifactVersion={artifactVersion} />;
+  const annotations = message.annotations as MessageAnnotation[] | undefined;
+  const toolData = annotations
+    ? (getChatUIAnnotation(annotations, "tools") as unknown as ToolData[])
+    : null;
+  return toolData?.[0] ? (
+    <ChatTools data={toolData[0]} artifactVersion={artifactVersion} />
+  ) : null;
 }

 // TODO: Used to render outputs of tools. If needed, add more renderers here.
@@ -79,7 +83,9 @@ function getArtifactVersion(
  if (!messageId) return undefined;
  let versionIndex = 1;
  for (const m of messages) {
-    const toolData = getAnnotationData<ToolData>(m, "tools");
+    const toolData = m.annotations
+      ? (getChatUIAnnotation(m.annotations, "tools") as unknown as ToolData[])
+      : null;

    if (toolData?.some((t) => t.toolCall.name === "artifact")) {
      if ("id" in m && m.id === messageId) {
@@ -91,13 +91,6 @@
  ::file-selector-button {
    border-color: var(--color-gray-200, currentColor);
  }
-
-  /* Tailwind v4 removed cursor pointer of button and use default cursor */
-  /* https://github.com/shadcn-ui/ui/issues/6843#issuecomment-2696947980 */
-  button:not([disabled]),
-  [role="button"]:not([disabled]) {
-    cursor: pointer;
-  }
 }

@layer base {
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/server",
  "description": "LlamaIndex Server",
-  "version": "0.2.6",
+  "version": "0.2.4",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -65,7 +65,7 @@
    "@babel/traverse": "^7.27.0",
    "@babel/types": "^7.27.0",
    "@hookform/resolvers": "^5.0.1",
-    "@llamaindex/chat-ui": "0.4.9",
+    "@llamaindex/chat-ui": "0.4.6",
    "@radix-ui/react-accordion": "^1.2.3",
    "@radix-ui/react-alert-dialog": "^1.1.7",
    "@radix-ui/react-aspect-ratio": "^1.1.3",
@@ -12,7 +12,7 @@
    "format": "prettier --ignore-unknown --cache --check .",
    "format:write": "prettier --ignore-unknown --write .",
    "typecheck": "tsc --noEmit",
-    "generate": "tsx app/api/chat/generate.ts"
+    "generate": "tsx app\\api\\chat\\generate.ts"
  },
  "devDependencies": {
    "@eslint/eslintrc": "^3",
@@ -41,7 +41,7 @@
    "@babel/traverse": "^7.27.0",
    "@babel/types": "^7.27.0",
    "@hookform/resolvers": "^5.0.1",
-    "@llamaindex/chat-ui": "0.4.9",
+    "@llamaindex/chat-ui": "0.4.5",
    "@llamaindex/env": "~0.1.30",
    "@llamaindex/openai": "~0.4.0",
    "@llamaindex/readers": "~3.1.4",
@@ -181,8 +181,8 @@ importers:
        specifier: ^5.0.1
        version: 5.0.1(react-hook-form@7.56.1(react@19.1.0))
      '@llamaindex/chat-ui':
-        specifier: 0.4.9
-        version: 0.4.9(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
+        specifier: 0.4.6
+        version: 0.4.6(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
      '@llamaindex/env':
        specifier: ~0.1.30
        version: 0.1.30
@@ -1189,8 +1189,8 @@ packages:
      zod:
        optional: true

-  '@llamaindex/chat-ui@0.4.9':
-    resolution: {integrity: sha512-KEdydC+aJ22VK/TltxIHlMWbWLfh6I0YkyVd1D/CS3FRfLt8l9jfQ/YjY10MiEd8oc1fFfk6ek/FhVWe9Szstg==}
+  '@llamaindex/chat-ui@0.4.6':
+    resolution: {integrity: sha512-XvJEv/rv//8vY9Z4RosbmTyPDQFyVaWlQFe0zrJ4inz+aYqHhYtEiSCmQGgPQG+NqWStlTwpOpCye1jy4mWciQ==}
    peerDependencies:
      react: ^18.2.0 || ^19.0.0 || ^19.0.0-rc

@@ -7219,7 +7219,7 @@ snapshots:
      p-retry: 6.2.1
      zod: 3.24.3

-  '@llamaindex/chat-ui@0.4.9(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
+  '@llamaindex/chat-ui@0.4.6(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
    dependencies:
      '@codemirror/lang-css': 6.3.1
      '@codemirror/lang-html': 6.4.9
@@ -1,23 +1,5 @@
 # @create-llama/llama-index-server

-## 0.1.20
-
-### Patch Changes
-
- 087c961: Add support for human-in-the-loop
- 087c961: Refactor models.py into a separate module
- Updated dependencies [3ff0a18]
- Updated dependencies [df10474]
- Updated dependencies [087c961]
-  - @llamaindex/server@0.2.6
-
-## 0.1.19
-
-### Patch Changes
-
- Updated dependencies [058b376]
-  - @llamaindex/server@0.2.5
-
 ## 0.1.18

 ### Patch Changes
@@ -8,7 +8,6 @@ LlamaIndexServer is a FastAPI-based application that allows you to quickly launc
 - Built on FastAPI for high performance and easy API development
 - Optional built-in chat UI with extendable UI components
 - Prebuilt development code
- Human-in-the-loop (HITL) support, check out the [Human-in-the-loop](https://github.com/run-llama/create-llama/blob/main/python/llama-index-server/examples/hitl/README.md) documentation for more details.

 ## Installation

@@ -16,8 +16,7 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.utils import get_last_artifact
-from llama_index.server.models import (
+from llama_index.server.api.models import (
    Artifact,
    ArtifactEvent,
    ArtifactType,
@@ -25,6 +24,7 @@ from llama_index.server.models import (
    CodeArtifactData,
    UIEvent,
 )
+from llama_index.server.api.utils import get_last_artifact


 class Requirement(BaseModel):
@@ -16,8 +16,7 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.utils import get_last_artifact
-from llama_index.server.models import (
+from llama_index.server.api.models import (
    Artifact,
    ArtifactEvent,
    ArtifactType,
@@ -25,6 +24,7 @@ from llama_index.server.models import (
    DocumentArtifactData,
    UIEvent,
 )
+from llama_index.server.api.utils import get_last_artifact


 class DocumentRequirement(BaseModel):
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";

 export default function Header() {
  return (
-    <div className="flex items-center justify-between p-2 px-4">
+    <div className="flex items-center justify-between px-4 pt-2">
      <div className="flex items-center gap-2">
        <Sparkles className="size-4" />
        <h1 className="font-semibold">Artifact Workflow</h1>
@@ -7,7 +7,7 @@ from examples.artifact.code_workflow import ArtifactWorkflow
 from llama_index.core.workflow import Workflow
 from llama_index.llms.openai import OpenAI
 from llama_index.server import LlamaIndexServer, UIConfig
-from llama_index.server.models import ChatRequest
+from llama_index.server.api.models import ChatRequest


 def create_workflow(chat_request: ChatRequest) -> Workflow:
@@ -1,74 +0,0 @@
-# Human in the Loop
-
-This example shows how to use the LlamaIndexServer with a human in the loop.
-
-## AgentWorkflow
-
-```bash
-uv run -- agent_workflow.py
-```
-
-## Custom Workflow
-
-```bash
-uv run -- custom_workflow.py
-```
-
-## How does it work?
-The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
-
-To do this, you will need to implement two custom events: 
-+ [HumanInputEvent](../../llama_index/server/models/hitl.py#L10): This event is used to request input from the user.
-+ [HumanResponseEvent](../../llama_index/server/models/hitl.py#L43): This event is sent to the workflow to resume execution with input from the user.
-
-In this example, we have implemented these two custom events:  
-
- [CLIHumanInputEvent](events.py#L20) – to request input from the user for CLI command execution.
- [CLIHumanResponseEvent](events.py#L8) – to resume the workflow with the response from the user.
-
-We also have a custom component, [cli_human_input.tsx](./components/cli_human_input.tsx), which displays a card where the user can update the command and choose to execute or cancel the command execution.
-
-To make the [AgentWorkflow](agent_workflow.py) work, we use the `wait_for_event()` method to wait for the human response when a tool is called.
-
-Example:
-```python
-async def cli_executor(ctx: Context, command: str) -> str:
-    """
-    This tool carefully waits for user confirmation before executing a command.
-    """
-    confirmation = await ctx.wait_for_event(
-        CLIHumanResponseEvent,
-        waiter_event=CLIHumanInputEvent(
-            data=CLICommand(command=command),
-        ),
-    )
-    if confirmation.execute:
-        # Execute the command
-        ...
-    else:
-        # Cancel the command
-        ...
-
-```
-
-And for [Custom Workflow](custom_workflow.py), we can define a step that sends the `CLIHumanInputEvent` and another step that waits for the `CLIHumanResponseEvent`.
-
-Example:
-```python
-@step
-async def request_input(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
-    ...
-    return CLIHumanInputEvent(
-        data=CLICommand(command=command),
-        response_event_type=CLIHumanResponseEvent,
-    )
-
-@step
-async def handle_human_response(self, ctx: Context, ev: CLIHumanResponseEvent) -> StopEvent:
-    if ev.execute:
-        # Execute the command
-        ...
-    else:
-        # Cancel the command
-        ...
-```
@@ -1,60 +0,0 @@
-import subprocess
-
-from events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
-from fastapi import FastAPI
-
-from llama_index.core.agent.workflow import AgentWorkflow
-from llama_index.core.workflow import Context
-from llama_index.llms.openai import OpenAI
-from llama_index.server import LlamaIndexServer, UIConfig
-
-
-async def cli_executor(ctx: Context, command: str) -> str:
-    """
-    This tool carefully waits for user confirmation before executing a command.
-    """
-    confirmation = await ctx.wait_for_event(
-        CLIHumanResponseEvent,
-        waiter_event=CLIHumanInputEvent(
-            data=CLICommand(command=command),
-        ),
-    )
-    if confirmation.execute:
-        return subprocess.check_output(confirmation.command, shell=True).decode("utf-8")
-    else:
-        return "Command execution cancelled."
-
-
-def create_workflow() -> AgentWorkflow:
-    return AgentWorkflow.from_tools_or_functions(
-        tools_or_functions=[cli_executor],
-        llm=OpenAI(model="gpt-4.1-mini"),
-        system_prompt="""
-        You are a helpful assistant that help the user execute commands.
-        You can execute commands using the cli_executor tool, don't need to ask for confirmation for triggering the tool.
-        """,
-    )
-
-
-def create_app() -> FastAPI:
-    app = LlamaIndexServer(
-        workflow_factory=create_workflow,
-        suggest_next_questions=False,
-        ui_config=UIConfig(
-            starter_questions=[
-                "List all files in the current directory",
-                "Fetch changes from the remote repository",
-            ],
-            component_dir="components",
-        ),
-    )
-    return app
-
-
-app = create_app()
-
-
-if __name__ == "__main__":
-    import uvicorn
-
-    uvicorn.run("agent_workflow:app", port=8000, reload=True)
@@ -1,96 +0,0 @@
-import { JSONValue, useChatUI } from "@llamaindex/chat-ui";
-import React, { FC, useState } from "react";
-import { Button } from "@/components/ui/button";
-import { Card, CardContent, CardFooter } from "@/components/ui/card";
-import { z } from "zod";
-
-// This schema is equivalent to the CLICommand model defined in events.py
-const CLIInputEventSchema = z.object({
-  command: z.string(),
-});
-type CLIInputEvent = z.infer<typeof CLIInputEventSchema>;
-
-
-const CLIHumanInput: FC<{
-  events: JSONValue[];
-}> = ({ events }) => {
-  const inputEvent = (events || [])
-    .map((ev) => {
-      const parseResult = CLIInputEventSchema.safeParse(ev);
-      return parseResult.success ? parseResult.data : null;
-    })
-    .filter((ev): ev is CLIInputEvent => ev !== null)
-    .at(-1);
-
-  const { append } = useChatUI();
-  const [confirmedValue, setConfirmedValue] = useState<boolean | null>(null);
-  const [editableCommand, setEditableCommand] = useState<string | undefined>(
-    inputEvent?.command,
-  );
-
-  // Update editableCommand if inputEvent changes (e.g. new event comes in)
-  React.useEffect(() => {
-    setEditableCommand(inputEvent?.command);
-  }, [inputEvent?.command]);
-
-  const handleConfirm = () => {
-    append({
-      content: "Yes",
-      role: "user",
-      annotations: [
-        {
-          type: "human_response",
-          data: {
-            execute: true,
-            command: editableCommand, // Use editable command
-          },
-        },
-      ],
-    });
-    setConfirmedValue(true);
-  };
-
-  const handleCancel = () => {
-    append({
-      content: "No",
-      role: "user",
-      annotations: [
-        {
-          type: "human_response",
-          data: {
-            execute: false,
-            command: inputEvent?.command,
-          },
-        },
-      ],
-    });
-    setConfirmedValue(false);
-  };
-
-  return (
-    <Card className="my-4">
-      <CardContent className="pt-6">
-        <p className="text-sm text-gray-700">
-          Do you want to execute the following command?
-        </p>
-        <input
-          disabled
-          type="text"
-          value={editableCommand || ""}
-          onChange={(e) => setEditableCommand(e.target.value)}
-          className="bg-gray-100 rounded p-3 my-2 text-xs font-mono text-gray-800 overflow-x-auto w-full border border-gray-300"
-        />
-      </CardContent>
-      {confirmedValue === null ? (
-        <CardFooter className="flex justify-end gap-2">
-          <>
-            <Button onClick={handleConfirm}>Yes</Button>
-            <Button onClick={handleCancel}>No</Button>
-          </>
-        </CardFooter>
-      ) : null}
-    </Card>
-  );
-};
-
-export default CLIHumanInput;
@@ -1,109 +0,0 @@
-import platform
-import subprocess
-from typing import Any
-
-from events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
-from fastapi import FastAPI
-
-from llama_index.core.prompts import PromptTemplate
-from llama_index.core.settings import Settings
-from llama_index.core.workflow import (
-    Context,
-    StartEvent,
-    StopEvent,
-    Workflow,
-    step,
-)
-from llama_index.server import LlamaIndexServer, UIConfig
-
-
-class CLIWorkflow(Workflow):
-    """
-    A workflow that can execute command-line tools, with a human in the loop for confirmation.
-    """
-
-    default_prompt = PromptTemplate(
-        template="""
-        You are a helpful assistant who can write CLI commands to execute using {cli_language}.
-        Your task is to analyze the user's request and write a CLI command to execute.
-
-        ## User Request
-        {user_request}
-
-        Don't be verbose, only respond with the CLI command without any other text.
-        """
-    )
-
-    def __init__(self, **kwargs: Any) -> None:
-        # A HITL workflow should disable the timeout; otherwise, we will get a timeout error from the callback
-        kwargs["timeout"] = None
-        super().__init__(**kwargs)
-
-    @step
-    async def start(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
-        user_msg = ev.user_msg
-        if user_msg is None:
-            raise ValueError("Missing user_msg in StartEvent")
-        await ctx.set("user_msg", user_msg)
-        # Request LLM to generate a CLI command
-        os_name = platform.system()
-        if os_name == "Linux" or os_name == "Darwin":
-            cli_language = "bash"
-        else:
-            cli_language = "cmd"
-        prompt = self.default_prompt.format(
-            user_request=user_msg, cli_language=cli_language
-        )
-        llm = Settings.llm
-        if llm is None:
-            raise ValueError("Missing LLM in Settings")
-        response = await llm.acomplete(prompt, formatted=True)
-        command = response.text.strip()
-        if command == "":
-            raise ValueError("Couldn't generate a command")
-        # Send the command to the user for confirmation
-        await ctx.set("command", command)
-        return CLIHumanInputEvent(  # type: ignore
-            data=CLICommand(command=command),
-            response_event_type=CLIHumanResponseEvent,
-        )
-
-    @step
-    async def handle_human_response(
-        self,
-        ctx: Context,
-        ev: CLIHumanResponseEvent,  # This event is sent by LlamaIndexServer when the user responds
-    ) -> StopEvent:
-        # If we have human response, check the confirmation and execute the command
-        if ev.execute:
-            command = ev.command or ""
-            if command == "":
-                raise ValueError("Missing command in CLIExecutionEvent")
-            res = subprocess.run(command, shell=True, capture_output=True, text=True)
-            return StopEvent(result=res.stdout or res.stderr)
-        else:
-            return StopEvent(result=None)
-
-
-def create_app() -> FastAPI:
-    app = LlamaIndexServer(
-        workflow_factory=lambda: CLIWorkflow(),
-        suggest_next_questions=False,
-        ui_config=UIConfig(
-            starter_questions=[
-                "List all files in the current directory",
-                "Fetch changes from the remote repository",
-            ],
-            component_dir="components",
-        ),
-    )
-    return app
-
-
-app = create_app()
-
-
-if __name__ == "__main__":
-    import uvicorn
-
-    uvicorn.run("custom_workflow:app", port=8000, reload=True)
@@ -1,34 +0,0 @@
-from typing import Type
-
-from pydantic import BaseModel, Field
-
-from llama_index.server.models import HumanInputEvent, HumanResponseEvent
-
-
-class CLIHumanResponseEvent(HumanResponseEvent):
-    execute: bool = Field(
-        description="True if the human wants to execute the command, False otherwise."
-    )
-    command: str = Field(description="The command to execute.")
-
-
-class CLICommand(BaseModel):
-    command: str = Field(description="The command to execute.")
-
-
-# We need an event that extends from HumanInputEvent for HITL feature
-class CLIHumanInputEvent(HumanInputEvent):
-    """
-    CLIHumanInputEvent is sent when the agent needs permission from the user to execute the CLI command.
-    Render this event by showing the command and a boolean button to execute the command or not.
-    """
-
-    event_type: str = (
-        "cli_human_input"  # used by UI to render with appropriate component
-    )
-    response_event_type: Type = (
-        CLIHumanResponseEvent  # used by workflow to resume with the correct event
-    )
-    data: CLICommand = Field(  # the data that is sent to the UI for rendering
-        description="The command to execute.",
-    )
@@ -2,14 +2,13 @@ import os
 from typing import List, Optional

 from fastapi import FastAPI
-
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
 from llama_index.core.settings import Settings
 from llama_index.core.tools import QueryEngineTool, ToolMetadata
 from llama_index.llms.openai import OpenAI
 from llama_index.server import LlamaIndexServer, UIConfig
-from llama_index.server.models import ChatRequest
+from llama_index.server.api.models import ChatRequest
 from llama_index.server.services.llamacloud import LlamaCloudIndex, get_index
 from llama_index.server.tools.index.citation import (
    CITATION_SYSTEM_PROMPT,
@@ -3,7 +3,7 @@ from typing import Optional
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.settings import Settings
 from llama_index.llms.openai import OpenAI
-from llama_index.server.models import ChatRequest
+from llama_index.server.api.models import ChatRequest


 def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
@@ -1,4 +1,4 @@
-from .models.ui import UIEvent
+from .api.models import UIEvent
 from .server import LlamaIndexServer, UIConfig

 __all__ = ["LlamaIndexServer", "UIConfig", "UIEvent"]
@@ -3,7 +3,7 @@ from typing import Any

 from llama_index.core.agent.workflow.workflow_events import ToolCall, ToolCallResult
 from llama_index.server.api.callbacks.base import EventCallback
-from llama_index.server.models.ui import AgentRunEvent
+from llama_index.server.api.models import AgentRunEvent

 logger = logging.getLogger("uvicorn")

@@ -4,7 +4,7 @@ from typing import Any, List, Optional
 from llama_index.core.agent.workflow.workflow_events import ToolCallResult
 from llama_index.core.schema import NodeWithScore
 from llama_index.server.api.callbacks.base import EventCallback
-from llama_index.server.models.source_nodes import SourceNodesEvent
+from llama_index.server.api.models import SourceNodesEvent

 logger = logging.getLogger(__name__)

@@ -2,7 +2,7 @@ import logging
 from typing import Any, Optional

 from llama_index.server.api.callbacks.base import EventCallback
-from llama_index.server.models.chat import ChatRequest
+from llama_index.server.api.models import ChatRequest
 from llama_index.server.services.suggest_next_question import (
    SuggestNextQuestionsService,
 )
@@ -1,2 +1,196 @@
-# TODO: For backward compatibility, remove this in a minor release
-from llama_index.server.models import *  # noqa
+import logging
+import os
+from enum import Enum
+from typing import Any, Dict, List, Literal, Optional, Union
+
+from pydantic import BaseModel, field_validator
+
+from llama_index.core.schema import NodeWithScore
+from llama_index.core.types import ChatMessage, MessageRole
+from llama_index.core.workflow import Event
+from llama_index.server.settings import server_settings
+from llama_index.server.utils import llamacloud
+
+logger = logging.getLogger("uvicorn")
+
+
+class ChatAPIMessage(BaseModel):
+    role: MessageRole
+    content: str
+    annotations: Optional[List[Any]] = None
+
+    def to_llamaindex_message(self) -> ChatMessage:
+        return ChatMessage(role=self.role, content=self.content)
+
+
+class ChatRequest(BaseModel):  # chat request body: message list plus optional data
+    messages: List[ChatAPIMessage]
+    data: Optional[Any] = None
+
+    @field_validator("messages")
+    def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
+        if not v or v[-1].role != MessageRole.USER:  # guard: [] has no v[-1]
+            raise ValueError("Last message must be from user")
+        return v
+
+
+class AgentRunEventType(Enum):
+    TEXT = "text"
+    PROGRESS = "progress"
+
+
+class AgentRunEvent(Event):
+    name: str
+    msg: str
+    event_type: AgentRunEventType = AgentRunEventType.TEXT
+    data: Optional[dict] = None
+
+    def to_response(self) -> dict:
+        return {
+            "type": "agent",
+            "data": {
+                "agent": self.name,
+                "type": self.event_type.value,
+                "text": self.msg,
+                "data": self.data,
+            },
+        }
+
+
+class SourceNodesEvent(Event):
+    nodes: List[NodeWithScore]
+
+    def to_response(self) -> dict:
+        return {
+            "type": "sources",
+            "data": {
+                "nodes": [
+                    SourceNodes.from_source_node(node).model_dump()
+                    for node in self.nodes
+                ]
+            },
+        }
+
+
+class SourceNodes(BaseModel):
+    id: str
+    metadata: Dict[str, Any]
+    score: Optional[float]
+    text: str
+    url: Optional[str]
+
+    @classmethod
+    def from_source_node(cls, source_node: NodeWithScore) -> "SourceNodes":
+        metadata = source_node.node.metadata
+        url = cls.get_url_from_metadata(metadata)
+
+        return cls(
+            id=source_node.node.node_id,
+            metadata=metadata,
+            score=source_node.score,
+            text=source_node.node.text,  # type: ignore
+            url=url,
+        )
+
+    @classmethod
+    def get_url_from_metadata(
+        cls,
+        metadata: Dict[str, Any],
+        data_dir: Optional[str] = None,
+    ) -> Optional[str]:
+        url_prefix = server_settings.file_server_url_prefix
+        if data_dir is None:
+            data_dir = "data"
+        file_name = metadata.get("file_name")
+
+        if file_name and url_prefix:
+            if llamacloud.is_llamacloud_file(metadata):
+                file_name = llamacloud.get_local_file_name(metadata)
+                return f"{url_prefix}/output/llamacloud/{file_name}"
+            is_private = metadata.get("private", "false") == "true"
+            if is_private:
+                # file is a private upload
+                return f"{url_prefix}/output/uploaded/{file_name}"
+            # file is from calling the 'generate' script
+            # Get the relative path of file_path to data_dir
+            file_path = metadata.get("file_path")
+            data_dir = os.path.abspath(data_dir)
+            if file_path and data_dir:
+                relative_path = os.path.relpath(file_path, data_dir)
+                return f"{url_prefix}/data/{relative_path}"
+        # fallback to URL in metadata (e.g. for websites)
+        return metadata.get("URL")
+
+    @classmethod
+    def from_source_nodes(
+        cls, source_nodes: List[NodeWithScore]
+    ) -> List["SourceNodes"]:
+        return [cls.from_source_node(node) for node in source_nodes]
+
+
+class ComponentDefinition(BaseModel):
+    type: str
+    code: str
+    filename: str
+
+
+class UIEvent(Event):
+    type: str
+    data: BaseModel
+
+    def to_response(self) -> dict:
+        return {
+            "type": self.type,
+            "data": self.data.model_dump(),
+        }
+
+
+class ArtifactType(str, Enum):
+    CODE = "code"
+    DOCUMENT = "document"
+
+
+class CodeArtifactData(BaseModel):
+    file_name: str
+    code: str
+    language: str
+
+
+class DocumentArtifactData(BaseModel):
+    title: str
+    content: str
+    type: Literal["markdown", "html"]
+
+
+class Artifact(BaseModel):
+    created_at: Optional[int] = None
+    type: ArtifactType
+    data: Union[CodeArtifactData, DocumentArtifactData]
+
+    @classmethod
+    def from_message(cls, message: ChatAPIMessage) -> Optional["Artifact"]:
+        if not message.annotations or not isinstance(message.annotations, list):
+            return None
+
+        for annotation in message.annotations:
+            if isinstance(annotation, dict) and annotation.get("type") == "artifact":
+                try:
+                    artifact = cls.model_validate(annotation.get("data"))
+                    return artifact
+                except Exception as e:
+                    logger.warning(
+                        f"Failed to parse artifact from annotation: {annotation}. Error: {e}"
+                    )
+
+        return None
+
+
+class ArtifactEvent(Event):
+    type: str = "artifact"
+    data: Artifact
+
+    def to_response(self) -> dict:
+        return {
+            "type": self.type,
+            "data": self.data.model_dump(),
+        }
@@ -11,10 +11,7 @@ from llama_index.core.agent.workflow.workflow_events import (
    AgentSetup,
    AgentStream,
 )
-from llama_index.core.workflow import (
-    StopEvent,
-    Workflow,
-)
+from llama_index.core.workflow import StopEvent, Workflow
 from llama_index.server.api.callbacks import (
    AgentCallTool,
    EventCallback,
@@ -23,11 +20,9 @@ from llama_index.server.api.callbacks import (
    SuggestNextQuestions,
 )
 from llama_index.server.api.callbacks.stream_handler import StreamHandler
+from llama_index.server.api.models import ChatRequest
 from llama_index.server.api.utils.vercel_stream import VercelStreamResponse
-from llama_index.server.models.chat import ChatRequest
-from llama_index.server.models.hitl import HumanInputEvent
 from llama_index.server.services.llamacloud import LlamaCloudFileService
-from llama_index.server.services.workflow import HITLWorkflowService


 def chat_router(
@@ -43,8 +38,7 @@ def chat_router(
        background_tasks: BackgroundTasks,
    ) -> StreamingResponse:
        try:
-            last_message = request.messages[-1]
-            user_message = last_message.to_llamaindex_message()
+            user_message = request.messages[-1].to_llamaindex_message()
            chat_history = [
                message.to_llamaindex_message() for message in request.messages[:-1]
            ]
@@ -54,21 +48,10 @@ def chat_router(
                workflow = workflow_factory(chat_request=request)
            else:
                workflow = workflow_factory()
-
-            # Check if we should resume a chat with a human response
-            human_response = last_message.human_response
-            if human_response:
-                ctx = await HITLWorkflowService.load_context(
-                    id=request.id,
-                    workflow=workflow,
-                    data=human_response,
-                )
-                workflow_handler = workflow.run(ctx=ctx)
-            else:
-                workflow_handler = workflow.run(
-                    user_msg=user_message.content,
-                    chat_history=chat_history,
-                )
+            workflow_handler = workflow.run(
+                user_msg=user_message.content,
+                chat_history=chat_history,
+            )

            callbacks: list[EventCallback] = [
                AgentCallTool(),
@@ -83,11 +66,7 @@ def chat_router(
            )

            return VercelStreamResponse(
-                content_generator=_stream_content(
-                    stream_handler,
-                    logger,
-                    request.id,
-                ),
+                content_generator=_stream_content(stream_handler, request, logger),
            )
        except Exception as e:
            logger.error(e)
@@ -120,8 +99,8 @@ def chat_router(

 async def _stream_content(
    handler: StreamHandler,
+    request: ChatRequest,
    logger: logging.Logger,
-    chat_id: str,
 ) -> AsyncGenerator[str, None]:
    async def _text_stream(
        event: Union[AgentStream, StopEvent],
@@ -147,19 +126,6 @@ async def _stream_content(
                async for chunk in _text_stream(event):
                    handler.accumulate_text(chunk)
                    yield VercelStreamResponse.convert_text(chunk)
-            elif isinstance(event, HumanInputEvent):
-                ctx = handler.workflow_handler.ctx
-                if ctx is None:
-                    raise RuntimeError("Context is None")
-                # Save the context with the HITL event
-                await HITLWorkflowService.save_context(
-                    id=chat_id,
-                    ctx=ctx,
-                    resume_event_type=event.response_event_type,
-                )
-                yield VercelStreamResponse.convert_data(event.to_response())
-                # Break to stop the stream
-                break
            elif isinstance(event, dict):
                yield VercelStreamResponse.convert_data(event)
            elif hasattr(event, "to_response"):
@@ -2,8 +2,7 @@ import logging
 from typing import List

 from fastapi import APIRouter
-
-from llama_index.server.models.ui import ComponentDefinition
+from llama_index.server.api.models import ComponentDefinition
 from llama_index.server.services.custom_ui import CustomUI


@@ -1,7 +1,6 @@
 from typing import List, Optional

-from llama_index.server.models.artifacts import Artifact
-from llama_index.server.models.chat import ChatRequest
+from llama_index.server.api.models import Artifact, ChatRequest


 def get_artifacts(chat_request: ChatRequest) -> List[Artifact]:
@@ -1,34 +0,0 @@
-from llama_index.server.models.artifacts import (
-    Artifact,
-    ArtifactEvent,
-    ArtifactType,
-    CodeArtifactData,
-    DocumentArtifactData,
-)
-from llama_index.server.models.chat import ChatAPIMessage, ChatRequest
-from llama_index.server.models.hitl import HumanInputEvent, HumanResponseEvent
-from llama_index.server.models.source_nodes import SourceNodes, SourceNodesEvent
-from llama_index.server.models.ui import (
-    AgentRunEvent,
-    AgentRunEventType,
-    ComponentDefinition,
-    UIEvent,
-)
-
-__all__ = [
-    "Artifact",
-    "ArtifactEvent",
-    "ArtifactType",
-    "DocumentArtifactData",
-    "CodeArtifactData",
-    "ChatAPIMessage",
-    "ChatRequest",
-    "UIEvent",
-    "ComponentDefinition",
-    "AgentRunEvent",
-    "AgentRunEventType",
-    "SourceNodes",
-    "SourceNodesEvent",
-    "HumanInputEvent",
-    "HumanResponseEvent",
-]
@@ -1,60 +0,0 @@
-import logging
-from enum import Enum
-from typing import Literal, Optional, Union
-
-from llama_index.core.workflow.events import Event
-from llama_index.server.models.chat import ChatAPIMessage
-from pydantic import BaseModel
-
-logger = logging.getLogger(__name__)
-
-
-class ArtifactType(str, Enum):
-    CODE = "code"
-    DOCUMENT = "document"
-
-
-class CodeArtifactData(BaseModel):
-    file_name: str
-    code: str
-    language: str
-
-
-class DocumentArtifactData(BaseModel):
-    title: str
-    content: str
-    type: Literal["markdown", "html"]
-
-
-class Artifact(BaseModel):
-    created_at: Optional[int] = None
-    type: ArtifactType
-    data: Union[CodeArtifactData, DocumentArtifactData]
-
-    @classmethod
-    def from_message(cls, message: ChatAPIMessage) -> Optional["Artifact"]:
-        if not message.annotations or not isinstance(message.annotations, list):
-            return None
-
-        for annotation in message.annotations:
-            if isinstance(annotation, dict) and annotation.get("type") == "artifact":
-                try:
-                    artifact = cls.model_validate(annotation.get("data"))
-                    return artifact
-                except Exception as e:
-                    logger.warning(
-                        f"Failed to parse artifact from annotation: {annotation}. Error: {e}"
-                    )
-
-        return None
-
-
-class ArtifactEvent(Event):
-    type: str = "artifact"
-    data: Artifact
-
-    def to_response(self) -> dict:
-        return {
-            "type": self.type,
-            "data": self.data.model_dump(),
-        }
@@ -1,44 +0,0 @@
-import re
-from typing import Any, List, Optional
-
-from pydantic import BaseModel, field_validator
-
-from llama_index.core.types import ChatMessage, MessageRole
-
-
-class ChatAPIMessage(BaseModel):
-    role: MessageRole
-    content: str
-    annotations: Optional[List[Any]] = None
-
-    def to_llamaindex_message(self) -> ChatMessage:
-        return ChatMessage(role=self.role, content=self.content)
-
-    @property
-    def human_response(self) -> Optional[Any]:
-        if self.annotations:
-            for annotation in self.annotations:
-                if (
-                    isinstance(annotation, dict)
-                    and annotation.get("type") == "human_response"
-                ):
-                    return annotation.get("data", {})
-        return None
-
-
-class ChatRequest(BaseModel):
-    id: str  # see https://ai-sdk.dev/docs/reference/ai-sdk-ui/use-chat#id - constant for the same chat session
-    messages: List[ChatAPIMessage]
-    data: Optional[Any] = None
-
-    @field_validator("messages")
-    def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
-        if v[-1].role != MessageRole.USER:
-            raise ValueError("Last message must be from user")
-        return v
-
-    @field_validator("id")
-    def validate_id(cls, v: str) -> str:
-        if re.search(r"[^a-zA-Z0-9_-]", v):
-            raise ValueError("ID contains special characters")
-        return v
@@ -1,51 +0,0 @@
-from typing import Any, Dict, Type, Union
-
-from llama_index.core.workflow.events import (
-    HumanResponseEvent as FrameworkHumanResponseEvent,
-)
-from llama_index.core.workflow.events import InputRequiredEvent
-from pydantic import BaseModel, Field
-
-
-class HumanResponseEvent(FrameworkHumanResponseEvent):
-    """
-    Use this event to send a response from a human.
-    """
-
-    def __init__(self, **kwargs: Any) -> None:
-        if "response" not in kwargs:
-            kwargs["response"] = f"Human response with data: {kwargs.get('data', {})}"
-        super().__init__(**kwargs)
-
-
-class HumanInputEvent(InputRequiredEvent):
-    """
-    Use this event to request input from a human.
-    It will block the workflow execution until the human responds.
-    """
-
-    response_event_type: Type[HumanResponseEvent] = Field(
-        description="The type of event that the workflow is waiting for.",
-    )
-    event_type: str = Field(
-        description="An identifier for the UI component that will be used to render the input.",
-    )
-    data: Union[Dict[str, Any], BaseModel] = Field(
-        description="The data to be sent to the UI component that will be used to render the input.",
-    )
-
-    def __init__(self, **kwargs: Any) -> None:
-        # Construct the prefix for InputRequiredEvent
-        event_type = kwargs.get("event_type", None)
-        data = kwargs.get("data", None)
-        if "prefix" not in kwargs:
-            kwargs["prefix"] = f"Need input for {event_type} with data: {data}"
-        super().__init__(**kwargs)
-
-    def to_response(self) -> dict:
-        return {
-            "type": self.event_type,
-            "data": self.data
-            if isinstance(self.data, dict)
-            else self.data.model_dump(),
-        }
@@ -1,49 +0,0 @@
-from typing import Any, Dict, List, Optional
-
-from pydantic import BaseModel
-
-from llama_index.core.schema import NodeWithScore
-from llama_index.core.workflow.events import Event
-from llama_index.server.utils.chat_file import get_file_url_from_metadata
-
-
-class SourceNodesEvent(Event):
-    nodes: List[NodeWithScore]
-
-    def to_response(self) -> dict:
-        return {
-            "type": "sources",
-            "data": {
-                "nodes": [
-                    SourceNodes.from_source_node(node).model_dump()
-                    for node in self.nodes
-                ]
-            },
-        }
-
-
-class SourceNodes(BaseModel):
-    id: str
-    metadata: Dict[str, Any]
-    score: Optional[float]
-    text: str
-    url: Optional[str]
-
-    @classmethod
-    def from_source_node(cls, source_node: NodeWithScore) -> "SourceNodes":
-        metadata = source_node.node.metadata
-        url = get_file_url_from_metadata(metadata)
-
-        return cls(
-            id=source_node.node.node_id,
-            metadata=metadata,
-            score=source_node.score,
-            text=source_node.node.text,  # type: ignore
-            url=url,
-        )
-
-    @classmethod
-    def from_source_nodes(
-        cls, source_nodes: List[NodeWithScore]
-    ) -> List["SourceNodes"]:
-        return [cls.from_source_node(node) for node in source_nodes]
@@ -1,49 +0,0 @@
-import logging
-from enum import Enum
-from typing import Optional
-
-from pydantic import BaseModel
-
-from llama_index.core.workflow import Event
-
-logger = logging.getLogger("uvicorn")
-
-
-class AgentRunEventType(Enum):
-    TEXT = "text"
-    PROGRESS = "progress"
-
-
-class AgentRunEvent(Event):
-    name: str
-    msg: str
-    event_type: AgentRunEventType = AgentRunEventType.TEXT
-    data: Optional[dict] = None
-
-    def to_response(self) -> dict:
-        return {
-            "type": "agent",
-            "data": {
-                "agent": self.name,
-                "type": self.event_type.value,
-                "text": self.msg,
-                "data": self.data,
-            },
-        }
-
-
-class ComponentDefinition(BaseModel):
-    type: str
-    code: str
-    filename: str
-
-
-class UIEvent(Event):
-    type: str
-    data: BaseModel
-
-    def to_response(self) -> dict:
-        return {
-            "type": self.type,
-            "data": self.data.model_dump(),
-        }
@@ -2,7 +2,7 @@ import logging
 import os
 from typing import List, Optional

-from llama_index.server.models.ui import ComponentDefinition
+from llama_index.server.api.models import ComponentDefinition


 class CustomUI:
@@ -11,7 +11,7 @@ from llama_cloud import ManagedIngestionStatus, PipelineFileCreateCustomMetadata
 from pydantic import BaseModel

 from llama_index.core.schema import NodeWithScore
-from llama_index.server.models.source_nodes import SourceNodes
+from llama_index.server.api.models import SourceNodes
 from llama_index.server.services.llamacloud.index import get_client
 from llama_index.server.utils import llamacloud

@@ -3,15 +3,14 @@ import os
 from typing import TYPE_CHECKING, Any, Optional

 from llama_cloud import PipelineType
-from pydantic import BaseModel, Field, field_validator
-
 from llama_index.core.callbacks import CallbackManager
 from llama_index.core.ingestion.api_utils import (
    get_client as llama_cloud_get_client,
 )
 from llama_index.core.settings import Settings
 from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
-from llama_index.server.models.chat import ChatRequest
+from llama_index.server.api.models import ChatRequest
+from pydantic import BaseModel, Field, field_validator

 if TYPE_CHECKING:
    from llama_cloud.client import LlamaCloud
@@ -5,7 +5,7 @@ from typing import List, Optional, Union

 from llama_index.core.prompts import PromptTemplate
 from llama_index.core.settings import Settings
-from llama_index.server.models.chat import ChatAPIMessage
+from llama_index.server.api.models import ChatAPIMessage
 from llama_index.server.prompts import SUGGEST_NEXT_QUESTION_PROMPT

 logger = logging.getLogger("uvicorn")
@@ -1,106 +0,0 @@
-import json
-import logging
-from pathlib import Path
-from typing import Type
-
-from llama_index.core.workflow import (
-    Context,
-    JsonSerializer,
-    Workflow,
-)
-from llama_index.server.models.hitl import HumanResponseEvent
-from llama_index.server.utils.class_meta_serialization import (
-    type_from_identifier,
-    type_identifier,
-)
-
-logger = logging.getLogger(__name__)
-
-
-class HITLWorkflowService:
-    """
-    A service for helping pause and resume a HITL workflow.
-    """
-
-    # A key in context that stores the HITL event type
-    HITL_CONTEXT_KEY = "human_response_type"
-
-    @staticmethod
-    def get_storage_path(id: str) -> Path:
-        storage_dir = Path("output") / "checkpoints"
-        if not storage_dir.exists():
-            storage_dir.mkdir(parents=True, exist_ok=True)
-        return storage_dir / f"{id}.json"
-
-    @classmethod
-    async def save_context(
-        cls,
-        id: str,
-        ctx: Context,
-        resume_event_type: Type[HumanResponseEvent],
-    ) -> None:
-        """
-        Save the current checkpoint to a file and return the id
-
-        Args:
-            id: The id to save the context to.
-            ctx: The context to save.
-            resume_event_type [Optional]: Save workflow context with a resume event.
-        """
-        await ctx.set(
-            key=cls.HITL_CONTEXT_KEY,
-            value=type_identifier(resume_event_type),
-        )
-
-        ctx_data = ctx.to_dict(serializer=JsonSerializer())
-        with open(cls.get_storage_path(id), "w") as f:
-            json.dump(ctx_data, f)
-
-    @classmethod
-    async def load_context(
-        cls,
-        id: str,
-        workflow: Workflow,
-        data: dict,
-    ) -> Context:
-        file_path = cls.get_storage_path(id)
-        if not file_path.exists():
-            raise FileNotFoundError(f"No checkpoint found for id: {id}")
-        try:
-            with open(file_path, "r") as f:
-                ctx_data = json.load(f)
-        except json.JSONDecodeError as e:
-            raise ValueError(f"Invalid checkpoint data for id {id}: {e}")
-        ctx = Context.from_dict(
-            workflow=workflow,
-            data=ctx_data,
-            serializer=JsonSerializer(),
-        )
-        resume_event = await cls._construct_resume_event(ctx, data)
-        ctx.send_event(resume_event)
-        return ctx
-
-    @classmethod
-    async def _construct_resume_event(
-        cls, context: Context, data: dict
-    ) -> HumanResponseEvent:
-        """
-        Get the HITL event from the context.
-        """
-        event_type_str = await context.get(cls.HITL_CONTEXT_KEY)
-        if not event_type_str:
-            raise ValueError(
-                "Cannot resume the workflow because there is no resume event type in the context"
-            )
-        resume_event_type = type_from_identifier(event_type_str)
-        if not issubclass(resume_event_type, HumanResponseEvent):
-            raise ValueError(
-                f"Cannot resume the workflow because the resume event type {resume_event_type} is not a HumanResponseEvent"
-            )
-        try:
-            return resume_event_type(**data)
-        except Exception as e:
-            raise ValueError(
-                f"Error constructing resume event: {e}. "
-                f"Make sure the provided data is valid for the event type {resume_event_type}"
-            )
@@ -14,7 +14,7 @@ from llama_index.core.tools import (
    ToolSelection,
 )
 from llama_index.core.workflow import Context
-from llama_index.server.models.ui import AgentRunEvent, AgentRunEventType
+from llama_index.server.api.models import AgentRunEvent, AgentRunEventType
 from llama_index.core.agent.workflow.workflow_events import ToolCall, ToolCallResult

 logger = logging.getLogger("uvicorn")
@@ -1,36 +0,0 @@
-import os
-from typing import Any, Dict, Optional
-
-from llama_index.server.settings import server_settings
-from llama_index.server.utils import llamacloud
-
-
-def get_file_url_from_metadata(
-    metadata: Dict[str, Any],
-    data_dir: Optional[str] = None,
-) -> Optional[str]:
-    """
-    Get the URL of a file from the source node metadata.
-    """
-    url_prefix = server_settings.file_server_url_prefix
-    if data_dir is None:
-        data_dir = "data"
-    file_name = metadata.get("file_name")
-
-    if file_name and url_prefix:
-        if llamacloud.is_llamacloud_file(metadata):
-            file_name = llamacloud.get_local_file_name(metadata)
-            return f"{url_prefix}/output/llamacloud/{file_name}"
-        is_private = metadata.get("private", "false") == "true"
-        if is_private:
-            # file is a private upload
-            return f"{url_prefix}/output/uploaded/{file_name}"
-        # file is from calling the 'generate' script
-        # Get the relative path of file_path to data_dir
-        file_path = metadata.get("file_path")
-        data_dir = os.path.abspath(data_dir)
-        if file_path and data_dir:
-            relative_path = os.path.relpath(file_path, data_dir)
-            return f"{url_prefix}/data/{relative_path}"
-    # fallback to URL in metadata (e.g. for websites)
-    return metadata.get("URL")
@@ -1,30 +0,0 @@
-# Helper functions for serializing and deserializing class metadata.
-import importlib
-from typing import Type
-
-
-def type_identifier(type: Type) -> str:
-    """
-    Get the identifier of a type.
-    """
-    return f"{type.__module__}.{type.__qualname__}"
-
-
-def type_from_identifier(identifier: str) -> Type:
-    """
-    Get the type from an identifier.
-    """
-    if not identifier or "." not in identifier:
-        raise ValueError(f"Invalid type identifier format: {identifier}")
-    try:
-        module, qualname = identifier.rsplit(".", 1)
-        imported_module = importlib.import_module(module)
-        if not hasattr(imported_module, qualname):
-            raise AttributeError(f"Module '{module}' has no attribute '{qualname}'")
-        return getattr(imported_module, qualname)
-    except ImportError as e:
-        raise ImportError(f"Failed to import module '{module}': {e}")
-    except Exception as e:
-        raise RuntimeError(
-            f"Failed to resolve type from identifier '{identifier}': {e}"
-        )
@@ -1,7 +1,7 @@
 {
  "name": "@create-llama/llama-index-server",
  "private": true,
-  "version": "0.1.20",
+  "version": "0.1.18",
  "type": "module",
  "scripts": {
    "prebuild": "uv run -- scripts/frontend.py --mode copy",
@@ -1,6 +1,6 @@
 [project]
 name = "llama-index-server"
-version = "0.1.20"
+version = "0.1.18"
 description = "llama-index fastapi server"
 readme = "README.md"
 license = "MIT"
@@ -1,5 +1,4 @@
 import logging
-from typing import AsyncGenerator, Callable
 from unittest.mock import AsyncMock, MagicMock

 import pytest
@@ -8,32 +7,31 @@ from httpx import ASGITransport, AsyncClient

 from llama_index.core.workflow import StopEvent, Workflow
 from llama_index.core.workflow.handler import WorkflowHandler
+from llama_index.server.api.models import ChatAPIMessage, ChatRequest
 from llama_index.server.api.routers.chat import chat_router
-from llama_index.server.models.chat import ChatAPIMessage, ChatRequest, MessageRole


@pytest.fixture()
-def logger() -> logging.Logger:
+def logger():
    return logging.getLogger("test")


@pytest.fixture()
-def chat_request() -> ChatRequest:
+def chat_request():
    """Create a simple chat request with one user message."""
    return ChatRequest(
-        id="test",
-        messages=[ChatAPIMessage(role=MessageRole.USER, content="Hello, how are you?")],
+        messages=[ChatAPIMessage(role="user", content="Hello, how are you?")]
    )


@pytest.fixture()
-def mock_workflow() -> MagicMock:
+def mock_workflow():
    """Create a mock workflow that returns a simple response."""
    workflow = MagicMock(spec=Workflow)
    handler = AsyncMock(spec=WorkflowHandler)

    # Setup the handler to stream a simple response event
-    async def mock_stream_events() -> AsyncGenerator[StopEvent, None]:
+    async def mock_stream_events():
        yield StopEvent(result="I'm doing well, thank you for asking!")

    handler.stream_events.return_value = mock_stream_events()
@@ -43,21 +41,17 @@ def mock_workflow() -> MagicMock:


@pytest.fixture()
-def workflow_factory(mock_workflow: MagicMock) -> Callable[[], MagicMock]:
+def workflow_factory(mock_workflow):
    """Create a factory function that returns our mock workflow."""

-    def factory(verbose: bool = False) -> MagicMock:
+    def factory(verbose=False):
        return mock_workflow

    return factory


@pytest.mark.asyncio()
-async def test_chat_router(
-    chat_request: ChatRequest,
-    workflow_factory: Callable[[], MagicMock],
-    logger: logging.Logger,
-) -> None:
+async def test_chat_router(chat_request, workflow_factory, logger):
    """Test that the chat router handles a request correctly."""
    # Create a FastAPI app and mount our router
    app = FastAPI()
@@ -96,14 +90,14 @@ async def test_chat_router(


@pytest.mark.asyncio()
-async def test_chat_with_agent_workflow(logger: logging.Logger) -> None:
+async def test_chat_with_agent_workflow(logger):
    """Test that the chat router works with a workflow that mimics an agent workflow."""
    # Create a simple workflow that mimics an agent workflow
    mock_workflow = MagicMock(spec=Workflow)
    handler = AsyncMock(spec=WorkflowHandler)

    # Setup the handler to stream a simple response about weather
-    async def mock_stream_events() -> AsyncGenerator[StopEvent, None]:
+    async def mock_stream_events():
        yield StopEvent(
            result="The weather in New York is sunny. I used the weather tool to get this information."
        )
@@ -112,7 +106,7 @@ async def test_chat_with_agent_workflow(logger: logging.Logger) -> None:
    mock_workflow.run.return_value = handler

    # Create a factory function that returns our mock workflow
-    def workflow_factory(verbose: bool = False) -> MagicMock:
+    def workflow_factory(verbose=False):
        return mock_workflow

    # Create a FastAPI app and mount our router
@@ -122,12 +116,9 @@ async def test_chat_with_agent_workflow(logger: logging.Logger) -> None:

    # Create a chat request asking about weather
    chat_request = ChatRequest(
-        id="test",
        messages=[
-            ChatAPIMessage(
-                role=MessageRole.USER, content="What's the weather in New York?"
-            )
-        ],
+            ChatAPIMessage(role="user", content="What's the weather in New York?")
+        ]
    )

    # Make a request to the chat endpoint
@@ -9,9 +9,9 @@ from llama_index.core.agent.workflow.workflow_events import AgentStream
 from llama_index.core.types import MessageRole
 from llama_index.core.workflow import StopEvent
 from llama_index.core.workflow.handler import WorkflowHandler
+from llama_index.server.api.models import ChatAPIMessage, ChatRequest
 from llama_index.server.api.routers.chat import _stream_content
 from llama_index.server.api.utils.vercel_stream import VercelStreamResponse
-from llama_index.server.models.chat import ChatAPIMessage, ChatRequest


@pytest.fixture()
@@ -22,8 +22,7 @@ def logger() -> logging.Logger:
@pytest.fixture()
 def chat_request() -> ChatRequest:
    return ChatRequest(
-        id="test",
-        messages=[ChatAPIMessage(role=MessageRole.USER, content="test message")],
+        messages=[ChatAPIMessage(role=MessageRole.USER, content="test message")]
    )


@@ -51,7 +50,7 @@ class TestEventStream:
        result = [
            chunk
            async for chunk in _stream_content(
-                mock_workflow_handler, logger, chat_request.id
+                mock_workflow_handler, chat_request, logger
            )
        ]

@@ -76,7 +75,7 @@ class TestEventStream:
        result = [
            chunk
            async for chunk in _stream_content(
-                mock_workflow_handler, logger, chat_request.id
+                mock_workflow_handler, chat_request, logger
            )
        ]

@@ -100,7 +99,7 @@ class TestEventStream:
        result = [
            chunk
            async for chunk in _stream_content(
-                mock_workflow_handler, logger, chat_request.id
+                mock_workflow_handler, chat_request, logger
            )
        ]

@@ -125,7 +124,7 @@ class TestEventStream:
        result = [
            chunk
            async for chunk in _stream_content(
-                mock_workflow_handler, logger, chat_request.id
+                mock_workflow_handler, chat_request, logger
            )
        ]

@@ -149,7 +148,7 @@ class TestEventStream:
        result = [
            chunk
            async for chunk in _stream_content(
-                mock_workflow_handler, logger, chat_request.id
+                mock_workflow_handler, chat_request, logger
            )
        ]

@@ -172,7 +171,7 @@ class TestEventStream:
        result = [
            chunk
            async for chunk in _stream_content(
-                mock_workflow_handler, logger, chat_request.id
+                mock_workflow_handler, chat_request, logger
            )
        ]

@@ -197,7 +196,7 @@ class TestEventStream:
        result = [
            chunk
            async for chunk in _stream_content(
-                mock_workflow_handler, logger, chat_request.id
+                mock_workflow_handler, chat_request, logger
            )
        ]

@@ -1936,7 +1936,7 @@ wheels = [

 [[package]]
 name = "llama-index-server"
-version = "0.1.19"
+version = "0.1.17"
 source = { editable = "." }
 dependencies = [
    { name = "cachetools" },