feat: Add config for suggest next question (#640)

* Enhance LlamaIndexServer with next question suggestion feature - Added `suggest_next_questions` parameter to the LlamaIndexServer for suggesting follow-up questions after the assistant's response. - Updated README.md to document the new configuration option. - Introduced `SUGGEST_NEXT_QUESTION_PROMPT` in prompts.py for customizable question suggestions. - Bumped version to 0.1.16 in uv.lock to reflect the new feature. * Implement next question suggestion feature in LlamaIndexServer - Added `suggestNextQuestions` option to LlamaIndexServer for suggesting follow-up questions after the assistant's response. - Updated README.md to include the new configuration option. - Modified example workflow to utilize the new feature. - Enhanced chat handler to conditionally send suggested questions based on the new option. * add changeset * remove log * bundle ui instead of download * check test * check test check test check test check test check test check test check test check test check test check test * fix tests * Update artifact path in workflow and clarify README.md text - Changed the artifact path in the GitHub Actions workflow from `python/llama-index-server/dist/` to `dist/`. - Revised README.md to clarify the default prompt used for the `suggest_next_questions` configuration option. * support changeset for python * refactor: update llama-index-server structure and workflows * fix workflows * fix workflows * fix workflows * add changeset * fix cannot release python * Update packages/server/README.md Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com> * Update starter questions in LlamaIndex App and add TODO for suggestion feature in chat API --------- Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de> Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com>
2026-06-30 21:08:02 -04:00 · 2025-05-23 12:48:45 +07:00
parent bbae802bed
commit 0bc5a0d882
34 changed files with 527 additions and 477 deletions
@@ -0,0 +1,5 @@
+---
+"@create-llama/llama-index-server": patch
+---
+
+Add suggestNextQuestions config
@@ -0,0 +1,5 @@
+---
+"@llamaindex/server": patch
+---
+
+Add suggestNextQuestions config
@@ -16,6 +16,16 @@ jobs:

      - uses: pnpm/action-setup@v3

+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -17,6 +17,11 @@ jobs:

      - uses: pnpm/action-setup@v3

+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
      - name: Install uv
        uses: astral-sh/setup-uv@v3

@@ -56,3 +61,5 @@ jobs:
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
@@ -1,138 +0,0 @@
-name: Release llama-index-server
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - "python/llama-index-server/**"
-      - ".github/workflows/release_llama_index_server.yml"
-  pull_request:
-    types:
-      - closed
-
-concurrency: ${{ github.workflow }}-${{ github.ref }}
-
-jobs:
-  release:
-    name: Create Release PR
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'push' && 
-      !startsWith(github.ref, 'refs/heads/release/llama-index-server-v') &&
-      !contains(github.event.head_commit.message, 'Release: llama-index-server v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras --dev
-
-      - name: Setup Git
-        run: |
-          git config --global user.email "github-actions[bot]@users.noreply.github.com"
-          git config --global user.name "github-actions[bot]"
-
-      - name: Bump patch version
-        shell: bash
-        run: |
-          uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version | awk -F. '{$NF = $NF + 1;}1' OFS=.)
-          git add pyproject.toml
-          git commit -m "chore(release): bump llama-index-server version to $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)"
-
-      - name: Get current version
-        id: get_version
-        shell: bash
-        run: |
-          version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Create Release PR
-        uses: peter-evans/create-pull-request@v6
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          title: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            This PR was automatically created to release a new version of the llama-index-server package.
-
-            Version: ${{ steps.get_version.outputs.current_version }}
-
-            Please review the changes and merge to trigger the release.
-          branch: release/llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          base: main
-          labels: release, llama-index-server
-
-  publish:
-    name: Publish to PyPI
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'pull_request' && 
-      github.event.pull_request.merged == true && 
-      startsWith(github.event.pull_request.title, 'Release: llama-index-server') &&
-      startsWith(github.event.pull_request.head.ref, 'release/llama-index-server-v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras
-
-      - name: Get current version
-        id: get_version
-        shell: bash
-        run: |
-          version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Build package
-        shell: bash
-        run: uv build --no-sources
-
-      - name: Publish to PyPI
-        shell: bash
-        run: uv publish --token ${{ secrets.PYPI_TOKEN }}
-
-      - name: Create GitHub Release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          name: "llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            Release of llama-index-server v${{ steps.get_version.outputs.current_version }}
-          draft: false
-          prerelease: false
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -5,6 +5,7 @@ on:

 env:
  PYTHON_VERSION: "3.9"
+  UI_TEST: "true"

 jobs:
  unit-test:
@@ -19,20 +20,27 @@ jobs:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
        with:
-          python-version: ${{ matrix.python-version }}
+          node-version-file: ".nvmrc"
+          cache: "pnpm"

      - name: Install dependencies
        shell: bash
-        run: uv sync --all-extras --dev
+        run: pnpm install && pnpm build

      - name: Run unit tests
        shell: bash
@@ -46,20 +54,20 @@ jobs:
        working-directory: python/llama-index-server
    steps:
      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras --dev
+        run: pnpm install

      - name: Run mypy
        shell: bash
@@ -73,27 +81,56 @@ jobs:
        working-directory: python/llama-index-server
    steps:
      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
+      - uses: pnpm/action-setup@v3

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

-      - name: Install build package
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "pnpm"
+
+      - name: Install dependencies
+        run: pnpm install && pnpm build
+
+      - name: Build package
        shell: bash
-        run: uv sync --all-extras
+        run: uv build
+
+      - name: Get the absolute wheel file path and save it to the output
+        shell: bash
+        id: get_whl_path
+        run: |
+          WHL_FILE=$(readlink -f dist/*.whl)
+          echo "whl_file=$WHL_FILE" >> $GITHUB_OUTPUT

      - name: Test import
        shell: bash
-        run: uv run python -c "from llama_index.server import LlamaIndexServer"
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          uv run --with $WHL_FILE python -c "from llama_index.server import LlamaIndexServer"
+
+      - name: Check frontend resources is present
+        shell: bash
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          uv run --with $WHL_FILE python -c "from llama_index.server.chat_ui import check_ui_resources; check_ui_resources()"

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: llama-index-server
-          path: python/llama-index-server/dist/
+          path: dist/
@@ -13,7 +13,8 @@
  },
  "license": "MIT",
  "workspaces": [
-    "packages/*"
+    "packages/*",
+    "python/*"
  ],
  "scripts": {
    "dev": "pnpm -r dev",
@@ -24,8 +25,10 @@
    "format:write": "prettier --ignore-unknown --write .",
    "prepare": "husky",
    "new-snapshot": "pnpm -r build && changeset version --snapshot",
-    "new-version": "pnpm -r build && changeset version",
-    "release": "pnpm -r build && changeset publish",
+    "new-version-python": "pnpm --filter @create-llama/llama-index-server new-version",
+    "new-version": "pnpm -r build && changeset version && pnpm new-version-python",
+    "release-python": "pnpm --filter @create-llama/llama-index-server release",
+    "release": "pnpm -r build && changeset publish && pnpm release-python",
    "release-snapshot": "pnpm -r build && changeset publish --tag snapshot"
  },
  "devDependencies": {
@@ -65,6 +65,7 @@ The `LlamaIndexServer` accepts the following configuration options:
  - `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
  - `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
  - `dev_mode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/src/workflow.ts`. Please start the server in dev mode (`npm run dev`) to see this reload feature enabled.
+- `suggestNextQuestions`: Whether to suggest next questions after the assistant's response (default: `true`). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable.

 LlamaIndexServer accepts all the configuration options from Nextjs Custom Server such as `port`, `hostname`, `dev`, etc.
 See all Nextjs Custom Server options [here](https://nextjs.org/docs/app/building-your-application/configuring/custom-server).
@@ -35,6 +35,7 @@ export const workflowFactory = async () => {

 new LlamaIndexServer({
  workflow: workflowFactory,
+  suggestNextQuestions: true,
  uiConfig: {
    appTitle: "LlamaIndex App",
    starterQuestions: ["What is the color of the dog?"],
@@ -47,6 +47,7 @@ export async function POST(req: NextRequest) {
    );

    const dataStream = toDataStream(workflowEventStream, {
+      // TODO: Support enable/disable suggestion
      callbacks: {
        onFinal: async (completion, dataStreamWriter) => {
          chatHistory.push({
@@ -16,6 +16,7 @@ export const handleChat = async (
  req: IncomingMessage,
  res: ServerResponse,
  workflowFactory: WorkflowFactory,
+  suggestNextQuestions: boolean,
 ) => {
  try {
    const body = await parseRequestBody(req);
@@ -53,7 +54,9 @@ export const handleChat = async (
            role: "assistant" as MessageType,
            content: completion,
          });
-          await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          if (suggestNextQuestions) {
+            await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          }
        },
      },
    });
@@ -1,4 +1,5 @@
 export * from "./events";
+export * from "./prompts";
 export * from "./server";
 export * from "./types";
 export { generateEventComponent } from "./utils/gen-ui";
@@ -0,0 +1,14 @@
+export const NEXT_QUESTION_PROMPT = `You're a helpful assistant! 
+Your task is to suggest the next question that user might ask. 
+Here is the conversation history
+---------------------
+{conversation}
+---------------------
+Given the conversation history, please give me 3 questions that user might ask next!
+Your answer should be wrapped in three sticks which follows the following format:
+\`\`\`
+<question 1>
+<question 2>
+<question 3>
+\`\`\`
+`;
@@ -18,13 +18,15 @@ export class LlamaIndexServer {
  app: ReturnType<typeof next>;
  workflowFactory: () => Promise<Workflow> | Workflow;
  componentsDir?: string | undefined;
+  suggestNextQuestions: boolean;

  constructor(options: LlamaIndexServerOptions) {
-    const { workflow, ...nextAppOptions } = options;
+    const { workflow, suggestNextQuestions, ...nextAppOptions } = options;
    this.app = next({ dev, dir: nextDir, ...nextAppOptions });
    this.port = nextAppOptions.port ?? parseInt(process.env.PORT || "3000", 10);
    this.workflowFactory = workflow;
    this.componentsDir = options.uiConfig?.componentsDir;
+    this.suggestNextQuestions = suggestNextQuestions ?? true;

    if (this.componentsDir) {
      this.createComponentsDir(this.componentsDir);
@@ -52,7 +54,8 @@ export class LlamaIndexServer {
        LLAMA_CLOUD_API: ${JSON.stringify(llamaCloudApi)},
        STARTER_QUESTIONS: ${JSON.stringify(starterQuestions)},
        COMPONENTS_API: ${JSON.stringify(componentsApi)},
-        DEV_MODE: ${JSON.stringify(devMode)}
+        DEV_MODE: ${JSON.stringify(devMode)},
+        SUGGEST_NEXT_QUESTIONS: ${JSON.stringify(this.suggestNextQuestions)}
      }
    `;
    fs.writeFileSync(configFile, content);
@@ -77,7 +80,12 @@ export class LlamaIndexServer {
        // because of https://github.com/vercel/next.js/discussions/79402 we can't use route.ts here, so we need to call this custom route
        // when calling `pnpm eject`, the user will get an equivalent route at [path to chat route.ts]
        // make sure to keep its semantic in sync with handleChat
-        return handleChat(req, res, this.workflowFactory);
+        return handleChat(
+          req,
+          res,
+          this.workflowFactory,
+          this.suggestNextQuestions,
+        );
      }

      if (
@@ -23,4 +23,5 @@ export type UIConfig = {
 export type LlamaIndexServerOptions = NextAppOptions & {
  workflow: WorkflowFactory;
  uiConfig?: UIConfig;
+  suggestNextQuestions?: boolean;
 };
@@ -1,19 +1,7 @@
+import { getEnv } from "@llamaindex/env";
 import type { DataStreamWriter } from "ai";
 import { type ChatMessage, Settings } from "llamaindex";
-
-const NEXT_QUESTION_PROMPT = `You're a helpful assistant! Your task is to suggest the next question that user might ask. 
-Here is the conversation history
---------------------
-{conversation}
---------------------
-Given the conversation history, please give me 3 questions that user might ask next!
-Your answer should be wrapped in three sticks which follows the following format:
-\`\`\`
-<question 1>
-<question 2>
-<question 3>
-\`\`\`
-`;
+import { NEXT_QUESTION_PROMPT } from "../prompts";

 export const sendSuggestedQuestionsEvent = async (
  streamWriter: DataStreamWriter,
@@ -32,10 +20,8 @@ export async function generateNextQuestions(conversation: ChatMessage[]) {
  const conversationText = conversation
    .map((message) => `${message.role}: ${message.content}`)
    .join("\n");
-  const message = NEXT_QUESTION_PROMPT.replace(
-    "{conversation}",
-    conversationText,
-  );
+  const promptTemplate = getEnv("NEXT_QUESTION_PROMPT") || NEXT_QUESTION_PROMPT;
+  const message = promptTemplate.replace("{conversation}", conversationText);

  try {
    const response = await Settings.llm.complete({ prompt: message });
@@ -402,6 +402,12 @@ importers:
        specifier: ^5.3.2
        version: 5.8.3

+  python/llama-index-server:
+    dependencies:
+      '@llamaindex/server':
+        specifier: workspace:*
+        version: link:../../packages/server
+
 packages:

  '@ai-sdk/provider-utils@2.2.7':
@@ -1,3 +1,4 @@
 packages:
  - "packages/*"
  - "packages/server/examples"
+  - "python/*"
@@ -5,6 +5,7 @@
 **/venv
 **/env
 **/llama-index-server.egg-info
+llama_index/server/resources/ui

 # Jupyter files
 **/*.ipynb
@@ -84,6 +84,7 @@ The LlamaIndexServer accepts the following configuration parameters:
  - `component_dir`: The directory for custom UI components rendering events emitted by the workflow. The default is None, which does not render custom UI components.
  - `llamacloud_index_selector`: Whether to show the LlamaCloud index selector in the chat UI (default: False). Requires `LLAMA_CLOUD_API_KEY` to be set.
  - `dev_mode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/workflow.py`. You might also need to set `env="dev"` and start the server with the reload feature enabled.
+- `suggest_next_questions`: Whether to suggest next questions after the assistant's response (default: True). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable. The default prompt used is defined in `llama_index.server.prompts.SUGGEST_NEXT_QUESTION_PROMPT`.
 - `verbose`: Enable verbose logging
 - `api_prefix`: API route prefix (default: "/api")
 - `server_url`: The deployment URL of the server (default is None)
@@ -7,11 +7,13 @@ from llama_index.server import LlamaIndexServer, UIConfig
 def create_app() -> FastAPI:
    app = LlamaIndexServer(
        workflow_factory=create_workflow,
+        suggest_next_questions=True,
+        env="dev",
        ui_config=UIConfig(
            app_title="Artifact",
            starter_questions=[
-                "Tell me a funny joke.",
-                "Tell me some jokes about AI.",
+                "Tell me a funny joke",
+                "Tell me some jokes about AI",
            ],
            component_dir="components",
            dev_mode=True,  # To show the dev UI, should disable this in production
@@ -3,7 +3,7 @@ import os
 from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Union

-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, field_validator

 from llama_index.core.schema import NodeWithScore
 from llama_index.core.types import ChatMessage, MessageRole
@@ -13,13 +13,6 @@ from llama_index.server.settings import server_settings
 logger = logging.getLogger("uvicorn")


-class ChatConfig(BaseModel):
-    next_question_suggestions: bool = Field(
-        default=True,
-        description="Whether to suggest next questions",
-    )
-
-
 class ChatAPIMessage(BaseModel):
    role: MessageRole
    content: str
@@ -32,7 +25,6 @@ class ChatAPIMessage(BaseModel):
 class ChatRequest(BaseModel):
    messages: List[ChatAPIMessage]
    data: Optional[Any] = None
-    config: Optional[ChatConfig] = ChatConfig()

    @field_validator("messages")
    def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
@@ -28,6 +28,7 @@ from llama_index.server.services.llamacloud import LlamaCloudFileService
 def chat_router(
    workflow_factory: Callable[..., Workflow],
    logger: logging.Logger,
+    suggest_next_questions: bool = True,
 ) -> APIRouter:
    router = APIRouter(prefix="/chat")

@@ -56,7 +57,7 @@ def chat_router(
                SourceNodesFromToolCall(),
                LlamaCloudFileDownload(background_tasks),
            ]
-            if request.config and request.config.next_question_suggestions:
+            if suggest_next_questions:
                callbacks.append(SuggestNextQuestions(request))
            stream_handler = StreamHandler(
                workflow_handler=workflow_handler,
@@ -1,55 +1,87 @@
+import importlib.resources
 import logging
 import shutil
 from pathlib import Path
 from typing import Optional

-import requests
-
-CHAT_UI_VERSION = "0.2.1"
+PACKAGE_NAME = "llama_index.server.resources"
+RESOURCE_DIR_NAME = "ui"


-def download_chat_ui(
+def check_ui_resources() -> None:
+    """
+    Verify that the bundled UI resources directory exists in the package.
+
+    Raises:
+        FileNotFoundError: If the resources package cannot be imported or the
+            UI directory is missing from it.
+    """
+    try:
+        ui_dir = importlib.resources.files(PACKAGE_NAME).joinpath(RESOURCE_DIR_NAME)
+        # joinpath() only builds a path and never fails for a missing entry,
+        # so the existence of the directory has to be checked explicitly.
+        found = ui_dir.is_dir()
+    except ImportError as e:
+        raise FileNotFoundError("UI resources not found in bundled package") from e
+    if not found:
+        raise FileNotFoundError("UI resources not found in bundled package")
+
+
+def copy_bundled_chat_ui(
    logger: Optional[logging.Logger] = None, target_path: str = ".ui"
 ) -> None:
+    # Check if the UI resources directory exists
+    check_ui_resources()
+
    if logger is None:
        logger = logging.getLogger("uvicorn")
-    path = Path(target_path)
-    temp_dir = _download_package(_get_download_link(CHAT_UI_VERSION))
-    _copy_ui_files(temp_dir, path)
-    logger.info("Chat UI downloaded and copied to static folder")

+    destination_path = Path(target_path)
+    destination_path.mkdir(parents=True, exist_ok=True)

-def _get_download_link(version: str) -> str:
-    """Get the download link for the chat UI from the npm registry."""
-    return f"https://registry.npmjs.org/@llamaindex/server/-/server-{version}.tgz"
-
-
-def _download_package(url: str) -> Path:
-    """Download tar.gz file and extract all files into a temporary directory."""
-    import io
-    import tarfile
-    import tempfile
-
-    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
-    content = response.content
-
-    temp_dir = Path(tempfile.mkdtemp())
-
-    with tarfile.open(fileobj=io.BytesIO(content), mode="r:gz") as tar:
-        tar.extractall(path=temp_dir)
-
-    return temp_dir
-
-
-def _copy_ui_files(temp_dir: Path, target_path: Path) -> None:
-    """Copy files from the .next directory to the static directory."""
-    target_path.mkdir(parents=True, exist_ok=True)
-    next_dir = temp_dir / "package/dist/static"
-
-    if next_dir.exists():
-        for item in next_dir.iterdir():
-            dest = target_path / item.name
+    try:
+        # Clear the destination directory first to avoid stale files
+        for item in destination_path.iterdir():
            if item.is_dir():
-                shutil.copytree(item, dest, dirs_exist_ok=True)
+                shutil.rmtree(item)
            else:
-                shutil.copy2(item, dest)
+                item.unlink()
+
+        # Get a reference to the source directory using importlib.resources.files (Python 3.9+)
+        source_dir_ref = importlib.resources.files(PACKAGE_NAME).joinpath(
+            RESOURCE_DIR_NAME
+        )
+
+        if not source_dir_ref.is_dir():
+            logger.error(
+                f"Static UI resource directory '{RESOURCE_DIR_NAME}' not found in package '{PACKAGE_NAME}'. Path: {source_dir_ref}"
+            )
+            logger.error(
+                "Ensure the static files are correctly bundled with the package and the path is correct."
+            )
+            return
+
+        for source_item_path_ref in source_dir_ref.iterdir():
+            # Skip __init__.py or other non-static files if present (though less likely needed with direct iteration)
+            if source_item_path_ref.name.startswith(
+                "__"
+            ) or source_item_path_ref.name.endswith(".py"):
+                continue
+
+            dest_item_path = destination_path / source_item_path_ref.name
+
+            # importlib.resources.as_file is needed to get a concrete path for shutil operations
+            with importlib.resources.as_file(
+                source_item_path_ref
+            ) as concrete_source_item_path:
+                if concrete_source_item_path.is_dir():
+                    shutil.copytree(
+                        concrete_source_item_path, dest_item_path, dirs_exist_ok=True
+                    )
+                elif concrete_source_item_path.is_file():
+                    shutil.copy2(concrete_source_item_path, dest_item_path)
+                else:
+                    logger.warning(
+                        f"Skipping resource '{source_item_path_ref.name}' as it's not a file or directory."
+                    )
+
+        logger.info(f"Chat UI files copied from package to '{destination_path}'")
+
+    except FileNotFoundError:
+        logger.error(
+            "Oops! The chat UI files are not found. Please report this issue to the LlamaIndex team."
+        )
+    except Exception as e:
+        logger.error(f"Failed to copy bundled chat UI files: {e}.")
@@ -0,0 +1,15 @@
+# Default prompt of SuggestNextQuestionsService; override it with the `NEXT_QUESTION_PROMPT` environment variable.
+# Backticks are written unescaped: "\`" is an invalid escape sequence in a (non-raw) Python string literal.
+SUGGEST_NEXT_QUESTION_PROMPT = """You're a helpful assistant! Your task is to suggest the next questions that user might interested in to keep the conversation going.
+Here is the conversation history
+---------------------
+{conversation}
+---------------------
+Given the conversation history, please give me 3 questions that user might ask next!
+Your answer should be wrapped in three sticks without any index numbers and follows the following format:
+```
+<question 1>
+<question 2>
+<question 3>
+```
+"""
@@ -15,7 +15,7 @@ from llama_index.server.api.routers import (
    custom_components_router,
    dev_router,
 )
-from llama_index.server.chat_ui import download_chat_ui
+from llama_index.server.chat_ui import copy_bundled_chat_ui
 from llama_index.server.settings import server_settings


@@ -68,11 +68,12 @@ class LlamaIndexServer(FastAPI):
        self,
        workflow_factory: Callable[..., Workflow],
        logger: Optional[logging.Logger] = None,
-        use_default_routers: Optional[bool] = True,
+        use_default_routers: Optional[bool] = None,
        env: Optional[str] = None,
        ui_config: Optional[Union[UIConfig, dict]] = None,
        server_url: Optional[str] = None,
        api_prefix: Optional[str] = None,
+        suggest_next_questions: Optional[bool] = None,
        verbose: bool = False,
        *args: Any,
        **kwargs: Any,
@@ -88,6 +89,7 @@ class LlamaIndexServer(FastAPI):
            ui_config: The configuration for the chat UI.
            server_url: The URL of the server.
            api_prefix: The prefix for the API endpoints.
+            suggest_next_questions: Whether to suggest next questions after the assistant's response.
            verbose: Whether to show verbose logs.
        """
        super().__init__(*args, **kwargs)
@@ -95,7 +97,12 @@ class LlamaIndexServer(FastAPI):
        self.workflow_factory = workflow_factory
        self.logger = logger or logging.getLogger("uvicorn")
        self.verbose = verbose
-        self.use_default_routers = use_default_routers or True
+        self.use_default_routers = (
+            True if use_default_routers is None else use_default_routers
+        )
+        self.suggest_next_questions = (
+            True if suggest_next_questions is None else suggest_next_questions
+        )
        if ui_config is None:
            self.ui_config = UIConfig()
        elif isinstance(ui_config, dict):
@@ -146,6 +153,7 @@ class LlamaIndexServer(FastAPI):
            chat_router(
                self.workflow_factory,
                self.logger,
+                self.suggest_next_questions,
            ),
            prefix=server_settings.api_prefix,
        )
@@ -177,9 +185,11 @@ class LlamaIndexServer(FastAPI):
            if not os.path.exists(self.ui_config.ui_path):
                os.makedirs(self.ui_config.ui_path)
                self.logger.warning(
-                    f"UI files not found, downloading UI to {self.ui_config.ui_path}"
+                    f"UI files not found at {self.ui_config.ui_path}. Copying bundled UI files."
+                )
+                copy_bundled_chat_ui(
+                    logger=self.logger, target_path=self.ui_config.ui_path
                )
-                download_chat_ui(logger=self.logger, target_path=self.ui_config.ui_path)
            self._mount_static_files(
                directory=self.ui_config.ui_path,
                path="/",
@@ -6,6 +6,7 @@ from typing import List, Optional, Union
 from llama_index.core.prompts import PromptTemplate
 from llama_index.core.settings import Settings
 from llama_index.server.api.models import ChatAPIMessage
+from llama_index.server.prompts import SUGGEST_NEXT_QUESTION_PROMPT

 logger = logging.getLogger("uvicorn")

@@ -15,28 +16,11 @@ class SuggestNextQuestionsService:
    Suggest the next questions that user might ask based on the conversation history.
    """

-    prompt = PromptTemplate(
-        r"""
-You're a helpful assistant! Your task is to suggest the next questions that user might interested in to keep the conversation going.
-Here is the conversation history
---------------------
-{conversation}
---------------------
-Given the conversation history, please give me 3 questions that user might ask next!
-Your answer should be wrapped in three sticks without any index numbers and follows the following format:
-\`\`\`
-<question 1>
-<question 2>
-<question 3>
-\`\`\`
-"""
-    )
-
    @classmethod
    def get_configured_prompt(cls) -> PromptTemplate:
        prompt = os.getenv("NEXT_QUESTION_PROMPT", None)
        if not prompt:
-            return cls.prompt
+            return PromptTemplate(SUGGEST_NEXT_QUESTION_PROMPT)
        return PromptTemplate(prompt)

    @classmethod
@@ -0,0 +1,16 @@
+{
+  "name": "@create-llama/llama-index-server",
+  "private": true,
+  "version": "0.1.16",
+  "type": "module",
+  "scripts": {
+    "prebuild": "uv run -- scripts/frontend.py --mode copy",
+    "build": "uv build",
+    "clean": "rm -rf dist build *.egg-info",
+    "new-version": "uv run python scripts/sync_version.py && git add pyproject.toml",
+    "release": "uv publish"
+  },
+  "dependencies": {
+    "@llamaindex/server": "workspace:*"
+  }
+}
@@ -63,5 +63,6 @@ dev = [
    "llama-cloud>=0.1.17,<1.0.0",
 ]

-[tool.hatch.build.targets.wheel]
+[tool.hatch.build]
 packages = ["llama_index/"]
+artifacts = ["llama_index/server/resources"]
@@ -0,0 +1,154 @@
+# /// script
+# requires-python = ">=3.10"
+# dependencies = []
+# ///
+# This script is used to build the frontend for the llama-index-server
+# You need to have pnpm installed to run this script
+import os
+import subprocess
+import argparse
+import shutil
+
+
+def _get_pnpm_executable() -> str:
+    """Return the full path of the pnpm executable found on PATH.
+
+    ``pnpm`` is looked up first and ``pnpm.cmd`` second; the first name that
+    resolves wins. When neither resolves, a hint is printed and the script
+    exits with status 1.
+    """
+    for candidate in ("pnpm", "pnpm.cmd"):
+        resolved = shutil.which(candidate)
+        if resolved:
+            return resolved
+    print("pnpm not found. Please ensure pnpm is installed and in your PATH.")
+    exit(1)
+
+
+def check_pnpm_installation() -> None:
+    """Confirm that pnpm can be located and answers ``--version``.
+
+    Exits with status 1 (after printing a hint) when the version command
+    returns a non-zero status.
+    """
+    version_cmd = [_get_pnpm_executable(), "--version"]
+    try:
+        # Output is captured so that a successful check prints nothing.
+        subprocess.run(version_cmd, check=True, capture_output=True)
+    except subprocess.CalledProcessError:
+        # The executable was found but is not working; a missing executable
+        # is already handled by _get_pnpm_executable.
+        print(
+            "pnpm is installed, but '--version' command failed. Please check your pnpm installation."
+        )
+        exit(1)
+
+
+def get_workspace_path() -> str:
+    """Return the workspace path derived from the output of ``pnpm root -w``.
+
+    A trailing ``node_modules`` is removed from the command output, so the
+    returned string keeps whatever separator preceded it. Exits with
+    status 1 when the pnpm command fails.
+    """
+    root_cmd = [_get_pnpm_executable(), "root", "-w"]
+    try:
+        reported = subprocess.check_output(root_cmd).decode("utf-8").strip()
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to get workspace path using 'pnpm root -w': {e}")
+        print("Ensure you are in a pnpm workspace and pnpm is functioning correctly.")
+        exit(1)
+    # Drop 'node_modules' from the end of the path if it is there.
+    return reported.removesuffix("node_modules")
+
+
+def build_frontend() -> None:
+    """Build the ``@llamaindex/server`` package with pnpm.
+
+    Exits with status 1 when the build command returns a non-zero status.
+    """
+    build_cmd = [_get_pnpm_executable(), "--filter", "@llamaindex/server", "build"]
+    print("Building Frontend...")
+    # TODO: The assets could probably be copied from node_modules to save
+    # time, but that breaks if the user hasn't run `pnpm build` for the
+    # server package.
+    try:
+        subprocess.run(build_cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Frontend build failed: {e}")
+        exit(1)
+    else:
+        print("Frontend built successfully.")
+
+
+def get_paths() -> tuple[str, str, str]:
+    """Return ``(workspace_path, fe_assets_dir, link_path)``.
+
+    ``fe_assets_dir`` is ``packages/server/dist/static`` under the workspace
+    and ``link_path`` is the ``resources/ui`` directory inside the
+    llama-index-server Python package.
+    """
+    workspace_path = get_workspace_path()
+    assets_parts = ("packages", "server", "dist", "static")
+    ui_parts = (
+        "python",
+        "llama-index-server",
+        "llama_index",
+        "server",
+        "resources",
+        "ui",
+    )
+    fe_assets_dir = os.path.join(workspace_path, *assets_parts)
+    link_path = os.path.join(workspace_path, *ui_parts)
+    return workspace_path, fe_assets_dir, link_path
+
+
+def link_static_files() -> None:
+    """
+    Symlink the built frontend assets into the server package's ui directory.
+
+    Only works for POSIX systems. Linking instead of copying is useful for
+    development. Whatever currently sits at the target (directory, file or
+    symlink, including a dangling one) is removed first.
+
+    Exits with status 1 on Windows or when the frontend assets directory
+    does not exist.
+    """
+    # If user is on Windows, tell them to use WSL
+    if os.name == "nt":
+        print("Windows is not supported. Please use WSL to run this script.")
+        exit(1)
+    print("Linking static files...")
+    # The link must point at the absolute path of the assets directory
+    _, fe_assets_dir, link_path = get_paths()
+    if not os.path.exists(fe_assets_dir):
+        print(
+            f"Frontend assets directory {fe_assets_dir} does not exist. Please build the frontend first."
+        )
+        exit(1)
+    # islink is tested before anything else: os.path.exists follows symlinks
+    # and reports False for a dangling link, which would then never be removed.
+    if os.path.islink(link_path) or os.path.isfile(link_path):
+        os.unlink(link_path)
+    elif os.path.isdir(link_path):
+        shutil.rmtree(link_path)
+    # The parent directory may not exist yet on a fresh checkout
+    os.makedirs(os.path.dirname(link_path), exist_ok=True)
+    os.symlink(fe_assets_dir, link_path)
+    print("Static files linked successfully.")
+
+
+def copy_static_files() -> None:
+    """
+    Copy the built frontend assets into the server package's ui directory.
+
+    Whatever currently sits at the target (directory, file or symlink,
+    including a dangling one) is replaced. The source directory is checked
+    before anything is deleted, so a missing build never destroys an
+    existing copy of the UI.
+
+    Exits with status 1 when the frontend assets directory does not exist.
+    """
+    _, fe_assets_dir, link_path = get_paths()
+    # Validate the source first: removing the old ui directory and then
+    # failing in copytree would leave the package without any UI files.
+    if not os.path.exists(fe_assets_dir):
+        print(
+            f"Frontend assets directory {fe_assets_dir} does not exist. Please build the frontend first."
+        )
+        exit(1)
+    # islink is tested before anything else: os.path.exists follows symlinks
+    # and reports False for a dangling link, which copytree cannot overwrite.
+    if os.path.islink(link_path) or os.path.isfile(link_path):
+        os.unlink(link_path)
+    elif os.path.isdir(link_path):
+        shutil.rmtree(link_path)
+    # Copy the static files to the output directory
+    shutil.copytree(fe_assets_dir, link_path, dirs_exist_ok=True)
+    print("Static files copied successfully.")
+
+
+if __name__ == "__main__":
+    # Command-line entry point: check pnpm, optionally build the frontend,
+    # then link or copy the static files depending on --mode.
+    cli = argparse.ArgumentParser(
+        description="Prepare the frontend for the llama-index-server"
+    )
+    cli.add_argument(
+        "--mode",
+        choices=["link", "copy"],
+        default="copy",
+        help="Link the static files instead of copying them. Only works for POSIX systems.",
+    )
+    cli.add_argument("--skip-build", action="store_true", help="Skip the build step.")
+    options = cli.parse_args()
+
+    check_pnpm_installation()
+    if not options.skip_build:
+        build_frontend()
+    # argparse restricts --mode to "link" or "copy", so anything else is copy
+    place_assets = link_static_files if options.mode == "link" else copy_static_files
+    place_assets()
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+import json
+from pathlib import Path
+
+
+def sync_versions():
+    """Copy the version from package.json into pyproject.toml.
+
+    Both files are read from the current working directory. The first
+    ``version = ...`` line inside the ``[project]`` table is rewritten and
+    every other line is left as it is.
+
+    Raises:
+        SystemExit: when no ``version = `` line is found under ``[project]``,
+            so the caller does not stage an unchanged file as if it had been
+            updated.
+    """
+    # Read package.json
+    with open("package.json", "r", encoding="utf-8") as f:
+        npm_version = json.load(f)["version"]
+
+    # Read pyproject.toml
+    pyproject_path = Path("pyproject.toml")
+    lines = pyproject_path.read_text(encoding="utf-8").split("\n")
+
+    # Track the current table line by line, so [project] is recognised even
+    # when a comment or another line sits directly above it.
+    in_project = False
+    updated = False
+    for idx, line in enumerate(lines):
+        stripped = line.strip()
+        if stripped.startswith("[") and stripped.endswith("]"):
+            in_project = stripped == "[project]"
+        elif in_project and line.startswith("version = "):
+            lines[idx] = f'version = "{npm_version}"'
+            updated = True
+            break
+
+    if not updated:
+        raise SystemExit(
+            "Could not find a 'version = ' line in the [project] table of pyproject.toml"
+        )
+
+    # Write back to pyproject.toml
+    pyproject_path.write_text("\n".join(lines), encoding="utf-8")
+    print(f"Updated pyproject.toml version to {npm_version}")
+
+
+if __name__ == "__main__":
+    sync_versions()
@@ -1,13 +1,18 @@
 import json
 import os
 import shutil
+import tempfile
+from pathlib import Path

 import pytest
 from httpx import ASGITransport, AsyncClient
+
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.llms import MockLLM
 from llama_index.server import LlamaIndexServer, UIConfig

+UI_TEST = os.getenv("UI_TEST", "false").lower() == "true"
+

 def fetch_weather(city: str) -> str:
    """Fetch the weather for a given city."""
@@ -31,8 +36,7 @@ def server() -> LlamaIndexServer:
        workflow_factory=_agent_workflow,
        verbose=True,
        use_default_routers=True,
-        mount_ui=False,
-        env="dev",
+        ui_config=UIConfig(enabled=False),
    )


@@ -55,241 +59,93 @@ async def test_server_swagger_docs(server: LlamaIndexServer) -> None:
        assert "Swagger UI" in response.text


-@pytest.mark.asyncio()
-async def test_ui_is_downloaded(server: LlamaIndexServer) -> None:
-    """
-    Test if the UI is downloaded and mounted correctly.
-    """
-    # Clean up any existing static directory first
-    if os.path.exists(".ui"):
-        shutil.rmtree(".ui")
+# UI Integration Tests
+# Make sure you run the scripts/frontend.py script before running these tests
+if UI_TEST:

-    # Create a new server with UI enabled
-    ui_config = UIConfig(
-        enabled=True,
-        app_title="Test UI",
-        starter_questions=["What's the weather like?"],
-    )
-    ui_server = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        use_default_routers=True,
-        env="dev",
-        ui_config=ui_config,
-    )
+    @pytest.mark.asyncio()
+    async def test_ui_is_copied_and_mounted(tmp_path: Path) -> None:
+        """
+        Test if the UI is copied from bundle and mounted correctly.
+        """
+        tmp_ui_dir = str(tmp_path / "ui")
+        print(f"tmp_ui_dir: {tmp_ui_dir}")
+        tmp_component_dir = tempfile.mkdtemp()

-    # Verify that static directory was created with index.html
-    assert os.path.exists("./.ui"), "Static directory was not created"
-    assert os.path.isdir("./.ui"), "Static path is not a directory"
-    assert os.path.exists("./.ui/index.html"), "index.html was not downloaded"
-
-    # Check if the config.js was created with correct content
-    config_path = os.path.join(".ui", "config.js")
-    assert os.path.exists(config_path), "config.js was not created"
-
-    with open(config_path, "r") as f:
-        config_content = f.read()
-        assert "window.LLAMAINDEX =" in config_content
-        config_json = json.loads(
-            config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
+        # Create a new server with UI enabled
+        ui_config = UIConfig(
+            enabled=True,
+            app_title="Test UI",
+            starter_questions=["What's the weather like?"],
+            ui_path=tmp_ui_dir,
+            component_dir=tmp_component_dir,
        )
-        assert config_json["CHAT_API"] == "/api/chat"
-        assert config_json["STARTER_QUESTIONS"] == ["What's the weather like?"]
-        assert config_json["LLAMA_CLOUD_API"] is None
-        assert config_json["APP_TITLE"] == "Test UI"
-
-    # Check if the UI is mounted and accessible
-    async with AsyncClient(
-        transport=ASGITransport(app=ui_server), base_url="http://test"
-    ) as ac:
-        response = await ac.get("/")
-        assert response.status_code == 200
-        assert "text/html" in response.headers["content-type"]
-
-    # Clean up after test
-    shutil.rmtree("./.ui")
-
-
-@pytest.mark.asyncio()
-async def test_ui_is_accessible(server: LlamaIndexServer) -> None:
-    """
-    Test if the UI is accessible.
-    """
-    # Manually trigger UI mounting
-    server.mount_ui()
-
-    async with AsyncClient(
-        transport=ASGITransport(app=server), base_url="http://test"
-    ) as ac:
-        response = await ac.get("/")
-        assert response.status_code == 200
-        assert "text/html" in response.headers["content-type"]
-
-
-@pytest.mark.asyncio()
-async def test_ui_config_customization() -> None:
-    """
-    Test if UI configuration can be customized.
-    """
-    custom_config = UIConfig(
-        enabled=True,
-        app_title="Custom App",
-        starter_questions=["Question 1", "Question 2"],
-        ui_path=".custom_ui",
-    )
-
-    server = LlamaIndexServer(
-        workflow_factory=_agent_workflow, verbose=True, ui_config=custom_config
-    )
-
-    assert server.ui_config.app_title == "Custom App"
-    assert server.ui_config.starter_questions == ["Question 1", "Question 2"]
-    assert server.ui_config.ui_path == ".custom_ui"
-
-    # Clean up if directory was created
-    if os.path.exists(".custom_ui"):
-        shutil.rmtree(".custom_ui")
-
-
-@pytest.mark.asyncio()
-async def test_ui_config_from_dict() -> None:
-    """
-    Test if UI configuration can be initialized from a dictionary.
-    """
-    ui_config_dict = {
-        "enabled": True,
-        "app_title": "Dict Config App",
-        "starter_questions": ["Dict Q1", "Dict Q2"],
-        "ui_path": ".dict_ui",
-    }
-
-    server = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        ui_config=ui_config_dict,
-    )
-
-    # Verify the config was properly converted to UIConfig object
-    assert isinstance(server.ui_config, UIConfig)
-    assert server.ui_config.app_title == "Dict Config App"
-    assert server.ui_config.starter_questions == ["Dict Q1", "Dict Q2"]
-    assert server.ui_config.ui_path == ".dict_ui"
-
-    # Verify the config.js is created with correct content
-    server.mount_ui()
-    config_path = os.path.join(".dict_ui", "config.js")
-    assert os.path.exists(config_path), "config.js was not created"
-
-    with open(config_path, "r") as f:
-        config_content = f.read()
-        assert "window.LLAMAINDEX =" in config_content
-        config_json = json.loads(
-            config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
+        ui_server = LlamaIndexServer(
+            workflow_factory=_agent_workflow,
+            verbose=True,
+            use_default_routers=True,
+            env="dev",
+            ui_config=ui_config,
        )
-        assert config_json["APP_TITLE"] == "Dict Config App"
-        assert config_json["STARTER_QUESTIONS"] == ["Dict Q1", "Dict Q2"]
-        assert config_json["CHAT_API"] == "/api/chat"
-        assert config_json["LLAMA_CLOUD_API"] is None

-    # Clean up
-    if os.path.exists(".dict_ui"):
-        shutil.rmtree(".dict_ui")
+        # Verify that static directory was created with index.html
+        # List files in tmp_ui_dir
+        print("Files in tmp_ui_dir: ", os.listdir(tmp_ui_dir))
+        assert os.path.exists(tmp_ui_dir), "Static directory was not created"
+        assert os.path.isdir(tmp_ui_dir), "Static path is not a directory"
+        assert os.path.exists(os.path.join(tmp_ui_dir, "index.html")), (
+            "index.html was not copied from bundle"
+        )

+        # Check if the config.js was created with correct content
+        config_path = os.path.join(tmp_ui_dir, "config.js")
+        assert os.path.exists(config_path), "config.js was not created"

-async def test_component_dir_creation(server: LlamaIndexServer) -> None:
-    """
-    Test if the component directory is created when specified and doesn't exist.
-    """
-    import os
-    import shutil
+        with open(config_path, "r") as f:
+            config_content = f.read()
+            assert "window.LLAMAINDEX =" in config_content
+            config_json = json.loads(
+                config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
+            )
+            assert config_json["CHAT_API"] == "/api/chat"
+            assert config_json["STARTER_QUESTIONS"] == ["What's the weather like?"]
+            assert config_json["LLAMA_CLOUD_API"] is None
+            assert config_json["APP_TITLE"] == "Test UI"

-    test_component_dir = "./test_components"
+        # Verify directory was created
+        assert os.path.exists(tmp_component_dir), "Component directory was not created"
+        assert os.path.isdir(tmp_component_dir), "Component path is not a directory"

-    # Clean up any existing directory
-    if os.path.exists(test_component_dir):
-        shutil.rmtree(test_component_dir)
+        # Verify component route exists
+        component_route_exists = any(
+            route.path == "/api/components"  # type: ignore
+            for route in ui_server.routes
+        )
+        assert component_route_exists, "Component API route not found in server routes"

-    # Create server with component directory
-    _ = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        ui_config={
-            "component_dir": test_component_dir,
-            "include_ui": True,
-        },
-    )
+        # Check if the UI is mounted and accessible
+        async with AsyncClient(
+            transport=ASGITransport(app=ui_server), base_url="http://test"
+        ) as ac:
+            response = await ac.get("/")
+            assert response.status_code == 200
+            assert "text/html" in response.headers["content-type"]

-    # Verify directory was created
-    assert os.path.exists(test_component_dir), "Component directory was not created"
-    assert os.path.isdir(test_component_dir), "Component path is not a directory"
-
-    # Clean up after test
-    shutil.rmtree(test_component_dir)
+        # Clean up after test
+        shutil.rmtree(tmp_ui_dir)
+        shutil.rmtree(tmp_component_dir)


@pytest.mark.asyncio()
-async def test_component_router_addition(server: LlamaIndexServer, tmp_path) -> None:
-    """
-    Test if the component router is added when component directory is specified.
-    """
-    test_component_dir = tmp_path / "test_components"
-
-    # Create server with component directory
-    component_server = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        ui_config={
-            "component_dir": str(test_component_dir),
-            "include_ui": True,
-        },
-    )
-
-    # Verify component route exists
-    component_route_exists = any(
-        route.path == "/api/components" for route in component_server.routes
-    )
-    assert component_route_exists, "Component API route not found in server routes"
-
-
-@pytest.mark.asyncio()
-async def test_ui_config_includes_components_api(
-    server: LlamaIndexServer, tmp_path
-) -> None:
-    """
-    Test if the UI config includes components API when component directory is set.
-    """
-    test_component_dir = tmp_path / "test_components"
-
-    # Create server with component directory
-    component_server = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        ui_config={
-            "component_dir": str(test_component_dir),
-            "include_ui": True,
-        },
-    )
-
-    # Check if components API is in UI config
-    ui_config = component_server.ui_config
-    assert "COMPONENTS_API" in ui_config.get_config_content(), (
-        "Components API not found in UI config"
-    )
-
-
-@pytest.mark.asyncio()
-async def test_component_router_requires_component_dir(
-    server: LlamaIndexServer,
-) -> None:
+async def test_component_router_requires_component_dir() -> None:
    """
    Test that adding components router without component_dir raises an error.
    """
+    tmp_ui_dir = tempfile.mkdtemp()
    server_without_component_dir = LlamaIndexServer(
        workflow_factory=_agent_workflow,
        verbose=True,
-        ui_config={
-            "include_ui": True,
-        },
+        ui_config=UIConfig(enabled=True, ui_path=tmp_ui_dir),
    )

    with pytest.raises(
@@ -1897,7 +1897,7 @@ wheels = [

 [[package]]
 name = "llama-index-server"
-version = "0.1.15"
+version = "0.1.16"
 source = { editable = "." }
 dependencies = [
    { name = "cachetools" },