diff --git a/.changeset/deep-grapes-do.md b/.changeset/deep-grapes-do.md
new file mode 100644
index 00000000..83a9e5ba
--- /dev/null
+++ b/.changeset/deep-grapes-do.md
@@ -0,0 +1,5 @@
+---
+"@create-llama/llama-index-server": patch
+---
+
+Add suggestNextQuestions config
diff --git a/.changeset/silver-corners-fold.md b/.changeset/silver-corners-fold.md
new file mode 100644
index 00000000..7ed8dc6c
--- /dev/null
+++ b/.changeset/silver-corners-fold.md
@@ -0,0 +1,5 @@
+---
+"@llamaindex/server": patch
+---
+
+Add suggestNextQuestions config
diff --git a/.github/workflows/lint_on_push_or_pull.yml b/.github/workflows/lint_on_push_or_pull.yml
index d16a6aa8..5eecdc7c 100644
--- a/.github/workflows/lint_on_push_or_pull.yml
+++ b/.github/workflows/lint_on_push_or_pull.yml
@@ -16,6 +16,16 @@ jobs:
 
       - uses: pnpm/action-setup@v3
 
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
       - name: Setup Node.js
         uses: actions/setup-node@v4
         with:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 2124c142..12611ced 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -17,6 +17,11 @@ jobs:
 
       - uses: pnpm/action-setup@v3
 
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
       - name: Install uv
         uses: astral-sh/setup-uv@v3
 
@@ -56,3 +61,5 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
diff --git a/.github/workflows/release_llama_index_server.yml b/.github/workflows/release_llama_index_server.yml
deleted file mode 100644
index 91297fbf..00000000
--- a/.github/workflows/release_llama_index_server.yml
+++ /dev/null
@@ -1,138 +0,0 @@
-name: Release llama-index-server
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - "python/llama-index-server/**"
-      - ".github/workflows/release_llama_index_server.yml"
-  pull_request:
-    types:
-      - closed
-
-concurrency: ${{ github.workflow }}-${{ github.ref }}
-
-jobs:
-  release:
-    name: Create Release PR
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'push' && 
-      !startsWith(github.ref, 'refs/heads/release/llama-index-server-v') &&
-      !contains(github.event.head_commit.message, 'Release: llama-index-server v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras --dev
-
-      - name: Setup Git
-        run: |
-          git config --global user.email "github-actions[bot]@users.noreply.github.com"
-          git config --global user.name "github-actions[bot]"
-
-      - name: Bump patch version
-        shell: bash
-        run: |
-          uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version | awk -F. '{$NF = $NF + 1;}1' OFS=.)
-          git add pyproject.toml
-          git commit -m "chore(release): bump llama-index-server version to $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)"
-
-      - name: Get current version
-        id: get_version
-        shell: bash
-        run: |
-          version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Create Release PR
-        uses: peter-evans/create-pull-request@v6
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          title: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            This PR was automatically created to release a new version of the llama-index-server package.
-
-            Version: ${{ steps.get_version.outputs.current_version }}
-
-            Please review the changes and merge to trigger the release.
-          branch: release/llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          base: main
-          labels: release, llama-index-server
-
-  publish:
-    name: Publish to PyPI
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'pull_request' && 
-      github.event.pull_request.merged == true && 
-      startsWith(github.event.pull_request.title, 'Release: llama-index-server') &&
-      startsWith(github.event.pull_request.head.ref, 'release/llama-index-server-v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras
-
-      - name: Get current version
-        id: get_version
-        shell: bash
-        run: |
-          version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Build package
-        shell: bash
-        run: uv build --no-sources
-
-      - name: Publish to PyPI
-        shell: bash
-        run: uv publish --token ${{ secrets.PYPI_TOKEN }}
-
-      - name: Create GitHub Release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          name: "llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            Release of llama-index-server v${{ steps.get_version.outputs.current_version }}
-          draft: false
-          prerelease: false
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.github/workflows/test_llama_index_server.yml b/.github/workflows/test_llama_index_server.yml
index 1ba24f97..4e872467 100644
--- a/.github/workflows/test_llama_index_server.yml
+++ b/.github/workflows/test_llama_index_server.yml
@@ -5,6 +5,7 @@ on:
 
 env:
   PYTHON_VERSION: "3.9"
+  UI_TEST: "true"
 
 jobs:
   unit-test:
@@ -19,20 +20,27 @@ jobs:
         python-version: ["3.9"]
     steps:
       - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
 
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
           enable-cache: true
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
         with:
-          python-version: ${{ matrix.python-version }}
+          node-version-file: ".nvmrc"
+          cache: "pnpm"
 
       - name: Install dependencies
         shell: bash
-        run: uv sync --all-extras --dev
+        run: pnpm install && pnpm build
 
       - name: Run unit tests
         shell: bash
@@ -46,20 +54,20 @@ jobs:
         working-directory: python/llama-index-server
     steps:
       - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}
 
       - name: Install uv
         uses: astral-sh/setup-uv@v5
         with:
           enable-cache: true
 
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
       - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras --dev
+        run: pnpm install
 
       - name: Run mypy
         shell: bash
@@ -73,27 +81,56 @@ jobs:
         working-directory: python/llama-index-server
     steps:
       - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
+      - uses: pnpm/action-setup@v3
 
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
           python-version: ${{ env.PYTHON_VERSION }}
 
-      - name: Install build package
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "pnpm"
+
+      - name: Install dependencies
+        run: pnpm install && pnpm build
+
+      - name: Build package
         shell: bash
-        run: uv sync --all-extras
+        run: uv build
+
+      - name: Get the absolute wheel file path and save it to the output
+        shell: bash
+        id: get_whl_path
+        run: |
+          WHL_FILE=$(readlink -f dist/*.whl)
+          echo "whl_file=$WHL_FILE" >> $GITHUB_OUTPUT
 
       - name: Test import
         shell: bash
-        run: uv run python -c "from llama_index.server import LlamaIndexServer"
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          uv run --with $WHL_FILE python -c "from llama_index.server import LlamaIndexServer"
+
+      - name: Check frontend resources is present
+        shell: bash
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          uv run --with $WHL_FILE python -c "from llama_index.server.chat_ui import check_ui_resources; check_ui_resources()"
 
       - name: Upload artifact
         uses: actions/upload-artifact@v4
         with:
           name: llama-index-server
-          path: python/llama-index-server/dist/
+          path: python/llama-index-server/dist/ # `uses:` steps ignore defaults.run.working-directory; path is relative to the repo root
diff --git a/package.json b/package.json
index c2523c48..069023e3 100644
--- a/package.json
+++ b/package.json
@@ -13,7 +13,8 @@
   },
   "license": "MIT",
   "workspaces": [
-    "packages/*"
+    "packages/*",
+    "python/*"
   ],
   "scripts": {
     "dev": "pnpm -r dev",
@@ -24,8 +25,10 @@
     "format:write": "prettier --ignore-unknown --write .",
     "prepare": "husky",
     "new-snapshot": "pnpm -r build && changeset version --snapshot",
-    "new-version": "pnpm -r build && changeset version",
-    "release": "pnpm -r build && changeset publish",
+    "new-version-python": "pnpm --filter @create-llama/llama-index-server new-version",
+    "new-version": "pnpm -r build && changeset version && pnpm new-version-python",
+    "release-python": "pnpm --filter @create-llama/llama-index-server release",
+    "release": "pnpm -r build && changeset publish && pnpm release-python",
     "release-snapshot": "pnpm -r build && changeset publish --tag snapshot"
   },
   "devDependencies": {
diff --git a/packages/server/README.md b/packages/server/README.md
index 18023818..e45a1891 100644
--- a/packages/server/README.md
+++ b/packages/server/README.md
@@ -65,6 +65,7 @@ The `LlamaIndexServer` accepts the following configuration options:
   - `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
   - `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
   - `dev_mode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/src/workflow.ts`. Please start server in dev mode (`npm run dev`) to use see this reload feature enabled.
+- `suggestNextQuestions`: Whether to suggest next questions after the assistant's response (default: `true`). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable.
 
 LlamaIndexServer accepts all the configuration options from Nextjs Custom Server such as `port`, `hostname`, `dev`, etc.
 See all Nextjs Custom Server options [here](https://nextjs.org/docs/app/building-your-application/configuring/custom-server).
diff --git a/packages/server/examples/agentic-rag/index.ts b/packages/server/examples/agentic-rag/index.ts
index 6c9c1e34..22268d83 100644
--- a/packages/server/examples/agentic-rag/index.ts
+++ b/packages/server/examples/agentic-rag/index.ts
@@ -35,6 +35,7 @@ export const workflowFactory = async () => {
 
 new LlamaIndexServer({
   workflow: workflowFactory,
+  suggestNextQuestions: true,
   uiConfig: {
     appTitle: "LlamaIndex App",
     starterQuestions: ["What is the color of the dog?"],
diff --git a/packages/server/next/app/api/chat/route.ts b/packages/server/next/app/api/chat/route.ts
index 198e862b..b1180145 100644
--- a/packages/server/next/app/api/chat/route.ts
+++ b/packages/server/next/app/api/chat/route.ts
@@ -47,6 +47,7 @@ export async function POST(req: NextRequest) {
     );
 
     const dataStream = toDataStream(workflowEventStream, {
+      // TODO: Support enable/disable suggestion
       callbacks: {
         onFinal: async (completion, dataStreamWriter) => {
           chatHistory.push({
diff --git a/packages/server/src/handlers/chat.ts b/packages/server/src/handlers/chat.ts
index 32e43eb4..95c29b2b 100644
--- a/packages/server/src/handlers/chat.ts
+++ b/packages/server/src/handlers/chat.ts
@@ -16,6 +16,7 @@ export const handleChat = async (
   req: IncomingMessage,
   res: ServerResponse,
   workflowFactory: WorkflowFactory,
+  suggestNextQuestions: boolean,
 ) => {
   try {
     const body = await parseRequestBody(req);
@@ -53,7 +54,9 @@ export const handleChat = async (
             role: "assistant" as MessageType,
             content: completion,
           });
-          await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          if (suggestNextQuestions) {
+            await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          }
         },
       },
     });
diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts
index 31dd63e6..643f2584 100644
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -1,4 +1,5 @@
 export * from "./events";
+export * from "./prompts";
 export * from "./server";
 export * from "./types";
 export { generateEventComponent } from "./utils/gen-ui";
diff --git a/packages/server/src/prompts.ts b/packages/server/src/prompts.ts
new file mode 100644
index 00000000..b0967275
--- /dev/null
+++ b/packages/server/src/prompts.ts
@@ -0,0 +1,14 @@
+export const NEXT_QUESTION_PROMPT = `You're a helpful assistant! 
+Your task is to suggest the next question that user might ask. 
+Here is the conversation history
+---------------------
+{conversation}
+---------------------
+Given the conversation history, please give me 3 questions that user might ask next!
+Your answer should be wrapped in three sticks which follows the following format:
+\`\`\`
+<question 1>
+<question 2>
+<question 3>
+\`\`\`
+`;
diff --git a/packages/server/src/server.ts b/packages/server/src/server.ts
index 6b089413..1c1704fa 100644
--- a/packages/server/src/server.ts
+++ b/packages/server/src/server.ts
@@ -18,13 +18,15 @@ export class LlamaIndexServer {
   app: ReturnType<typeof next>;
   workflowFactory: () => Promise<Workflow> | Workflow;
   componentsDir?: string | undefined;
+  suggestNextQuestions: boolean;
 
   constructor(options: LlamaIndexServerOptions) {
-    const { workflow, ...nextAppOptions } = options;
+    const { workflow, suggestNextQuestions, ...nextAppOptions } = options;
     this.app = next({ dev, dir: nextDir, ...nextAppOptions });
     this.port = nextAppOptions.port ?? parseInt(process.env.PORT || "3000", 10);
     this.workflowFactory = workflow;
     this.componentsDir = options.uiConfig?.componentsDir;
+    this.suggestNextQuestions = suggestNextQuestions ?? true;
 
     if (this.componentsDir) {
       this.createComponentsDir(this.componentsDir);
@@ -52,7 +54,8 @@ export class LlamaIndexServer {
         LLAMA_CLOUD_API: ${JSON.stringify(llamaCloudApi)},
         STARTER_QUESTIONS: ${JSON.stringify(starterQuestions)},
         COMPONENTS_API: ${JSON.stringify(componentsApi)},
-        DEV_MODE: ${JSON.stringify(devMode)}
+        DEV_MODE: ${JSON.stringify(devMode)},
+        SUGGEST_NEXT_QUESTIONS: ${JSON.stringify(this.suggestNextQuestions)}
       }
     `;
     fs.writeFileSync(configFile, content);
@@ -77,7 +80,12 @@ export class LlamaIndexServer {
         // because of https://github.com/vercel/next.js/discussions/79402 we can't use route.ts here, so we need to call this custom route
         // when calling `pnpm eject`, the user will get an equivalent route at [path to chat route.ts]
         // make sure to keep its semantic in sync with handleChat
-        return handleChat(req, res, this.workflowFactory);
+        return handleChat(
+          req,
+          res,
+          this.workflowFactory,
+          this.suggestNextQuestions,
+        );
       }
 
       if (
diff --git a/packages/server/src/types.ts b/packages/server/src/types.ts
index a47fdeea..f96c139f 100644
--- a/packages/server/src/types.ts
+++ b/packages/server/src/types.ts
@@ -23,4 +23,5 @@ export type UIConfig = {
 export type LlamaIndexServerOptions = NextAppOptions & {
   workflow: WorkflowFactory;
   uiConfig?: UIConfig;
+  suggestNextQuestions?: boolean;
 };
diff --git a/packages/server/src/utils/suggestion.ts b/packages/server/src/utils/suggestion.ts
index 89abbbcc..047544ac 100644
--- a/packages/server/src/utils/suggestion.ts
+++ b/packages/server/src/utils/suggestion.ts
@@ -1,19 +1,7 @@
+import { getEnv } from "@llamaindex/env";
 import type { DataStreamWriter } from "ai";
 import { type ChatMessage, Settings } from "llamaindex";
-
-const NEXT_QUESTION_PROMPT = `You're a helpful assistant! Your task is to suggest the next question that user might ask. 
-Here is the conversation history
----------------------
-{conversation}
----------------------
-Given the conversation history, please give me 3 questions that user might ask next!
-Your answer should be wrapped in three sticks which follows the following format:
-\`\`\`
-<question 1>
-<question 2>
-<question 3>
-\`\`\`
-`;
+import { NEXT_QUESTION_PROMPT } from "../prompts";
 
 export const sendSuggestedQuestionsEvent = async (
   streamWriter: DataStreamWriter,
@@ -32,10 +20,13 @@ export async function generateNextQuestions(conversation: ChatMessage[]) {
   const conversationText = conversation
     .map((message) => `${message.role}: ${message.content}`)
     .join("\n");
-  const message = NEXT_QUESTION_PROMPT.replace(
-    "{conversation}",
-    conversationText,
-  );
+  const promptTemplate = getEnv("NEXT_QUESTION_PROMPT") || NEXT_QUESTION_PROMPT;
+  // A replacer function inserts the chat text literally; a plain string would
+  // have `$$`, `$&`, etc. interpreted as special replacement patterns.
+  const message = promptTemplate.replace(
+    "{conversation}",
+    () => conversationText,
+  );
 
   try {
     const response = await Settings.llm.complete({ prompt: message });
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 7a7cbd47..967a11c4 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -402,6 +402,12 @@ importers:
         specifier: ^5.3.2
         version: 5.8.3
 
+  python/llama-index-server:
+    dependencies:
+      '@llamaindex/server':
+        specifier: workspace:*
+        version: link:../../packages/server
+
 packages:
 
   '@ai-sdk/provider-utils@2.2.7':
diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml
index 7b5e091d..4374999e 100644
--- a/pnpm-workspace.yaml
+++ b/pnpm-workspace.yaml
@@ -1,3 +1,4 @@
 packages:
   - "packages/*"
   - "packages/server/examples"
+  - "python/*"
diff --git a/python/llama-index-server/.gitignore b/python/llama-index-server/.gitignore
index 5442bfd7..65676711 100644
--- a/python/llama-index-server/.gitignore
+++ b/python/llama-index-server/.gitignore
@@ -5,6 +5,7 @@
 **/venv
 **/env
 **/llama-index-server.egg-info
+llama_index/server/resources/ui
 
 # Jupyter files
 **/*.ipynb
diff --git a/python/llama-index-server/README.md b/python/llama-index-server/README.md
index ac2d6455..e98f5617 100644
--- a/python/llama-index-server/README.md
+++ b/python/llama-index-server/README.md
@@ -84,6 +84,7 @@ The LlamaIndexServer accepts the following configuration parameters:
   - `component_dir`: The directory for custom UI components rendering events emitted by the workflow. The default is None, which does not render custom UI components.
   - `llamacloud_index_selector`: Whether to show the LlamaCloud index selector in the chat UI (default: False). Requires `LLAMA_CLOUD_API_KEY` to be set.
   - `dev_mode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/workflow.py`. You might also need to set `env="dev"` and start the server with the reload feature enabled.
+- `suggest_next_questions`: Whether to suggest next questions after the assistant's response (default: True). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable. The default prompt used is defined in `llama_index.server.prompts.SUGGEST_NEXT_QUESTION_PROMPT`.
 - `verbose`: Enable verbose logging
 - `api_prefix`: API route prefix (default: "/api")
 - `server_url`: The deployment URL of the server (default is None)
diff --git a/python/llama-index-server/examples/simple-agent/main.py b/python/llama-index-server/examples/simple-agent/main.py
index d87880af..f319626f 100644
--- a/python/llama-index-server/examples/simple-agent/main.py
+++ b/python/llama-index-server/examples/simple-agent/main.py
@@ -7,11 +7,13 @@ from llama_index.server import LlamaIndexServer, UIConfig
 def create_app() -> FastAPI:
     app = LlamaIndexServer(
         workflow_factory=create_workflow,
+        suggest_next_questions=True,
+        env="dev",
         ui_config=UIConfig(
             app_title="Artifact",
             starter_questions=[
-                "Tell me a funny joke.",
-                "Tell me some jokes about AI.",
+                "Tell me a funny joke",
+                "Tell me some jokes about AI",
             ],
             component_dir="components",
             dev_mode=True,  # To show the dev UI, should disable this in production
diff --git a/python/llama-index-server/llama_index/server/api/models.py b/python/llama-index-server/llama_index/server/api/models.py
index 44c85dd3..e5ee27e0 100644
--- a/python/llama-index-server/llama_index/server/api/models.py
+++ b/python/llama-index-server/llama_index/server/api/models.py
@@ -3,7 +3,7 @@ import os
 from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Union
 
-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, field_validator
 
 from llama_index.core.schema import NodeWithScore
 from llama_index.core.types import ChatMessage, MessageRole
@@ -13,13 +13,6 @@ from llama_index.server.settings import server_settings
 logger = logging.getLogger("uvicorn")
 
 
-class ChatConfig(BaseModel):
-    next_question_suggestions: bool = Field(
-        default=True,
-        description="Whether to suggest next questions",
-    )
-
-
 class ChatAPIMessage(BaseModel):
     role: MessageRole
     content: str
@@ -32,7 +25,6 @@ class ChatAPIMessage(BaseModel):
 class ChatRequest(BaseModel):
     messages: List[ChatAPIMessage]
     data: Optional[Any] = None
-    config: Optional[ChatConfig] = ChatConfig()
 
     @field_validator("messages")
     def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
diff --git a/python/llama-index-server/llama_index/server/api/routers/chat.py b/python/llama-index-server/llama_index/server/api/routers/chat.py
index bd4b55b3..117e4f25 100644
--- a/python/llama-index-server/llama_index/server/api/routers/chat.py
+++ b/python/llama-index-server/llama_index/server/api/routers/chat.py
@@ -28,6 +28,7 @@ from llama_index.server.services.llamacloud import LlamaCloudFileService
 def chat_router(
     workflow_factory: Callable[..., Workflow],
     logger: logging.Logger,
+    suggest_next_questions: bool = True,
 ) -> APIRouter:
     router = APIRouter(prefix="/chat")
 
@@ -56,7 +57,7 @@ def chat_router(
                 SourceNodesFromToolCall(),
                 LlamaCloudFileDownload(background_tasks),
             ]
-            if request.config and request.config.next_question_suggestions:
+            if suggest_next_questions:
                 callbacks.append(SuggestNextQuestions(request))
             stream_handler = StreamHandler(
                 workflow_handler=workflow_handler,
diff --git a/python/llama-index-server/llama_index/server/chat_ui.py b/python/llama-index-server/llama_index/server/chat_ui.py
index fdb563d2..6fa5eccb 100644
--- a/python/llama-index-server/llama_index/server/chat_ui.py
+++ b/python/llama-index-server/llama_index/server/chat_ui.py
@@ -1,55 +1,87 @@
+import importlib.resources
 import logging
 import shutil
 from pathlib import Path
 from typing import Optional
 
-import requests
-
-CHAT_UI_VERSION = "0.2.1"
+PACKAGE_NAME = "llama_index.server.resources"
+RESOURCE_DIR_NAME = "ui"
 
 
-def download_chat_ui(
+def check_ui_resources() -> None:
+    """Raise FileNotFoundError if the bundled UI resources directory is missing."""
+    try:
+        # joinpath() never fails for a missing path, so is_dir() does the real check.
+        ui_dir = importlib.resources.files(PACKAGE_NAME).joinpath(RESOURCE_DIR_NAME)
+        if not ui_dir.is_dir():
+            raise FileNotFoundError(f"'{RESOURCE_DIR_NAME}' directory is missing")
+    except Exception as e:
+        raise FileNotFoundError("UI resources not found in bundled package") from e
+
+
+def copy_bundled_chat_ui(
     logger: Optional[logging.Logger] = None, target_path: str = ".ui"
 ) -> None:
+    """Copy the UI bundled in the package into `target_path`, replacing its contents."""
+    check_ui_resources()
+
     if logger is None:
         logger = logging.getLogger("uvicorn")
-    path = Path(target_path)
-    temp_dir = _download_package(_get_download_link(CHAT_UI_VERSION))
-    _copy_ui_files(temp_dir, path)
-    logger.info("Chat UI downloaded and copied to static folder")
 
+    destination_path = Path(target_path)
+    destination_path.mkdir(parents=True, exist_ok=True)
 
-def _get_download_link(version: str) -> str:
-    """Get the download link for the chat UI from the npm registry."""
-    return f"https://registry.npmjs.org/@llamaindex/server/-/server-{version}.tgz"
-
-
-def _download_package(url: str) -> Path:
-    """Download tar.gz file and extract all files into a temporary directory."""
-    import io
-    import tarfile
-    import tempfile
-
-    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
-    content = response.content
-
-    temp_dir = Path(tempfile.mkdtemp())
-
-    with tarfile.open(fileobj=io.BytesIO(content), mode="r:gz") as tar:
-        tar.extractall(path=temp_dir)
-
-    return temp_dir
-
-
-def _copy_ui_files(temp_dir: Path, target_path: Path) -> None:
-    """Copy files from the .next directory to the static directory."""
-    target_path.mkdir(parents=True, exist_ok=True)
-    next_dir = temp_dir / "package/dist/static"
-
-    if next_dir.exists():
-        for item in next_dir.iterdir():
-            dest = target_path / item.name
+    try:
+        # Get a reference to the source directory using importlib.resources.files (Python 3.9+)
+        source_dir_ref = importlib.resources.files(PACKAGE_NAME).joinpath(
+            RESOURCE_DIR_NAME
+        )
+
+        if not source_dir_ref.is_dir():
+            logger.error(
+                f"Static UI resource directory '{RESOURCE_DIR_NAME}' not found in package '{PACKAGE_NAME}'. Path: {source_dir_ref}"
+            )
+            logger.error(
+                "Ensure the static files are correctly bundled with the package and the path is correct."
+            )
+            return
+
+        # Clear stale files only after the source is verified, so a missing bundle never wipes an existing UI copy
+        for item in destination_path.iterdir():
             if item.is_dir():
-                shutil.copytree(item, dest, dirs_exist_ok=True)
+                shutil.rmtree(item)
             else:
-                shutil.copy2(item, dest)
+                item.unlink()
+
+        for source_item_path_ref in source_dir_ref.iterdir():
+            # Skip __init__.py or other non-static files if present (though less likely needed with direct iteration)
+            if source_item_path_ref.name.startswith(
+                "__"
+            ) or source_item_path_ref.name.endswith(".py"):
+                continue
+
+            dest_item_path = destination_path / source_item_path_ref.name
+
+            # importlib.resources.as_file is needed to get a concrete path for shutil operations
+            with importlib.resources.as_file(
+                source_item_path_ref
+            ) as concrete_source_item_path:
+                if concrete_source_item_path.is_dir():
+                    shutil.copytree(
+                        concrete_source_item_path, dest_item_path, dirs_exist_ok=True
+                    )
+                elif concrete_source_item_path.is_file():
+                    shutil.copy2(concrete_source_item_path, dest_item_path)
+                else:
+                    logger.warning(
+                        f"Skipping resource '{source_item_path_ref.name}' as it's not a file or directory."
+                    )
+
+        logger.info(f"Chat UI files copied from package to '{destination_path}'")
+
+    except FileNotFoundError:
+        logger.error(
+            "Oops! The chat UI files are not found. Please report this issue to the LlamaIndex team."
+        )
+    except Exception as e:
+        logger.error(f"Failed to copy bundled chat UI files: {e}.")
diff --git a/python/llama-index-server/llama_index/server/prompts.py b/python/llama-index-server/llama_index/server/prompts.py
new file mode 100644
index 00000000..df650a4b
--- /dev/null
+++ b/python/llama-index-server/llama_index/server/prompts.py
@@ -0,0 +1,15 @@
+# Used by SuggestNextQuestionsService
+# Override this prompt by setting the `NEXT_QUESTION_PROMPT` environment variable
+SUGGEST_NEXT_QUESTION_PROMPT = """You're a helpful assistant! Your task is to suggest the next questions that user might interested in to keep the conversation going.
+Here is the conversation history
+---------------------
+{conversation}
+---------------------
+Given the conversation history, please give me 3 questions that user might ask next!
+Your answer should be wrapped in three sticks without any index numbers and follows the following format:
+```
+<question 1>
+<question 2>
+<question 3>
+```
+"""
diff --git a/python/llama-index-server/llama_index/server/resources/__init__.py b/python/llama-index-server/llama_index/server/resources/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/python/llama-index-server/llama_index/server/server.py b/python/llama-index-server/llama_index/server/server.py
index 615ca4e1..6b7a216e 100644
--- a/python/llama-index-server/llama_index/server/server.py
+++ b/python/llama-index-server/llama_index/server/server.py
@@ -15,7 +15,7 @@ from llama_index.server.api.routers import (
     custom_components_router,
     dev_router,
 )
-from llama_index.server.chat_ui import download_chat_ui
+from llama_index.server.chat_ui import copy_bundled_chat_ui
 from llama_index.server.settings import server_settings
 
 
@@ -68,11 +68,12 @@ class LlamaIndexServer(FastAPI):
         self,
         workflow_factory: Callable[..., Workflow],
         logger: Optional[logging.Logger] = None,
-        use_default_routers: Optional[bool] = True,
+        use_default_routers: Optional[bool] = None,
         env: Optional[str] = None,
         ui_config: Optional[Union[UIConfig, dict]] = None,
         server_url: Optional[str] = None,
         api_prefix: Optional[str] = None,
+        suggest_next_questions: Optional[bool] = None,
         verbose: bool = False,
         *args: Any,
         **kwargs: Any,
@@ -88,6 +89,7 @@ class LlamaIndexServer(FastAPI):
             ui_config: The configuration for the chat UI.
             server_url: The URL of the server.
             api_prefix: The prefix for the API endpoints.
+            suggest_next_questions: Whether to suggest next questions after the assistant's response.
             verbose: Whether to show verbose logs.
         """
         super().__init__(*args, **kwargs)
@@ -95,7 +97,12 @@ class LlamaIndexServer(FastAPI):
         self.workflow_factory = workflow_factory
         self.logger = logger or logging.getLogger("uvicorn")
         self.verbose = verbose
-        self.use_default_routers = use_default_routers or True
+        self.use_default_routers = (
+            True if use_default_routers is None else use_default_routers
+        )
+        self.suggest_next_questions = (
+            True if suggest_next_questions is None else suggest_next_questions
+        )
         if ui_config is None:
             self.ui_config = UIConfig()
         elif isinstance(ui_config, dict):
@@ -146,6 +153,7 @@ class LlamaIndexServer(FastAPI):
             chat_router(
                 self.workflow_factory,
                 self.logger,
+                self.suggest_next_questions,
             ),
             prefix=server_settings.api_prefix,
         )
@@ -177,9 +185,11 @@ class LlamaIndexServer(FastAPI):
             if not os.path.exists(self.ui_config.ui_path):
                 os.makedirs(self.ui_config.ui_path)
                 self.logger.warning(
-                    f"UI files not found, downloading UI to {self.ui_config.ui_path}"
+                    f"UI files not found at {self.ui_config.ui_path}. Copying bundled UI files."
+                )
+                copy_bundled_chat_ui(
+                    logger=self.logger, target_path=self.ui_config.ui_path
                 )
-                download_chat_ui(logger=self.logger, target_path=self.ui_config.ui_path)
             self._mount_static_files(
                 directory=self.ui_config.ui_path,
                 path="/",
diff --git a/python/llama-index-server/llama_index/server/services/suggest_next_question.py b/python/llama-index-server/llama_index/server/services/suggest_next_question.py
index 88c9ec2e..aff24467 100644
--- a/python/llama-index-server/llama_index/server/services/suggest_next_question.py
+++ b/python/llama-index-server/llama_index/server/services/suggest_next_question.py
@@ -6,6 +6,7 @@ from typing import List, Optional, Union
 from llama_index.core.prompts import PromptTemplate
 from llama_index.core.settings import Settings
 from llama_index.server.api.models import ChatAPIMessage
+from llama_index.server.prompts import SUGGEST_NEXT_QUESTION_PROMPT
 
 logger = logging.getLogger("uvicorn")
 
@@ -15,28 +16,11 @@ class SuggestNextQuestionsService:
     Suggest the next questions that user might ask based on the conversation history.
     """
 
-    prompt = PromptTemplate(
-        r"""
-You're a helpful assistant! Your task is to suggest the next questions that user might interested in to keep the conversation going.
-Here is the conversation history
----------------------
-{conversation}
----------------------
-Given the conversation history, please give me 3 questions that user might ask next!
-Your answer should be wrapped in three sticks without any index numbers and follows the following format:
-\`\`\`
-<question 1>
-<question 2>
-<question 3>
-\`\`\`
-"""
-    )
-
     @classmethod
     def get_configured_prompt(cls) -> PromptTemplate:
         prompt = os.getenv("NEXT_QUESTION_PROMPT", None)
         if not prompt:
-            return cls.prompt
+            return PromptTemplate(SUGGEST_NEXT_QUESTION_PROMPT)
         return PromptTemplate(prompt)
 
     @classmethod
diff --git a/python/llama-index-server/package.json b/python/llama-index-server/package.json
new file mode 100644
index 00000000..bbf408e5
--- /dev/null
+++ b/python/llama-index-server/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "@create-llama/llama-index-server",
+  "private": true,
+  "version": "0.1.16",
+  "type": "module",
+  "scripts": {
+    "prebuild": "uv run -- scripts/frontend.py --mode copy",
+    "build": "uv build",
+    "clean": "rm -rf dist build *.egg-info",
+    "new-version": "uv run python scripts/sync_version.py && git add pyproject.toml",
+    "release": "uv publish"
+  },
+  "dependencies": {
+    "@llamaindex/server": "workspace:*"
+  }
+}
\ No newline at end of file
diff --git a/python/llama-index-server/pyproject.toml b/python/llama-index-server/pyproject.toml
index 6c6f6d98..979f64fe 100644
--- a/python/llama-index-server/pyproject.toml
+++ b/python/llama-index-server/pyproject.toml
@@ -63,5 +63,6 @@ dev = [
     "llama-cloud>=0.1.17,<1.0.0",
 ]
 
-[tool.hatch.build.targets.wheel]
+[tool.hatch.build]
 packages = ["llama_index/"]
+artifacts = ["llama_index/server/resources"]
\ No newline at end of file
diff --git a/python/llama-index-server/scripts/frontend.py b/python/llama-index-server/scripts/frontend.py
new file mode 100644
index 00000000..3b4684d5
--- /dev/null
+++ b/python/llama-index-server/scripts/frontend.py
@@ -0,0 +1,154 @@
+# /// script
+# requires-python = ">=3.10"
+# dependencies = []
+# ///
+# This script is used to build the frontend for the llama-index-server
+# You need to have pnpm installed to run this script
+import os
+import subprocess
+import argparse
+import shutil
+
+
+def _get_pnpm_executable() -> str:
+    """Return the path of the pnpm executable found on PATH.
+
+    Looks up ``pnpm`` first and then ``pnpm.cmd`` via ``shutil.which``.
+    Prints a hint and terminates the process with exit status 1 if neither exists.
+    """
+    for candidate in ("pnpm", "pnpm.cmd"):
+        if resolved := shutil.which(candidate):
+            return resolved
+    print("pnpm not found. Please ensure pnpm is installed and in your PATH.")
+    raise SystemExit(1)  # builtin exit() is a site helper, not meant for programs
+
+
+def check_pnpm_installation() -> None:
+    """Run ``pnpm --version`` as a smoke test; exit with status 1 if it fails."""
+    pnpm_exe = _get_pnpm_executable()
+    try:
+        # capture_output keeps the version string off the console on success
+        subprocess.run([pnpm_exe, "--version"], check=True, capture_output=True)
+    except subprocess.CalledProcessError:
+        # Raised by check=True when the executable was found but returned a
+        # non-zero status; distinct from the "not found" exit in _get_pnpm_executable.
+        print(
+            "pnpm is installed, but '--version' command failed. Please check your pnpm installation."
+        )
+        raise SystemExit(1)  # exit() is a site helper not meant for programs
+
+
+def get_workspace_path() -> str:
+    """Return the pnpm workspace root derived from the output of ``pnpm root -w``.
+
+    A trailing ``node_modules`` is cut from the command output when present.
+    Prints a diagnostic and exits with status 1 if the pnpm command fails.
+    """
+    pnpm_exe = _get_pnpm_executable()
+    try:
+        raw = subprocess.check_output([pnpm_exe, "root", "-w"])
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to get workspace path using 'pnpm root -w': {e}")
+        print("Ensure you are in a pnpm workspace and pnpm is functioning correctly.")
+        raise SystemExit(1)  # exit() is a site helper not meant for programs
+    output = raw.decode("utf-8").strip()
+    suffix = "node_modules"
+    return output[: -len(suffix)] if output.endswith(suffix) else output
+
+
+def build_frontend() -> None:
+    """Build ``@llamaindex/server`` via pnpm; exit with status 1 on failure."""
+    pnpm_exe = _get_pnpm_executable()
+    print("Building Frontend...")
+    # TODO: This can probably be copied from node_modules to save time, but that
+    # could be an issue if the user hasn't run `pnpm build` for the server package.
+    build_cmd = [pnpm_exe, "--filter", "@llamaindex/server", "build"]
+    try:
+        # check=True turns a non-zero pnpm exit status into CalledProcessError
+        subprocess.run(build_cmd, check=True)
+    except subprocess.CalledProcessError as e:
+        print(f"Frontend build failed: {e}")
+        raise SystemExit(1)  # exit() is a site helper not meant for programs
+    print("Frontend built successfully.")
+
+
+def get_paths() -> tuple[str, str, str]:
+    """Compute the directories this script works with.
+
+    Returns:
+        A ``(workspace_path, fe_assets_dir, link_path)`` tuple: the workspace
+        root, ``packages/server/dist/static`` below it, and the
+        ``llama_index/server/resources/ui`` directory of the Python package.
+    """
+    root = get_workspace_path()
+    assets_parts = ("packages", "server", "dist", "static")
+    package_parts = ("python", "llama-index-server", "llama_index", "server")
+    ui_dir = os.path.join(root, *package_parts, "resources", "ui")
+    return root, os.path.join(root, *assets_parts), ui_dir
+
+
+def link_static_files() -> None:
+    """Symlink the built frontend assets into the Python package (POSIX only).
+
+    Linking instead of copying is useful for development purposes. Exits with
+    status 1 on Windows or when the frontend assets directory does not exist.
+    Anything already present at the link location is removed first.
+    """
+    # If user is on Windows, tell them to use WSL
+    if os.name == "nt":
+        print("Windows is not supported. Please use WSL to run this script.")
+        raise SystemExit(1)  # exit() is a site helper not meant for programs
+    print("Linking static files...")
+    # Need to link by absolute path of the server directory
+    _, fe_assets_dir, link_path = get_paths()
+    if not os.path.exists(fe_assets_dir):
+        print(
+            f"Frontend assets directory {fe_assets_dir} does not exist. Please build the frontend first."
+        )
+        raise SystemExit(1)
+    # Test islink first: os.path.exists() follows symlinks and reports False for
+    # a dangling link, which would be left in place and make `ln -s` fail.
+    if os.path.islink(link_path):
+        os.unlink(link_path)
+    elif os.path.exists(link_path):
+        shutil.rmtree(link_path)
+    # Link the static files to the server directory
+    subprocess.run(["ln", "-s", fe_assets_dir, link_path], check=True)
+    print("Static files linked successfully.")
+
+
+def copy_static_files() -> None:
+    """Replace the package UI directory with a copy of the frontend assets."""
+    _, fe_assets_dir, link_path = get_paths()
+    # Test islink first: os.path.exists() follows symlinks and reports False for
+    # a dangling link, which would be left in place and get in the way of the copy.
+    if os.path.islink(link_path):
+        os.unlink(link_path)
+    elif os.path.exists(link_path):
+        shutil.rmtree(link_path)
+    # Copy the static files to the output directory
+    shutil.copytree(fe_assets_dir, link_path, dirs_exist_ok=True)
+    print("Static files copied successfully.")
+
+
+if __name__ == "__main__":
+    # Command-line entry point: check pnpm, optionally build the frontend,
+    # then link or copy the static files into the Python package.
+    cli = argparse.ArgumentParser(
+        description="Prepare the frontend for the llama-index-server"
+    )
+    cli.add_argument(
+        "--mode",
+        choices=["link", "copy"],
+        default="copy",
+        help="Link the static files instead of copying them. Only works for POSIX systems.",
+    )
+    cli.add_argument("--skip-build", action="store_true", help="Skip the build step.")
+    options = cli.parse_args()
+    # The pnpm check runs unconditionally, even when the build is skipped.
+    check_pnpm_installation()
+    if not options.skip_build:
+        build_frontend()
+    # argparse restricts --mode to these two keys, so the lookup cannot miss.
+    handlers = {"link": link_static_files, "copy": copy_static_files}
+    handlers[options.mode]()
diff --git a/python/llama-index-server/scripts/sync_version.py b/python/llama-index-server/scripts/sync_version.py
new file mode 100755
index 00000000..472ac5a2
--- /dev/null
+++ b/python/llama-index-server/scripts/sync_version.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+import json
+from pathlib import Path
+
+
+def sync_versions():
+    """Write the package.json version into the [project] section of pyproject.toml.
+
+    Both files are read relative to the current working directory. Exits with an
+    error instead of reporting success when no ``version = `` line was rewritten.
+    """
+    with open("package.json", "r", encoding="utf-8") as f:
+        npm_version = json.load(f)["version"]
+    pyproject_path = Path("pyproject.toml")
+    # Sections are told apart by the blank lines between them
+    sections = pyproject_path.read_text(encoding="utf-8").split("\n\n")
+    updated = False
+    for i, section in enumerate(sections):
+        if section.startswith("[project]"):
+            lines = section.split("\n")
+            for j, line in enumerate(lines):
+                if line.startswith("version = "):
+                    lines[j], updated = f'version = "{npm_version}"', True
+            sections[i] = "\n".join(lines)
+            break
+    if not updated:
+        raise SystemExit("pyproject.toml: no version line in [project] section")
+    pyproject_path.write_text("\n\n".join(sections), encoding="utf-8")
+    print(f"Updated pyproject.toml version to {npm_version}")
+
+
+if __name__ == "__main__":
+    sync_versions()
diff --git a/python/llama-index-server/tests/test_llamaindex_server.py b/python/llama-index-server/tests/test_llamaindex_server.py
index 506b4a9a..abd63bf2 100644
--- a/python/llama-index-server/tests/test_llamaindex_server.py
+++ b/python/llama-index-server/tests/test_llamaindex_server.py
@@ -1,13 +1,18 @@
 import json
 import os
 import shutil
+import tempfile
+from pathlib import Path
 
 import pytest
 from httpx import ASGITransport, AsyncClient
+
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.llms import MockLLM
 from llama_index.server import LlamaIndexServer, UIConfig
 
+UI_TEST = os.getenv("UI_TEST", "false").lower() == "true"
+
 
 def fetch_weather(city: str) -> str:
     """Fetch the weather for a given city."""
@@ -31,8 +36,7 @@ def server() -> LlamaIndexServer:
         workflow_factory=_agent_workflow,
         verbose=True,
         use_default_routers=True,
-        mount_ui=False,
-        env="dev",
+        ui_config=UIConfig(enabled=False),
     )
 
 
@@ -55,241 +59,93 @@ async def test_server_swagger_docs(server: LlamaIndexServer) -> None:
         assert "Swagger UI" in response.text
 
 
-@pytest.mark.asyncio()
-async def test_ui_is_downloaded(server: LlamaIndexServer) -> None:
-    """
-    Test if the UI is downloaded and mounted correctly.
-    """
-    # Clean up any existing static directory first
-    if os.path.exists(".ui"):
-        shutil.rmtree(".ui")
+# UI Integration Tests
+# Make sure you run the scripts/frontend.py script before running these tests
+if UI_TEST:
 
-    # Create a new server with UI enabled
-    ui_config = UIConfig(
-        enabled=True,
-        app_title="Test UI",
-        starter_questions=["What's the weather like?"],
-    )
-    ui_server = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        use_default_routers=True,
-        env="dev",
-        ui_config=ui_config,
-    )
+    @pytest.mark.asyncio()
+    async def test_ui_is_copied_and_mounted(tmp_path: Path) -> None:
+        """
+        Test if the UI is copied from bundle and mounted correctly.
+        """
+        tmp_ui_dir = str(tmp_path / "ui")
+        print(f"tmp_ui_dir: {tmp_ui_dir}")
+        tmp_component_dir = tempfile.mkdtemp()
 
-    # Verify that static directory was created with index.html
-    assert os.path.exists("./.ui"), "Static directory was not created"
-    assert os.path.isdir("./.ui"), "Static path is not a directory"
-    assert os.path.exists("./.ui/index.html"), "index.html was not downloaded"
-
-    # Check if the config.js was created with correct content
-    config_path = os.path.join(".ui", "config.js")
-    assert os.path.exists(config_path), "config.js was not created"
-
-    with open(config_path, "r") as f:
-        config_content = f.read()
-        assert "window.LLAMAINDEX =" in config_content
-        config_json = json.loads(
-            config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
+        # Create a new server with UI enabled
+        ui_config = UIConfig(
+            enabled=True,
+            app_title="Test UI",
+            starter_questions=["What's the weather like?"],
+            ui_path=tmp_ui_dir,
+            component_dir=tmp_component_dir,
         )
-        assert config_json["CHAT_API"] == "/api/chat"
-        assert config_json["STARTER_QUESTIONS"] == ["What's the weather like?"]
-        assert config_json["LLAMA_CLOUD_API"] is None
-        assert config_json["APP_TITLE"] == "Test UI"
-
-    # Check if the UI is mounted and accessible
-    async with AsyncClient(
-        transport=ASGITransport(app=ui_server), base_url="http://test"
-    ) as ac:
-        response = await ac.get("/")
-        assert response.status_code == 200
-        assert "text/html" in response.headers["content-type"]
-
-    # Clean up after test
-    shutil.rmtree("./.ui")
-
-
-@pytest.mark.asyncio()
-async def test_ui_is_accessible(server: LlamaIndexServer) -> None:
-    """
-    Test if the UI is accessible.
-    """
-    # Manually trigger UI mounting
-    server.mount_ui()
-
-    async with AsyncClient(
-        transport=ASGITransport(app=server), base_url="http://test"
-    ) as ac:
-        response = await ac.get("/")
-        assert response.status_code == 200
-        assert "text/html" in response.headers["content-type"]
-
-
-@pytest.mark.asyncio()
-async def test_ui_config_customization() -> None:
-    """
-    Test if UI configuration can be customized.
-    """
-    custom_config = UIConfig(
-        enabled=True,
-        app_title="Custom App",
-        starter_questions=["Question 1", "Question 2"],
-        ui_path=".custom_ui",
-    )
-
-    server = LlamaIndexServer(
-        workflow_factory=_agent_workflow, verbose=True, ui_config=custom_config
-    )
-
-    assert server.ui_config.app_title == "Custom App"
-    assert server.ui_config.starter_questions == ["Question 1", "Question 2"]
-    assert server.ui_config.ui_path == ".custom_ui"
-
-    # Clean up if directory was created
-    if os.path.exists(".custom_ui"):
-        shutil.rmtree(".custom_ui")
-
-
-@pytest.mark.asyncio()
-async def test_ui_config_from_dict() -> None:
-    """
-    Test if UI configuration can be initialized from a dictionary.
-    """
-    ui_config_dict = {
-        "enabled": True,
-        "app_title": "Dict Config App",
-        "starter_questions": ["Dict Q1", "Dict Q2"],
-        "ui_path": ".dict_ui",
-    }
-
-    server = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        ui_config=ui_config_dict,
-    )
-
-    # Verify the config was properly converted to UIConfig object
-    assert isinstance(server.ui_config, UIConfig)
-    assert server.ui_config.app_title == "Dict Config App"
-    assert server.ui_config.starter_questions == ["Dict Q1", "Dict Q2"]
-    assert server.ui_config.ui_path == ".dict_ui"
-
-    # Verify the config.js is created with correct content
-    server.mount_ui()
-    config_path = os.path.join(".dict_ui", "config.js")
-    assert os.path.exists(config_path), "config.js was not created"
-
-    with open(config_path, "r") as f:
-        config_content = f.read()
-        assert "window.LLAMAINDEX =" in config_content
-        config_json = json.loads(
-            config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
+        ui_server = LlamaIndexServer(
+            workflow_factory=_agent_workflow,
+            verbose=True,
+            use_default_routers=True,
+            env="dev",
+            ui_config=ui_config,
         )
-        assert config_json["APP_TITLE"] == "Dict Config App"
-        assert config_json["STARTER_QUESTIONS"] == ["Dict Q1", "Dict Q2"]
-        assert config_json["CHAT_API"] == "/api/chat"
-        assert config_json["LLAMA_CLOUD_API"] is None
 
-    # Clean up
-    if os.path.exists(".dict_ui"):
-        shutil.rmtree(".dict_ui")
+        # Verify that static directory was created with index.html
+        # List files in tmp_ui_dir
+        print("Files in tmp_ui_dir: ", os.listdir(tmp_ui_dir))
+        assert os.path.exists(tmp_ui_dir), "Static directory was not created"
+        assert os.path.isdir(tmp_ui_dir), "Static path is not a directory"
+        assert os.path.exists(os.path.join(tmp_ui_dir, "index.html")), (
+            "index.html was not copied from bundle"
+        )
 
+        # Check if the config.js was created with correct content
+        config_path = os.path.join(tmp_ui_dir, "config.js")
+        assert os.path.exists(config_path), "config.js was not created"
 
-async def test_component_dir_creation(server: LlamaIndexServer) -> None:
-    """
-    Test if the component directory is created when specified and doesn't exist.
-    """
-    import os
-    import shutil
+        with open(config_path, "r") as f:
+            config_content = f.read()
+            assert "window.LLAMAINDEX =" in config_content
+            config_json = json.loads(
+                config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
+            )
+            assert config_json["CHAT_API"] == "/api/chat"
+            assert config_json["STARTER_QUESTIONS"] == ["What's the weather like?"]
+            assert config_json["LLAMA_CLOUD_API"] is None
+            assert config_json["APP_TITLE"] == "Test UI"
 
-    test_component_dir = "./test_components"
+        # Verify directory was created
+        assert os.path.exists(tmp_component_dir), "Component directory was not created"
+        assert os.path.isdir(tmp_component_dir), "Component path is not a directory"
 
-    # Clean up any existing directory
-    if os.path.exists(test_component_dir):
-        shutil.rmtree(test_component_dir)
+        # Verify component route exists
+        component_route_exists = any(
+            route.path == "/api/components"  # type: ignore
+            for route in ui_server.routes
+        )
+        assert component_route_exists, "Component API route not found in server routes"
 
-    # Create server with component directory
-    _ = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        ui_config={
-            "component_dir": test_component_dir,
-            "include_ui": True,
-        },
-    )
+        # Check if the UI is mounted and accessible
+        async with AsyncClient(
+            transport=ASGITransport(app=ui_server), base_url="http://test"
+        ) as ac:
+            response = await ac.get("/")
+            assert response.status_code == 200
+            assert "text/html" in response.headers["content-type"]
 
-    # Verify directory was created
-    assert os.path.exists(test_component_dir), "Component directory was not created"
-    assert os.path.isdir(test_component_dir), "Component path is not a directory"
-
-    # Clean up after test
-    shutil.rmtree(test_component_dir)
+        # Clean up after test
+        shutil.rmtree(tmp_ui_dir)
+        shutil.rmtree(tmp_component_dir)
 
 
 @pytest.mark.asyncio()
-async def test_component_router_addition(server: LlamaIndexServer, tmp_path) -> None:
-    """
-    Test if the component router is added when component directory is specified.
-    """
-    test_component_dir = tmp_path / "test_components"
-
-    # Create server with component directory
-    component_server = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        ui_config={
-            "component_dir": str(test_component_dir),
-            "include_ui": True,
-        },
-    )
-
-    # Verify component route exists
-    component_route_exists = any(
-        route.path == "/api/components" for route in component_server.routes
-    )
-    assert component_route_exists, "Component API route not found in server routes"
-
-
-@pytest.mark.asyncio()
-async def test_ui_config_includes_components_api(
-    server: LlamaIndexServer, tmp_path
-) -> None:
-    """
-    Test if the UI config includes components API when component directory is set.
-    """
-    test_component_dir = tmp_path / "test_components"
-
-    # Create server with component directory
-    component_server = LlamaIndexServer(
-        workflow_factory=_agent_workflow,
-        verbose=True,
-        ui_config={
-            "component_dir": str(test_component_dir),
-            "include_ui": True,
-        },
-    )
-
-    # Check if components API is in UI config
-    ui_config = component_server.ui_config
-    assert "COMPONENTS_API" in ui_config.get_config_content(), (
-        "Components API not found in UI config"
-    )
-
-
-@pytest.mark.asyncio()
-async def test_component_router_requires_component_dir(
-    server: LlamaIndexServer,
-) -> None:
+async def test_component_router_requires_component_dir() -> None:
     """
     Test that adding components router without component_dir raises an error.
     """
+    tmp_ui_dir = tempfile.mkdtemp()
     server_without_component_dir = LlamaIndexServer(
         workflow_factory=_agent_workflow,
         verbose=True,
-        ui_config={
-            "include_ui": True,
-        },
+        ui_config=UIConfig(enabled=True, ui_path=tmp_ui_dir),
     )
 
     with pytest.raises(
diff --git a/python/llama-index-server/uv.lock b/python/llama-index-server/uv.lock
index ee5d0fe3..6a2c2f0a 100644
--- a/python/llama-index-server/uv.lock
+++ b/python/llama-index-server/uv.lock
@@ -1897,7 +1897,7 @@ wheels = [
 
 [[package]]
 name = "llama-index-server"
-version = "0.1.15"
+version = "0.1.16"
 source = { editable = "." }
 dependencies = [
     { name = "cachetools" },