use eject file in linux (#663 )

Release 0.1.18 (#660 )
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2026-07-02 19:14:28 -04:00 · 2025-05-29 09:15:52 +07:00 · 2025-05-28 17:57:45 +07:00 · 2025-05-28 17:50:23 +07:00 · 2025-05-28 17:28:50 +07:00 · 2025-05-28 17:09:47 +07:00
108 changed files with 5768 additions and 3468 deletions
@@ -64,6 +64,15 @@ jobs:
        run: pnpm run pack-install
        working-directory: packages/create-llama

+      # Builds the llama-index-server wheel, copies it to the runner temp dir and
+      # exports its location as SERVER_PACKAGE_PATH for the Playwright step below.
+      - name: Build and store server package
+        run: |
+          pnpm run build
+          wheel_file=$(ls dist/*.whl | head -n 1)
+          # Fail with a clear message instead of letting `cp` choke on an empty path.
+          if [ -z "$wheel_file" ]; then
+            echo "No wheel file found in dist/ after build" >&2
+            exit 1
+          fi
+          mkdir -p "${{ runner.temp }}"
+          cp "$wheel_file" "${{ runner.temp }}/"
+          # Quote the redirect target so the env-file path is never word-split.
+          echo "SERVER_PACKAGE_PATH=${{ runner.temp }}/$(basename "$wheel_file")" >> "$GITHUB_ENV"
+        working-directory: python/llama-index-server
+
      - name: Run Playwright tests for Python
        run: pnpm run e2e:python
        env:
@@ -74,6 +83,7 @@ jobs:
          TEMPLATE_TYPE: ${{ matrix.template-types }}
          PYTHONIOENCODING: utf-8
          PYTHONLEGACYWINDOWSSTDIO: utf-8
+          SERVER_PACKAGE_PATH: ${{ env.SERVER_PACKAGE_PATH }}
        working-directory: packages/create-llama

      - uses: actions/upload-artifact@v4
@@ -16,6 +16,16 @@ jobs:

      - uses: pnpm/action-setup@v3

+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -17,6 +17,11 @@ jobs:

      - uses: pnpm/action-setup@v3

+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
      - name: Install uv
        uses: astral-sh/setup-uv@v3

@@ -51,8 +56,12 @@ jobs:
        with:
          commit: Release ${{ steps.get-changeset-status.outputs.new-version }}
          title: Release ${{ steps.get-changeset-status.outputs.new-version }}
+          # bump versions
+          version: pnpm new-version
          # build package and call changeset publish
          publish: pnpm release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
@@ -1,138 +0,0 @@
-name: Release llama-index-server
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - "python/llama-index-server/**"
-      - ".github/workflows/release_llama_index_server.yml"
-  pull_request:
-    types:
-      - closed
-
-concurrency: ${{ github.workflow }}-${{ github.ref }}
-
-jobs:
-  release:
-    name: Create Release PR
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'push' && 
-      !startsWith(github.ref, 'refs/heads/release/llama-index-server-v') &&
-      !contains(github.event.head_commit.message, 'Release: llama-index-server v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras --dev
-
-      - name: Setup Git
-        run: |
-          git config --global user.email "github-actions[bot]@users.noreply.github.com"
-          git config --global user.name "github-actions[bot]"
-
-      - name: Bump patch version
-        shell: bash
-        run: |
-          uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version | awk -F. '{$NF = $NF + 1;}1' OFS=.)
-          git add pyproject.toml
-          git commit -m "chore(release): bump llama-index-server version to $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)"
-
-      - name: Get current version
-        id: get_version
-        shell: bash
-        run: |
-          version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Create Release PR
-        uses: peter-evans/create-pull-request@v6
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          title: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            This PR was automatically created to release a new version of the llama-index-server package.
-
-            Version: ${{ steps.get_version.outputs.current_version }}
-
-            Please review the changes and merge to trigger the release.
-          branch: release/llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          base: main
-          labels: release, llama-index-server
-
-  publish:
-    name: Publish to PyPI
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'pull_request' && 
-      github.event.pull_request.merged == true && 
-      startsWith(github.event.pull_request.title, 'Release: llama-index-server') &&
-      startsWith(github.event.pull_request.head.ref, 'release/llama-index-server-v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras
-
-      - name: Get current version
-        id: get_version
-        shell: bash
-        run: |
-          version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Build package
-        shell: bash
-        run: uv build --no-sources
-
-      - name: Publish to PyPI
-        shell: bash
-        run: uv publish --token ${{ secrets.PYPI_TOKEN }}
-
-      - name: Create GitHub Release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          name: "llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            Release of llama-index-server v${{ steps.get_version.outputs.current_version }}
-          draft: false
-          prerelease: false
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -5,6 +5,7 @@ on:

 env:
  PYTHON_VERSION: "3.9"
+  UI_TEST: "true"

 jobs:
  unit-test:
@@ -19,20 +20,27 @@ jobs:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
        with:
-          python-version: ${{ matrix.python-version }}
+          node-version-file: ".nvmrc"
+          cache: "pnpm"

      - name: Install dependencies
        shell: bash
-        run: uv sync --all-extras --dev
+        run: pnpm install && pnpm build

      - name: Run unit tests
        shell: bash
@@ -46,20 +54,20 @@ jobs:
        working-directory: python/llama-index-server
    steps:
      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras --dev
+        run: pnpm install

      - name: Run mypy
        shell: bash
@@ -73,27 +81,56 @@ jobs:
        working-directory: python/llama-index-server
    steps:
      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
+      - uses: pnpm/action-setup@v3

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

-      - name: Install build package
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "pnpm"
+
+      - name: Install dependencies
+        run: pnpm install && pnpm build
+
+      - name: Build package
        shell: bash
-        run: uv sync --all-extras
+        run: uv build
+
+      - name: Get the absolute wheel file path and save it to the output
+        shell: bash
+        id: get_whl_path
+        run: |
+          # Resolve to an absolute path: the following steps run from the workspace root,
+          # not from this job's default working directory.
+          WHL_FILE=$(readlink -f dist/*.whl)
+          echo "whl_file=$WHL_FILE" >> "$GITHUB_OUTPUT"

      - name: Test import
        shell: bash
-        run: uv run python -c "from llama_index.server import LlamaIndexServer"
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          # Install the freshly built wheel (not the source tree) and import from it.
+          # The path is quoted so whitespace in it cannot split the argument.
+          uv run --with "$WHL_FILE" python -c "from llama_index.server import LlamaIndexServer"
+
+      - name: Check frontend resources is present
+        shell: bash
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          # Calls check_ui_resources() from the installed wheel.
+          uv run --with "$WHL_FILE" python -c "from llama_index.server.chat_ui import check_ui_resources; check_ui_resources()"

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: llama-index-server
-          path: python/llama-index-server/dist/
+          path: dist/
@@ -7,6 +7,7 @@ build/
 .next/
 out/
 packages/server/server/
+packages/server/project/
 **/playwright-report/
 **/test-results/

@@ -0,0 +1,201 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Repository Overview
+
+Create-llama is a monorepo containing CLI tools and server frameworks for building LlamaIndex-powered applications. The repository combines TypeScript/Node.js and Python components in a unified development environment.
+
+## Architecture
+
+### Monorepo Structure
+
+- **`packages/create-llama/`**: Main CLI tool for scaffolding LlamaIndex applications
+- **`packages/server/`**: TypeScript/Next.js server framework (`@llamaindex/server`)
+- **`python/llama-index-server/`**: Python/FastAPI server framework
+- **Root**: Workspace configuration and shared development tools
+
+### Key Technologies
+
+- **Package Manager**: pnpm with workspace configuration
+- **Build Tools**: bunchee (TypeScript), Next.js, hatchling (Python)
+- **Testing**: Playwright for e2e, pytest for Python
+- **Version Management**: changesets for TypeScript packages; the Python package is versioned and released through the root `pnpm new-version` and `pnpm release` scripts
+
+## Development Commands
+
+### Root Level (Monorepo)
+
+```bash
+pnpm dev          # Start all packages in development mode
+pnpm build        # Build all packages
+pnpm lint         # ESLint across TypeScript packages
+pnpm format       # Prettier formatting
+pnpm e2e          # Run end-to-end tests
+```
+
+### Create-llama Package
+
+```bash
+cd packages/create-llama
+npm run build     # Build CLI using bash script and ncc
+npm run dev       # Watch mode development
+npm run e2e       # Playwright tests for generated projects
+npm run clean     # Clean build artifacts and template caches
+```
+
+### TypeScript Server Package
+
+```bash
+cd packages/server
+pnpm dev          # Watch mode with bunchee
+pnpm build        # Multi-step build: ESM/CJS + Next.js + static assets
+pnpm clean        # Clean all build outputs
+```
+
+### Python Server Package
+
+```bash
+cd python/llama-index-server
+uv run generate   # Index data files
+fastapi dev       # Start development server with hot reload
+pytest            # Run test suite
+```
+
+## Template System
+
+The CLI uses a sophisticated template system in `packages/create-llama/templates/`:
+
+### Organization
+
+- **`types/`**: Base project structures (streaming, reflex, llamaindexserver)
+- **`components/`**: Reusable components across frameworks
+  - `engines/` - Chat and agent engines
+  - `loaders/` - File, web, database loaders
+  - `providers/` - AI model configurations
+  - `vectordbs/` - Vector database integrations
+  - `use-cases/` - Workflow implementations
+
+### Development Workflow
+
+- Templates support multiple frameworks (Next.js, Express, FastAPI)
+- Component system allows mix-and-match functionality
+- E2E tests validate generated projects work correctly
+
+## Server Framework Architecture
+
+### TypeScript Server (`@llamaindex/server`)
+
+- **Core**: `LlamaIndexServer` class wrapping Next.js with workflow support
+- **Frontend**: React-based chat UI with shadcn/ui components
+- **API**: `/api/chat` endpoint with streaming responses
+- **Build Process**: Complex multi-step build including static assets for Python integration
+
+### Python Server (`llama-index-server`)
+
+- **Core**: `LlamaIndexServer` class extending FastAPI
+- **Architecture**: Workflow factory pattern for stateless request handling
+- **UI Generation**: AI-powered React component generation from Pydantic schemas
+- **Development**: Hot reloading support with dev mode
+
+## Common Patterns
+
+### Workflow Integration
+
+Both server frameworks use factory patterns:
+
+```typescript
+// TypeScript
+const server = new LlamaIndexServer({
+  workflow: (context) => createWorkflow(context)
+});
+
+// Python
+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    return MyWorkflow(chat_request.messages)
+```
+
+### Event System
+
+Structured events for UI communication:
+
+- **UIEvent**: Custom components with Pydantic/Zod schemas
+- **ArtifactEvent**: Code/documents for Canvas panel
+- **SourceNodesEvent**: Document sources with metadata
+- **AgentRunEvent**: Tool usage and progress tracking
+
+### File Handling
+
+- Both servers auto-mount `data/` and `output/` directories
+- LlamaCloud integration for remote file access
+- Static file serving through framework-specific methods
+
+## Testing Strategy
+
+### E2E Testing
+
+- Playwright tests in `packages/create-llama/e2e/`
+- Tests both Python and TypeScript generated projects
+- Validates CLI generation and application functionality
+
+### Unit Testing
+
+- Python: pytest with comprehensive API and service tests
+- TypeScript: Integrated testing through build process
+
+## Build Process
+
+### Create-llama CLI
+
+1. TypeScript compilation with bash script
+2. ncc bundling for standalone executable
+3. Template validation and caching
+
+### Server Package Build
+
+1. **prebuild**: Clean directories
+2. **build**: bunchee compilation to ESM/CJS
+3. **postbuild**: Next.js preparation and static asset generation
+4. **prepare:py-static**: Python integration assets
+
+### Release Process
+
+```bash
+pnpm release     # Build all + publish npm packages + Python release
+```
+
+## Development Environment Setup
+
+### Prerequisites
+
+- Node.js >=16.14.0
+- Python with uv package manager
+- pnpm for package management
+
+### Common Workflow
+
+1. Clone repository and run `pnpm install`
+2. For CLI development: work in `packages/create-llama/`
+3. For server development: choose TypeScript or Python package
+4. Use `pnpm dev` for concurrent development across packages
+5. Run `pnpm e2e` to validate changes with generated projects
+
+## Special Considerations
+
+### Template Development
+
+- Changes to templates require rebuilding CLI
+- E2E tests validate template functionality across frameworks
+- Template caching system speeds up repeated builds
+
+### Cross-package Dependencies
+
+- Server package builds static assets for Python integration
+- Version synchronization between TypeScript and Python packages
+- Shared UI components and styling across implementations
+
+### Performance
+
+- CLI uses caching for template operations
+- Server frameworks support streaming responses
+- Background processing for file operations and LlamaCloud integration
@@ -57,6 +57,9 @@ export default tseslint.config(
      "**/out/**",
      "**/node_modules/**",
      "**/build/**",
+      "packages/server/server/**",
+      "packages/server/project/**",
+      "packages/server/bin/**",
    ],
  },
 );
@@ -13,7 +13,8 @@
  },
  "license": "MIT",
  "workspaces": [
-    "packages/*"
+    "packages/*",
+    "python/*"
  ],
  "scripts": {
    "dev": "pnpm -r dev",
@@ -24,8 +25,10 @@
    "format:write": "prettier --ignore-unknown --write .",
    "prepare": "husky",
    "new-snapshot": "pnpm -r build && changeset version --snapshot",
-    "new-version": "pnpm -r build && changeset version",
-    "release": "pnpm -r build && changeset publish",
+    "new-version-python": "pnpm --filter @create-llama/llama-index-server new-version",
+    "new-version": "pnpm -r build && changeset version && pnpm new-version-python",
+    "release-python": "pnpm --filter @create-llama/llama-index-server release",
+    "release": "pnpm -r build && changeset publish && pnpm release-python",
    "release-snapshot": "pnpm -r build && changeset publish --tag snapshot"
  },
  "devDependencies": {
@@ -1,5 +1,24 @@
 # create-llama

+## 0.5.19
+
+### Patch Changes
+
+- 5fe9e17: support eject to fully customize next folder
+- b8a1ff6: Support citation for agentic template (Python)
+
+## 0.5.18
+
+### Patch Changes
+
+- 8d59ef0: Add layout_dir config to the generated python code
+
+## 0.5.17
+
+### Patch Changes
+
+- eee3230: feat: support custom layout
+
 ## 0.5.16

 ### Patch Changes
@@ -0,0 +1,108 @@
+# create-llama Package
+
+## Overview
+
+The `create-llama` package is a CLI tool for creating LlamaIndex-powered applications with one command. It's designed as a project generator that scaffolds various types of RAG (Retrieval-Augmented Generation) applications using different frameworks, databases, and AI model providers.
+
+## Package Structure
+
+### Core Files
+
+- **`index.ts`**: Main CLI entry point using Commander.js for argument parsing
+- **`create-app.ts`**: Core application creation logic and orchestration
+- **`package.json`**: Package configuration with binary entry point at `./dist/index.js`
+
+### Key Directories
+
+- **`helpers/`**: Utility functions for package management, file operations, and configuration
+- **`questions/`**: Interactive prompts for user configuration
+- **`templates/`**: Project templates for different frameworks and use cases
+- **`e2e/`**: End-to-end tests using Playwright
+
+## Core Functionality
+
+### CLI Interface
+
+The tool accepts numerous command-line options including:
+
+- Framework selection (`--framework`: nextjs, express, fastapi)
+- Template type (`--template`: streaming, multiagent, reflex, llamaindexserver)
+- Model providers (OpenAI, Anthropic, Groq, Ollama, etc.)
+- Vector databases (none, mongo, pg, pinecone, milvus, etc.)
+- Data sources (files, web URLs, databases)
+- Tools and observability options
+
+### Application Generation Flow
+
+1. **Project validation**: Checks project name validity and directory permissions
+2. **Interactive questioning**: Prompts user for configuration if not provided via CLI
+3. **Template installation**: Copies and configures appropriate templates
+4. **Environment setup**: Creates `.env` files with API keys and configuration
+5. **Dependencies**: Installs packages using detected/specified package manager
+6. **Post-install actions**: Can run the app, open VSCode, or install dependencies
+
+### Template System
+
+Templates are organized by:
+
+- **Framework**: NextJS (frontend), Express (Node backend), FastAPI (Python backend)
+- **Type**: Streaming chat, multiagent workflows, Reflex UI, LlamaIndex server
+- **Components**: Engines, loaders, providers, UI components, observability
+
+### Helper Functions
+
+Key helper modules include:
+
+- **Installation**: Package manager detection and dependency installation
+- **Data sources**: File copying, web scraping, database connection setup
+- **Providers**: Model provider configuration (OpenAI, Anthropic, etc.)
+- **Tools**: Integration with external tools (Wikipedia, weather, code generation)
+- **Environment**: `.env` file generation with API keys and settings
+
+## Development Commands
+
+### Build & Development
+
+- `npm run build`: Build the CLI using bash script
+- `npm run dev`: Watch mode development build
+- `npm run clean`: Clean build artifacts and temporary files
+
+### Testing
+
+- `npm run e2e`: Run all end-to-end tests
+- `npm run e2e:python`: Test Python-specific templates
+- `npm run e2e:typescript`: Test TypeScript-specific templates
+
+### Package Management
+
+- `npm run pack-install`: Create and install local package for testing
+
+## Architecture Notes
+
+### Model Configuration
+
+The tool supports multiple AI providers with a unified `ModelConfig` interface that includes:
+
+- Provider selection and API key management
+- Model and embedding model specification
+- Dimension configuration for embeddings
+
+### Data Source Handling
+
+Flexible data source configuration supporting:
+
+- Local files and directories
+- Web URLs with configurable crawling depth
+- Database connections with custom queries
+- Automatic file downloading and copying
+
+### Template Flexibility
+
+Templates use a component-based system allowing mix-and-match of:
+
+- Different frameworks (NextJS, Express, FastAPI)
+- Various vector databases
+- Multiple observability tools
+- Configurable tools and integrations
+
+This package serves as the foundation for rapidly prototyping and deploying LlamaIndex applications across different technology stacks and use cases.
@@ -1,5 +1,5 @@
 import { expect, test } from "@playwright/test";
-import { ChildProcess } from "child_process";
+import { ChildProcess, execSync } from "child_process";
 import fs from "fs";
 import path from "path";
 import type {
@@ -28,6 +28,7 @@ const templateUseCases = [
  "deep_research",
  "code_generator",
 ];
+const ejectDir = "next";

 for (const useCase of templateUseCases) {
  test.describe(`Test use case ${useCase} ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
@@ -110,6 +111,28 @@ for (const useCase of templateUseCases) {
      expect(response.ok()).toBeTruthy();
    });

+    test("Should successfully eject, install dependencies and build without errors", async () => {
+      test.skip(
+        templateFramework !== "nextjs" ||
+          useCase !== "code_generator" ||
+          dataSource === "--llamacloud",
+        "Eject test only applies to Next.js framework, code generator use case, and non-llamacloud",
+      );
+
+      // Run eject command
+      execSync("npm run eject", { cwd: path.join(cwd, name) });
+
+      // Verify next directory exists
+      const nextDirExists = fs.existsSync(path.join(cwd, name, ejectDir));
+      expect(nextDirExists).toBeTruthy();
+
+      // Install dependencies in next directory
+      execSync("npm install", { cwd: path.join(cwd, name, ejectDir) });
+
+      // Run build
+      execSync("npm run build", { cwd: path.join(cwd, name, ejectDir) });
+    });
+
    // clean processes
    test.afterAll(async () => {
      appProcess?.kill();
@@ -5,6 +5,7 @@ import { parse, stringify } from "smol-toml";
 import terminalLink from "terminal-link";
 import { isUvAvailable, tryUvSync } from "./uv";

+import { isCI } from "ci-info";
 import { assetRelocator, copy } from "./copy";
 import { templatesDir } from "./dir";
 import { Tool } from "./tools";
@@ -278,6 +279,19 @@ const getAdditionalDependencies = (
    }
  }

+  // If app template is llama-index-server and CI and SERVER_PACKAGE_PATH is set,
+  // add the llama-index-server package from that local file path to dependencies
+  if (
+    templateType === "llamaindexserver" &&
+    isCI &&
+    process.env.SERVER_PACKAGE_PATH
+  ) {
+    dependencies.push({
+      name: "llama-index-server",
+      version: `@file://${process.env.SERVER_PACKAGE_PATH}`,
+    });
+  }
+
  return dependencies;
 };

@@ -578,6 +592,12 @@ const installLlamaIndexServerTemplate = async ({
    cwd: path.join(templatesDir, "components", "ui", "use-cases", useCase),
  });

+  // Copy layout components to layout folder in root
+  await copy("*", path.join(root, "layout"), {
+    parents: true,
+    cwd: path.join(templatesDir, "components", "ui", "layout"),
+  });
+
  if (useLlamaParse) {
    await copy("index.py", path.join(root, "app"), {
      parents: true,
@@ -42,12 +42,18 @@ const installLlamaIndexServerTemplate = async ({
    rename: assetRelocator,
  });

-  // copy workflow UI components to output/components folder
+  // copy workflow UI components to components folder in root
  await copy("*", path.join(root, "components"), {
    parents: true,
    cwd: path.join(templatesDir, "components", "ui", "use-cases", useCase),
  });

+  // copy layout components to layout folder in root
+  await copy("*", path.join(root, "layout"), {
+    parents: true,
+    cwd: path.join(templatesDir, "components", "ui", "layout"),
+  });
+
  // Override generate.ts if workflow use case doesn't use custom UI
  if (vectorDb === "llamacloud") {
    await copy("generate.ts", path.join(root, "src"), {
@@ -1,6 +1,6 @@
 {
  "name": "create-llama",
-  "version": "0.5.16",
+  "version": "0.5.19",
  "description": "Create LlamaIndex-powered apps with one command",
  "keywords": [
    "rag",
@@ -0,0 +1,40 @@
+"use client";
+
+import { Sparkles, Star } from "lucide-react";
+
+/**
+ * Header bar component.
+ *
+ * Renders the "LlamaIndex App" title next to a Sparkles icon on the left and,
+ * on the right, a "Built by LlamaIndex" link with the llama logo plus a
+ * "Star on GitHub" link. Both links open in a new tab.
+ */
+export default function Header() {
+  return (
+    <div className="flex items-center justify-between px-4 pt-2">
+      <div className="flex items-center gap-2">
+        <Sparkles className="size-4" />
+        <h1 className="font-semibold">LlamaIndex App</h1>
+      </div>
+      <div className="flex items-center justify-end gap-4">
+        <div className="flex items-center gap-2">
+          <a
+            href="https://www.llamaindex.ai/"
+            target="_blank"
+            rel="noopener noreferrer"
+            className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
+          >
+            Built by LlamaIndex
+          </a>
+          <img
+            className="h-[24px] w-[24px] rounded-sm"
+            src="/llama.png"
+            alt="Llama Logo"
+          />
+        </div>
+        <a
+          href="https://github.com/run-llama/LlamaIndexTS"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="hover:bg-accent flex items-center gap-2 rounded-md border border-gray-300 px-2 py-1 text-sm"
+        >
+          <Star className="size-4" />
+          Star on GitHub
+        </a>
+      </div>
+    </div>
+  );
+}
@@ -3,9 +3,12 @@ from typing import Optional
 from app.index import get_index
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.settings import Settings
-from llama_index.llms.openai import OpenAI
 from llama_index.server.api.models import ChatRequest
 from llama_index.server.tools.index import get_query_engine_tool
+from llama_index.server.tools.index.citation import (
+    CITATION_SYSTEM_PROMPT,
+    enable_citation,
+)


 def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
@@ -14,9 +17,16 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow
        raise RuntimeError(
            "Index not found! Please run `uv run generate` to index the data first."
        )
-    query_tool = get_query_engine_tool(index=index)
+    # Create a query tool with citations enabled
+    query_tool = enable_citation(get_query_engine_tool(index=index))
+
+    # Define the system prompt for the agent
+    # Append the citation system prompt to the system prompt
+    system_prompt = """You are a helpful assistant"""
+    system_prompt += CITATION_SYSTEM_PROMPT
+
    return AgentWorkflow.from_tools_or_functions(
        tools_or_functions=[query_tool],
-        llm=Settings.llm or OpenAI(model="gpt-4o-mini"),
-        system_prompt="You are a helpful assistant.",
+        llm=Settings.llm,
+        system_prompt=system_prompt,
    )
@@ -41,6 +41,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "What standards for a letter exist?" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can run `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -8,7 +8,7 @@ First, install the dependencies:
 npm install
 ```

-Third, run the development server:
+Second, run the development server:

 ```
 npm run dev
@@ -34,7 +34,7 @@ AI-powered code generator that can help you generate app with a chat interface,

 To update the workflow, you can modify the code in [`workflow.ts`](app/workflow.ts).

-You can start by sending an request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:
+You can start by sending a request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:

 ```shell
 curl --location 'localhost:3000/api/chat' \
@@ -42,6 +42,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Compare the financial performance of Apple and Tesla" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can run `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -53,6 +53,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Compare the financial performance of Apple and Tesla" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can run `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -8,7 +8,7 @@ First, install the dependencies:
 npm install
 ```

-Third, run the development server:
+Second, run the development server:

 ```
 npm run dev
@@ -34,7 +34,7 @@ AI-powered document generator that can help you generate documents with a chat i

 To update the workflow, you can modify the code in [`workflow.ts`](app/workflow.ts).

-You can start by sending an request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:
+You can start by sending a request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:

 ```shell
 curl --location 'localhost:3000/api/chat' \
@@ -42,6 +42,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Compare the financial performance of Apple and Tesla" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -41,6 +41,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Generate a financial report that compares the financial performance of Apple and Tesla" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -1,8 +1,9 @@
+import { OpenAI } from "@llamaindex/openai";
 import { generateEventComponent } from "@llamaindex/server";
 import * as dotenv from "dotenv";
 import "dotenv/config";
 import * as fs from "fs/promises";
-import { LLamaCloudFileService, OpenAI } from "llamaindex";
+import { LLamaCloudFileService } from "llamaindex";
 import * as path from "path";
 import { getIndex } from "./app/data";
 import { initSettings } from "./app/settings";
@@ -8,5 +8,5 @@ from llama_index.llms.openai import OpenAI
 def init_settings():
    if os.getenv("OPENAI_API_KEY") is None:
        raise RuntimeError("OPENAI_API_KEY is missing in environment variables")
-    Settings.llm = OpenAI(model="gpt-4o-mini")
-    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
+    Settings.llm = OpenAI(model="gpt-4.1")
+    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
@@ -16,8 +16,8 @@ def create_app():
        workflow_factory=create_workflow,  # A factory function that creates a new workflow for each request
        ui_config=UIConfig(
            component_dir=COMPONENT_DIR,
-            app_title="Chat App",
            dev_mode=True,  # Please disable this in production
+            layout_dir="layout",
        ),
        logger=logger,
        env="dev",
@@ -12,7 +12,7 @@ dependencies = [
    "pydantic<2.10",
    "aiostream>=0.5.2,<0.6.0",
    "llama-index-core>=0.12.28,<0.13.0",
-    "llama-index-server>=0.1.16,<0.2.0",
+    "llama-index-server>=0.1.17,<0.2.0",
 ]

 [project.optional-dependencies]
@@ -46,6 +46,9 @@ disable_error_code = [ "return-value", "assignment" ]
 module = "app.*"
 ignore_missing_imports = false

+[tool.hatch.metadata]
+allow-direct-references = true
+
 [build-system]
 requires = [ "hatchling>=1.24" ]
-build-backend = "hatchling.build"
+build-backend = "hatchling.build"
@@ -6,7 +6,8 @@
    "generate:datasource": "tsx src/generate.ts datasource",
    "generate:ui": "tsx src/generate.ts ui",
    "dev": "nodemon",
-    "start": "tsx src/index.ts"
+    "start": "tsx src/index.ts",
+    "eject": "llamaindex-server eject"
  },
  "dependencies": {
    "@llamaindex/openai": "~0.4.0",
@@ -8,7 +8,6 @@ initSettings();
 new LlamaIndexServer({
  workflow: workflowFactory,
  uiConfig: {
-    appTitle: "LlamaIndex App",
    componentsDir: "components",
    devMode: true,
  },
@@ -1,5 +1,8 @@
 # server contains Nextjs frontend code (not compiled)
 server/

+# the ejected nextjs project
+project/
+
 # temp is the copy of next folder but without API folder, used to build frontend static files
 temp/
@@ -1,5 +1,20 @@
 # @llamaindex/server

+## 0.2.4
+
+### Patch Changes
+
+- 5fe9e17: support eject to fully customize next folder
+- b8a1ff6: Bump version: chat-ui@0.4.6
+
+## 0.2.3
+
+### Patch Changes
+
+- eee3230: feat: support custom layout
+- 0bc5a0d: Add suggestNextQuestions config
+- 3acec88: chore: bump chat-ui
+
 ## 0.2.2

 ### Patch Changes
@@ -0,0 +1,160 @@
+# @llamaindex/server Package
+
+This package provides a Next.js-based server framework for running LlamaIndex workflows with both API endpoints and a chat UI interface.
+
+## Overview
+
+The `@llamaindex/server` package (`src/`) allows you to quickly launch LlamaIndex Workflows and Agent Workflows as an API server with an optional sophisticated chat UI. It combines a backend API server with a frontend React interface built on Next.js.
+
+## Key Components
+
+### Core Server (src/server.ts)
+
+- **LlamaIndexServer class**: Main server implementation that wraps Next.js
+- Handles workflow factory initialization and UI configuration
+- Manages custom components and layout directories
+- Creates HTTP server with custom routing for chat API
+- Automatically configures client-side config in `public/config.js`
+
+### Chat Handler (src/handlers/chat.ts)
+
+- **handleChat function**: Processes POST requests to `/api/chat`
+- Converts AI SDK messages to LlamaIndex format
+- Manages workflow execution with abort signals
+- Streams responses back to client with optional question suggestions
+- Handles errors and validation
+
+### Workflow Management (src/utils/workflow.ts)
+
+- **runWorkflow function**: Executes workflows with proper event handling
+- Transforms workflow events (tool calls, source nodes) into UI-friendly formats
+- Downloads LlamaCloud files automatically in background
+- Processes agent events and source annotations
+
+### Event System (src/events.ts)
+
+- **Source Events**: For displaying document/file sources with metadata
+- **Agent Events**: For showing agent tool usage and progress
+- **Artifact Events**: For structured data like code/documents sent to Canvas UI
+- Helper functions for converting LlamaIndex data to UI events
+
+### UI Generation (src/utils/gen-ui.ts)
+
+- **generateEventComponent function**: Uses LLM to auto-generate React components
+- Creates workflow for UI planning, aggregation, and code generation
+- Validates generated components against supported dependencies
+- Supports shadcn/ui, lucide-react, tailwind CSS, and LlamaIndex chat-ui
+
+### Types (src/types.ts)
+
+- **WorkflowFactory**: Function signature for creating workflow instances
+- **UIConfig**: Configuration options for chat interface
+- **LlamaIndexServerOptions**: Main server configuration interface
+
+## Next.js Frontend
+
+The `next/` directory contains the React frontend:
+
+### API Routes
+
+- `/api/chat/route.ts`: Main chat endpoint (delegates to handleChat)
+- `/api/components/route.ts`: Serves custom UI components
+- `/api/layout/route.ts`: Serves custom layout components
+- `/api/files/[...slug]/route.ts`: File serving for data/output folders
+
+### UI Components
+
+- Chat interface with message history, streaming responses, and canvas panel
+- Extensible component system for custom workflow events
+- Custom layout support for headers/footers
+- Built with shadcn/ui components and Tailwind CSS
+
+## Build Process
+
+### Development
+
+```bash
+pnpm dev  # Watch mode with bunchee
+```
+
+### Production Build
+
+```bash
+pnpm build  # Multi-step build process
+```
+
+The build process:
+
+1. **prebuild**: Cleans dist, server, and temp directories
+2. **build**: Compiles source with bunchee to ESM/CJS
+3. **postbuild**: Prepares TypeScript server and Python static assets
+4. **prepare:ts-server**: Copies Next.js app, builds CSS, compiles API routes
+5. **prepare:py-static**: Creates static build for Python integration
+
+## Key Features
+
+### Workflow Integration
+
+- Factory pattern for creating workflow instances per request
+- Supports Agent Workflows with startAgentEvent/stopAgentEvent contract
+- Automatic event transformation and streaming
+- Built-in tool call and source node handling
+
+### UI Extensibility
+
+- AI-generated components based on Zod schemas
+- Custom layout sections (header/footer)
+- Canvas panel for artifacts (documents, code)
+- Event aggregation and real-time updates
+
+### File Handling
+
+- Automatic mounting of `data/` and `output/` folders
+- LlamaCloud file downloads in background
+- Static asset serving through Next.js
+
+### Development Features
+
+- Hot reload support for workflow code (beta)
+- Dev mode panel for live code editing
+- TypeScript support throughout
+- Comprehensive error handling
+
+## Configuration
+
+Server configuration through `LlamaIndexServerOptions`:
+
+- `workflow`: Factory function for creating workflow instances
+- `uiConfig.starterQuestions`: Predefined questions for chat interface
+- `uiConfig.componentsDir`: Directory for custom event components
+- `uiConfig.layoutDir`: Directory for custom layout components
+- `uiConfig.llamaCloudIndexSelector`: Enable LlamaCloud integration
+- `uiConfig.devMode`: Enable live code editing
+- `suggestNextQuestions`: Auto-suggest follow-up questions
+
+## Dependencies
+
+### Runtime Dependencies
+
+- Next.js 15+ for server framework
+- React 19+ for UI components
+- LlamaIndex workflow engine
+- Radix UI components (shadcn/ui)
+- AI SDK for streaming responses
+
+### Development Dependencies
+
+- Bunchee for bundling
+- TypeScript for type safety
+- Tailwind CSS for styling
+- PostCSS for CSS processing
+
+## Usage Patterns
+
+1. **Basic Setup**: Create workflow factory, configure UI, start server
+2. **Custom Events**: Define Zod schemas, generate UI components with LLM
+3. **File Integration**: Use data/output folders for document processing
+4. **Development**: Use dev mode for iterative workflow development
+5. **Production**: Build static assets for deployment with Python backend
+
+The package serves as a complete solution for deploying LlamaIndex workflows with professional chat interfaces and extensible UI components.
@@ -30,7 +30,6 @@ const createWorkflow = () => agent({ tools: [wiki()], llm: openai("gpt-4o") });
 new LlamaIndexServer({
  workflow: createWorkflow,
  uiConfig: {
-    appTitle: "LlamaIndex App",
    starterQuestions: ["Who is the first president of the United States?"],
  },
 }).start();
@@ -60,11 +59,12 @@ The `LlamaIndexServer` accepts the following configuration options:

 - `workflow`: A callable function that creates a workflow instance for each request. See [Workflow factory contract](#workflow-factory-contract) for more details.
 - `uiConfig`: An object to configure the chat UI containing the following properties:
-  - `appTitle`: The title of the application (default: `"LlamaIndex App"`)
  - `starterQuestions`: List of starter questions for the chat UI (default: `[]`)
  - `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
+  - `layoutDir`: The directory for custom layout sections. The default value is `layout`. See [Custom Layout](#custom-layout) for more details.
  - `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
   - `devMode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `src/app/workflow.ts`. Please start the server in dev mode (`npm run dev`) to see this reload feature enabled.
+- `suggestNextQuestions`: Whether to suggest next questions after the assistant's response (default: `true`). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable.

 LlamaIndexServer accepts all the configuration options from Nextjs Custom Server such as `port`, `hostname`, `dev`, etc.
 See all Nextjs Custom Server options [here](https://nextjs.org/docs/app/building-your-application/configuring/custom-server).
@@ -186,6 +186,28 @@ Feel free to modify the generated code to match your needs. If you're not satisf

 > Note that `generateEventComponent` is generating JSX code, but you can also provide a TSX file.

+## Custom Layout
+
+LlamaIndex Server supports a custom layout for the header and footer. To use a custom layout, you need to initialize the LlamaIndex server with the `layoutDir` that contains your custom layout files.
+
+```ts
+new LlamaIndexServer({
+  workflow: createWorkflow,
+  uiConfig: {
+    layoutDir: "layout",
+  },
+}).start();
+```
+
+```
+layout/
+  header.tsx
+  footer.tsx
+```
+
+We currently support a custom header and footer for the chat interface. The syntax for these files is the same as for the event components in the components directory.
+Note that by default, we still render the default LlamaIndex header. It is also the fallback when an error occurs while rendering the custom header. Example layout files will be generated in the `layout` directory of your project when creating a new project with `create-llama`.
+
 ### Server Setup

 To use the generated UI components, you need to initialize the LlamaIndex server with the `componentsDir` that contains your custom UI components:
@@ -194,7 +216,6 @@ To use the generated UI components, you need to initialize the LlamaIndex server
 new LlamaIndexServer({
  workflow: createWorkflow,
  uiConfig: {
-    appTitle: "LlamaIndex App",
    componentsDir: "components",
  },
 }).start();
@@ -279,6 +300,23 @@ The server always provides a chat interface at the root path (`/`) with:
 - The server automatically mounts the `data` and `output` folders at `{server_url}{api_prefix}/files/data` (default: `/api/files/data`) and `{server_url}{api_prefix}/files/output` (default: `/api/files/output`) respectively.
 - Your workflows can use both folders to store and access files. By convention, the `data` folder is used for documents that are ingested, and the `output` folder is used for documents generated by the workflow.

+### Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+By default, the ejected project will be in the `next` directory in the current working directory. You can change the output directory by providing a custom path after the `eject` command:
+
+```bash
+npm run eject <path-to-output-directory>
+```
+
+How eject works:
+
+1. Init nextjs project with eslint, prettier, postcss, tailwindcss, shadcn components, etc.
+2. Copy your workflow definition and setting files in src/app/\* to the ejected project in app/api/chat
+3. Copy your components, data, output, storage folders to the ejected project
+4. Copy your current .env file to the ejected project
+5. Clean up files that are no longer needed and update imports
+
 ## API Reference

 - [LlamaIndexServer](https://ts.llamaindex.ai/docs/api/classes/LlamaIndexServer)
@@ -0,0 +1,172 @@
+#!/usr/bin/env node
+
+const fs = require("fs").promises;
+const path = require("path");
+
+// Resolve the project directory in node_modules/@llamaindex/server/project
+// This is the template that is used to construct the Next.js project
+const projectDir = path.resolve(__dirname, "../project");
+
+// Resolve the src directory that contains workflow & setting files
+const srcDir = path.join(process.cwd(), "src");
+const srcAppDir = path.join(srcDir, "app");
+const generateFile = path.join(srcDir, "generate.ts");
+const envFile = path.join(process.cwd(), ".env");
+
+// The environment variables that are used as LlamaIndexServer configs.
+// Each entry has a `key` (the variable name), a `defaultValue` and a
+// `description`. During eject, every key that is missing from the ejected
+// project's .env file is appended as a `# description` comment line followed
+// by `key=defaultValue`.
+const SERVER_CONFIG_VARS = [
+  {
+    key: "OPENAI_API_KEY",
+    defaultValue: "<your-openai-api-key>",
+    description: "OpenAI API key",
+  },
+  {
+    key: "SUGGEST_NEXT_QUESTIONS",
+    defaultValue: "true",
+    description: "Whether to suggest next questions (`suggestNextQuestions`)",
+  },
+  {
+    key: "COMPONENTS_DIR",
+    defaultValue: "components",
+    description: "Directory for custom components (`componentsDir`)",
+  },
+  {
+    key: "WORKFLOW_FILE_PATH",
+    defaultValue: "app/api/chat/app/workflow.ts",
+    description: "The path to the workflow file (will be updated in dev mode)",
+  },
+  {
+    key: "NEXT_PUBLIC_USE_COMPONENTS_DIR",
+    defaultValue: "true",
+    description: "Whether to enable components directory feature on frontend",
+  },
+  {
+    key: "NEXT_PUBLIC_DEV_MODE",
+    defaultValue: "true",
+    description: "Whether to enable dev mode (`devMode`)",
+  },
+  {
+    key: "NEXT_PUBLIC_STARTER_QUESTIONS",
+    defaultValue: '["Summarize the document", "What are the key points?"]',
+    description:
+      "Initial questions to display in the chat (`starterQuestions`)",
+  },
+  {
+    key: "NEXT_PUBLIC_SHOW_LLAMACLOUD_SELECTOR",
+    defaultValue: "false",
+    description:
+      "Whether to show LlamaCloud selector for frontend (`llamaCloudIndexSelector`)",
+  },
+];
+
+async function eject() {
+  try {
+    // validate required directories (nextjs project template, src directory, src/app directory)
+    const requiredDirs = [projectDir, srcDir, srcAppDir];
+    for (const dir of requiredDirs) {
+      const exists = await fs
+        .access(dir)
+        .then(() => true)
+        .catch(() => false);
+      if (!exists) {
+        console.error("Error: directory does not exist at", dir);
+        process.exit(1);
+      }
+    }
+
+    // Get destination directory from command line arguments (pnpm eject <path>)
+    const args = process.argv;
+    const outputIndex = args.indexOf("eject");
+    const destDir =
+      outputIndex !== -1 && args[outputIndex + 1]
+        ? path.resolve(args[outputIndex + 1]) // Use provided path after eject
+        : path.join(process.cwd(), "next"); // Default to "next" folder in the current working directory
+
+    // remove destination directory if it exists
+    await fs.rm(destDir, { recursive: true, force: true });
+
+    // create destination directory
+    await fs.mkdir(destDir, { recursive: true });
+
+    // Copy the nextjs project template to the destination directory
+    await fs.cp(projectDir, destDir, { recursive: true });
+
+    // copy src/app/* to destDir/app/api/chat
+    const chatRouteDir = path.join(destDir, "app", "api", "chat");
+    await fs.cp(srcAppDir, path.join(chatRouteDir, "app"), { recursive: true });
+
+    // nextjs project doesn't depend on @llamaindex/server anymore, we need to update the imports in workflow file
+    const workflowFile = path.join(chatRouteDir, "app", "workflow.ts");
+    let workflowContent = await fs.readFile(workflowFile, "utf-8");
+    workflowContent = workflowContent.replace("@llamaindex/server", "../utils");
+    await fs.writeFile(workflowFile, workflowContent);
+
+    // copy generate.ts if it exists
+    const genFilePath = path.join(chatRouteDir, "generate.ts");
+    const genFileExists = await copy(generateFile, genFilePath);
+    if (genFileExists) {
+      // update the import @llamaindex/server in generate.ts
+      let genContent = await fs.readFile(genFilePath, "utf-8");
+      genContent = genContent.replace("@llamaindex/server", "./utils");
+      await fs.writeFile(genFilePath, genContent);
+    }
+
+    // copy folders in root directory if exists
+    const rootFolders = ["components", "data", "output", "storage"];
+    for (const folder of rootFolders) {
+      await copy(path.join(process.cwd(), folder), path.join(destDir, folder));
+    }
+
+    // copy .env if it exists or create a new one
+    const envFileExists = await copy(envFile, path.join(destDir, ".env"));
+    if (!envFileExists) {
+      await fs.writeFile(path.join(destDir, ".env"), "");
+    }
+
+    // update .env file with more server configs
+    let envFileContent = await fs.readFile(path.join(destDir, ".env"), "utf-8");
+    for (const envVar of SERVER_CONFIG_VARS) {
+      const { key, defaultValue, description } = envVar;
+      if (!envFileContent.includes(key)) {
+        // if the key is not exists in the env file, add it
+        envFileContent += `\n# ${description}\n${key}=${defaultValue}\n`;
+      }
+    }
+    await fs.writeFile(path.join(destDir, ".env"), envFileContent);
+
+    // rename gitignore -> .gitignore
+    await fs.rename(
+      path.join(destDir, "gitignore"),
+      path.join(destDir, ".gitignore"),
+    );
+
+    // user can customize layout directory in nextjs project, remove layout api
+    await fs.rm(path.join(destDir, "app", "api", "layout"), {
+      recursive: true,
+      force: true,
+    });
+
+    // remove no-needed files
+    await fs.unlink(path.join(destDir, "public", "config.js"));
+    await fs.unlink(path.join(destDir, "next-build.config.ts"));
+
+    console.log("Successfully ejected @llamaindex/server to", destDir);
+  } catch (error) {
+    console.error("Error during eject:", error.message);
+    process.exit(1);
+  }
+}
+
+// copy src to dest if src exists, return true if src exists
+async function copy(src, dest) {
+  const srcExists = await fs
+    .access(src)
+    .then(() => true)
+    .catch(() => false);
+  if (srcExists) {
+    await fs.cp(src, dest, { recursive: true });
+  }
+  return srcExists;
+}
+
+eject();
@@ -0,0 +1,186 @@
+# LlamaIndex Server Examples
+
+This package contains practical examples demonstrating how to use the `@llamaindex/server` package to build chat applications with LlamaIndex workflows.
+
+## Package Overview
+
+The examples package is a collection of standalone TypeScript applications that showcase different features and capabilities of the LlamaIndex Server framework. Each example can be run independently to demonstrate specific functionality.
+
+## Key Features Demonstrated
+
+### 1. Simple Workflow (`simple-workflow/calculator.ts`)
+
+- **Purpose**: Basic agent workflow with tool integration
+- **Features**: Calculator agent with add tool, starter questions
+- **Key Concepts**: Tool definition with Zod schemas, basic server setup
+
+### 2. Agentic RAG (`agentic-rag/index.ts`)
+
+- **Purpose**: Retrieval-Augmented Generation with document querying
+- **Features**: Vector store index, document ingestion, query engine tool, automatic question suggestions
+- **Key Concepts**: RAG implementation, source node inclusion, embedding models
+
+### 3. Custom Layout (`custom-layout/index.ts` + `layout/header.tsx`)
+
+- **Purpose**: Custom UI components and layout customization
+- **Features**: Weather agent with custom header layout, branded interface
+- **Key Concepts**: Layout directory configuration, React component integration
+
+### 4. Development Mode (`devmode/index.ts` + `src/app/workflow.ts`)
+
+- **Purpose**: Live development and hot reloading capabilities
+- **Features**: Dev mode panel, workflow file hot reloading, separate workflow file structure
+- **Key Concepts**: Development workflow, file watching, modular architecture
+
+## Development Scripts
+
+```bash
+# Type checking
+pnpm typecheck
+
+# Run development server (defaults to simple-workflow/calculator.ts)
+pnpm dev
+
+# Run specific examples
+npx nodemon --exec tsx agentic-rag/index.ts
+npx nodemon --exec tsx custom-layout/index.ts
+npx nodemon --exec tsx devmode/index.ts --ignore src/app/workflow_*.ts  # Dev mode with file watching
+```
+
+## Environment Setup
+
+All examples require OpenAI API access:
+
+```bash
+export OPENAI_API_KEY=your_openai_api_key
+```
+
+## Dependencies
+
+### Core Dependencies
+
+- `@llamaindex/server`: Main server framework (workspace dependency)
+- `@llamaindex/workflow`: Workflow engine for agent creation
+- `@llamaindex/openai`: OpenAI LLM and embedding integrations
+- `@llamaindex/tools`: Tool utilities
+- `@llamaindex/readers`: Document readers
+- `llamaindex`: Core LlamaIndex library
+- `zod`: Schema validation for tools
+
+### Development Dependencies
+
+- `tsx`: TypeScript execution for development
+- `nodemon`: File watching and auto-restart
+- `typescript`: TypeScript compiler
+
+## Architecture Patterns
+
+### Workflow Factory Pattern
+
+All examples use the workflow factory pattern:
+
+```typescript
+const workflowFactory = () => agent({ tools: [...] });
+// or
+const workflowFactory = async () => { /* setup logic */ return agent({ tools: [...] }); };
+```
+
+### Server Configuration
+
+Standard server setup pattern:
+
+```typescript
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: {
+    /* UI configuration */
+  },
+  port: 3000,
+}).start();
+```
+
+### Tool Definition Pattern
+
+Consistent tool creation with Zod schemas:
+
+```typescript
+tool({
+  name: "tool_name",
+  description: "Tool description",
+  parameters: z.object({
+    /* parameters */
+  }),
+  execute: (params) => {
+    /* implementation */
+  },
+});
+```
+
+## Example-Specific Features
+
+### Simple Workflow
+
+- Basic arithmetic operations
+- Minimal setup for learning
+- Demonstrates core workflow concepts
+
+### Agentic RAG
+
+- Document indexing with embeddings
+- Vector similarity search
+- Source node tracking for citations
+- Auto-generated follow-up questions
+
+### Custom Layout
+
+- Custom React components in `layout/` directory
+- Branded header with navigation
+- Layout directory configuration (`layoutDir: "layout"`)
+
+### Dev Mode
+
+- Live code editing in browser
+- Hot reloading of workflow files
+- Separate workflow file organization
+- Development panel UI
+
+## TypeScript Configuration
+
+- Target: ES2022 with bundler module resolution
+- Strict type checking enabled
+- Excludes: `node_modules`, `dist`, `custom-layout/layout` (runtime components)
+- Output: `dist/` directory
+
+## Development Workflow
+
+1. **Choose Example**: Select appropriate example for your use case
+2. **Environment Setup**: Configure OpenAI API key
+3. **Run Development Server**: Use `pnpm dev` or specific nodemon commands
+4. **Access UI**: Open browser at `http://localhost:3000`
+5. **Iterate**: Modify code and see changes in real-time
+
+## Common Patterns
+
+### Agent Creation
+
+All examples use the `agent()` function from `@llamaindex/workflow` with tool arrays.
+
+### UI Configuration
+
+- `starterQuestions`: Predefined questions for user guidance
+- `layoutDir`: Custom layout components directory
+- `devMode`: Enable development features
+- `suggestNextQuestions`: Auto-generate follow-up questions
+
+### Error Handling
+
+Examples demonstrate proper async/await patterns and error handling for LLM operations.
+
+## Integration Points
+
+- **LlamaIndex Core**: Document processing, indexing, querying
+- **OpenAI**: LLM and embedding model integration
+- **React/Next.js**: Frontend UI components and server-side rendering
+- **TypeScript**: Type safety throughout the application stack
+
+This examples package serves as a comprehensive reference for building production-ready chat applications with LlamaIndex workflows.
@@ -35,8 +35,8 @@ export const workflowFactory = async () => {

 new LlamaIndexServer({
  workflow: workflowFactory,
+  suggestNextQuestions: true,
  uiConfig: {
-    appTitle: "LlamaIndex App",
    starterQuestions: ["What is the color of the dog?"],
  },
  port: 3000,
@@ -0,0 +1,27 @@
+import { LlamaIndexServer } from "@llamaindex/server";
+import { agent } from "@llamaindex/workflow";
+import { tool } from "llamaindex";
+import { z } from "zod";
+
+// Agent with a single `weather` tool. The tool takes a `city` string and
+// returns a fixed "sunny" sentence for it.
+const weatherAgent = agent({
+  tools: [
+    tool({
+      name: "weather",
+      description: "Get the weather in a given city",
+      parameters: z.object({ city: z.string() }),
+      execute: ({ city }) => `The weather in ${city} is sunny`,
+    }),
+  ],
+});
+
+// Start the server on port 3000. The workflow factory returns the agent
+// defined above, and `layoutDir` points at the `layout` folder that holds the
+// custom layout files for this example.
+new LlamaIndexServer({
+  workflow: () => weatherAgent,
+  uiConfig: {
+    starterQuestions: [
+      "What is the weather in Tokyo?",
+      "What is the weather in Ho Chi Minh City?",
+    ],
+    layoutDir: "layout",
+  },
+  port: 3000,
+}).start();
@@ -0,0 +1,40 @@
+"use client";
+
+import { Sparkles, Star } from "lucide-react";
+
+/**
+ * Example custom header for the chat UI.
+ *
+ * Renders the app title with an icon on the left and, on the right, a
+ * "Built by LlamaIndex" link with the llama logo plus a "Star on GitHub" link.
+ * Both links open in a new tab.
+ */
+export default function Header() {
+  return (
+    <div className="flex items-center justify-between px-4 pt-2">
+      <div className="flex items-center gap-2">
+        <Sparkles className="size-4" />
+        <h1 className="font-semibold">LlamaIndex App</h1>
+      </div>
+      <div className="flex items-center justify-end gap-4">
+        <div className="flex items-center gap-2">
+          <a
+            href="https://www.llamaindex.ai/"
+            target="_blank"
+            rel="noopener noreferrer"
+            className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
+          >
+            Built by LlamaIndex
+          </a>
+          <img
+            className="h-[24px] w-[24px] rounded-sm"
+            src="/llama.png"
+            alt="Llama Logo"
+          />
+        </div>
+        <a
+          href="https://github.com/run-llama/LlamaIndexTS"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="hover:bg-accent flex items-center gap-2 rounded-md border border-gray-300 px-2 py-1 text-sm"
+        >
+          <Star className="size-4" />
+          Star on GitHub
+        </a>
+      </div>
+    </div>
+  );
+}
@@ -6,7 +6,6 @@ First, we need to set `devMode` to `true` in the `uiConfig` of the server.
 new LlamaIndexServer({
  workflow: workflowFactory,
  uiConfig: {
-    appTitle: "Calculator",
    devMode: true,
  },
  port: 3000,
@@ -4,7 +4,6 @@ import { workflowFactory } from "./src/app/workflow";
 new LlamaIndexServer({
  workflow: workflowFactory,
  uiConfig: {
-    appTitle: "Calculator",
    devMode: true,
    starterQuestions: [
      "What is the weather in Tokyo?",
@@ -17,7 +17,6 @@ const calculatorAgent = agent({
 new LlamaIndexServer({
  workflow: () => calculatorAgent,
  uiConfig: {
-    appTitle: "Calculator",
    starterQuestions: ["1 + 1", "2 + 2"],
  },
  port: 3000,
@@ -10,5 +10,5 @@
    "outDir": "dist"
  },
  "include": ["**/*"],
-  "exclude": ["node_modules", "dist"]
+  "exclude": ["node_modules", "dist", "custom-layout/layout"]
 }
@@ -0,0 +1,45 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Next.js](https://nextjs.org/) that was ejected from [`llamaindex-server`](https://github.com/run-llama/create-llama/tree/main/packages/server) via the `npm run eject` command.
+
+## Quick Start
+
+As this is a Next.js project, you can use the following commands to start the development server:
+
+```bash
+npm install
+npm run dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
+
+## Useful Commands
+
+- Generate Datasource (if you have a `./data` folder): `npm run generate`
+- Typecheck: `npm run typecheck`
+- Lint: `npm run lint`
+- Format: `npm run format`
+- Build & Start: `npm run build && npm run start`
+
+## Deployment
+
+The project can be deployed to any platform that supports Next.js like Vercel.
+
+## Configuration
+
+Your original [`llamaindex-server`](https://github.com/run-llama/create-llama/tree/main/packages/server#configuration-options) configurations have been migrated to a [`.env`](.env) file.
+
+Changing the `.env` file will change the behavior of the application, e.g. for changing the initial questions to display in the chat, you can do:
+
+```
+NEXT_PUBLIC_STARTER_QUESTIONS=["What is the capital of France?"]
+```
+
+Alternatively, you can also change the file referencing `process.env.NEXT_PUBLIC_STARTER_QUESTIONS` directly in the source code.
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
+- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (Typescript features).
+
+You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
@@ -4,16 +4,23 @@ import { type MessageType } from "llamaindex";
 import { NextRequest, NextResponse } from "next/server";

 // import chat utils
-import { toDataStream } from "./utils/stream";
-import { sendSuggestedQuestionsEvent } from "./utils/suggestion";
-import { runWorkflow } from "./utils/workflow";
+import {
+  runWorkflow,
+  sendSuggestedQuestionsEvent,
+  toDataStream,
+} from "./utils";

-// import workflow factory from local file
-import { workflowFactory } from "../../../../app/workflow";
+// import workflow factory and settings from local file
+import { initSettings } from "./app/settings";
+import { workflowFactory } from "./app/workflow";
+
+initSettings();

 export async function POST(req: NextRequest) {
  try {
    const reqBody = await req.json();
+    const suggestNextQuestions = process.env.SUGGEST_NEXT_QUESTIONS === "true";
+
    const { messages } = reqBody as { messages: Message[] };
    const chatHistory = messages.map((message) => ({
      role: message.role as MessageType,
@@ -53,7 +60,9 @@ export async function POST(req: NextRequest) {
            role: "assistant" as MessageType,
            content: completion,
          });
-          await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          if (suggestNextQuestions) {
+            await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          }
        },
      },
    });
@@ -1,71 +1,9 @@
-import fs from "fs";
-import { NextRequest, NextResponse } from "next/server";
-import path from "path";
-import { promisify } from "util";
+import { NextRequest } from "next/server";
+import { handleComponentRoute } from "../shared/component-handler";

 export async function GET(request: NextRequest) {
  const params = request.nextUrl.searchParams;
-  const componentsDir = params.get("componentsDir") || "components";
-
-  try {
-    const exists = await promisify(fs.exists)(componentsDir);
-    if (!exists) {
-      return NextResponse.json(
-        { error: "Components directory not found" },
-        { status: 404 },
-      );
-    }
-
-    const files = await promisify(fs.readdir)(componentsDir);
-
-    // filter files with valid extensions
-    const validExtensions = [".tsx", ".jsx"];
-    const filteredFiles = files.filter((file) =>
-      validExtensions.includes(path.extname(file)),
-    );
-
-    // filter duplicate components
-    const uniqueFiles = filterDuplicateComponents(filteredFiles);
-
-    const components = await Promise.all(
-      uniqueFiles.map(async (file) => {
-        const filePath = path.join(componentsDir, file);
-        const content = await promisify(fs.readFile)(filePath, "utf-8");
-        return {
-          type: path.basename(file, path.extname(file)),
-          code: content,
-          filename: file,
-        };
-      }),
-    );
-
-    return NextResponse.json(components, { status: 200 });
-  } catch (error) {
-    console.error("Error reading components:", error);
-    return NextResponse.json(
-      { error: "Failed to read components" },
-      { status: 500 },
-    );
-  }
-}
-
-function filterDuplicateComponents(files: string[]) {
-  const compMap = new Map<string, string>();
-
-  for (const file of files) {
-    const type = path.basename(file, path.extname(file));
-
-    if (compMap.has(type)) {
-      const existingComp = compMap.get(type)!;
-      if (file.endsWith(".tsx") && !existingComp.endsWith(".tsx")) {
-        // prefer .tsx files over others
-        console.warn(`Preferring ${file} over ${existingComp}`);
-        compMap.set(type, file);
-      }
-    } else {
-      compMap.set(type, file);
-    }
-  }
-
-  return Array.from(compMap.values());
+  const directory =
+    params.get("componentsDir") || process.env.COMPONENTS_DIR || "components";
+  return handleComponentRoute(directory);
 }
@@ -4,7 +4,8 @@ import { NextRequest, NextResponse } from "next/server";
 import path from "path";
 import { promisify } from "util";

-const DEFAULT_WORKFLOW_FILE_PATH = "src/app/workflow.ts"; // TODO: we can make it as a parameter in server later
+const DEFAULT_WORKFLOW_FILE_PATH =
+  process.env.WORKFLOW_FILE_PATH || "src/app/workflow.ts";

 export async function GET(request: NextRequest) {
  const filePath = DEFAULT_WORKFLOW_FILE_PATH;
@@ -0,0 +1,10 @@
+import { NextRequest } from "next/server";
+import { handleComponentRoute } from "../shared/component-handler";
+
+const LAYOUT_TYPES = ["header", "footer"] as const;
+
+/**
+ * GET handler of the layout route.
+ *
+ * Delegates to `handleComponentRoute`, restricted to the `header`/`footer`
+ * layout types, using the directory named by the `layoutDir` query parameter
+ * (falls back to "layout" when the parameter is missing or empty).
+ */
+export async function GET(request: NextRequest) {
+  const layoutDir = request.nextUrl.searchParams.get("layoutDir");
+  return handleComponentRoute(layoutDir || "layout", LAYOUT_TYPES);
+}
@@ -0,0 +1,80 @@
+import fs from "fs";
+import { NextResponse } from "next/server";
+import path from "path";
+import { promisify } from "util";
+
+const VALID_EXTENSIONS = [".tsx", ".jsx"];
+
+export type Item = {
+  type: string;
+  filename: string;
+  code: string;
+};
+
+/**
+ * Collapses files sharing the same base name (file name without extension)
+ * into a single entry, keeping the position of the first occurrence.
+ * When a `.tsx` file and a non-`.tsx` file share a base name, the `.tsx` file
+ * is kept and a warning is logged.
+ */
+function filterDuplicateFiles(files: string[]): string[] {
+  const chosenByType = new Map<string, string>();
+
+  files.forEach((candidate) => {
+    const type = path.basename(candidate, path.extname(candidate));
+    const current = chosenByType.get(type);
+
+    if (current === undefined) {
+      chosenByType.set(type, candidate);
+      return;
+    }
+
+    // Prefer .tsx files
+    if (candidate.endsWith(".tsx") && !current.endsWith(".tsx")) {
+      console.warn(`Preferring ${candidate} over ${current}`);
+      chosenByType.set(type, candidate);
+    }
+  });
+
+  return [...chosenByType.values()];
+}
+
+/**
+ * Reads the `.tsx`/`.jsx` files of `directory` (non-recursive) and returns
+ * them as a JSON response of `Item`s: `type` is the file name without its
+ * extension, `filename` the file name and `code` the file content.
+ *
+ * @param directory - Directory to scan.
+ * @param itemTypes - Optional allow-list of base names; when non-empty, only
+ *   files whose base name is in the list are returned.
+ * @returns A 200 response with the items, a 404 response when the directory
+ *   cannot be accessed, or a 500 response when reading the directory or one
+ *   of its files fails.
+ */
+export async function handleComponentRoute(
+  directory: string,
+  itemTypes?: readonly string[],
+): Promise<NextResponse> {
+  try {
+    // `fs.exists` is deprecated; probe with `fs.access` instead. As with
+    // `fs.exists`, any failure is treated as "directory not found".
+    const exists = await promisify(fs.access)(directory).then(
+      () => true,
+      () => false,
+    );
+    if (!exists) {
+      return NextResponse.json(
+        { error: `Directory not found at ${directory}` },
+        { status: 404 },
+      );
+    }
+
+    const filesInDir = await promisify(fs.readdir)(directory);
+    const validFiles = filesInDir.filter((file) =>
+      VALID_EXTENSIONS.includes(path.extname(file)),
+    );
+    // Keep one file per base name (a .tsx file wins over other extensions)
+    let filesToProcess = filterDuplicateFiles(validFiles);
+
+    if (itemTypes?.length) {
+      // Specific item types provided (e.g., for layouts "header", "footer")
+      filesToProcess = filesToProcess.filter((file) =>
+        itemTypes.includes(path.basename(file, path.extname(file))),
+      );
+    }
+
+    const items: Item[] = await Promise.all(
+      filesToProcess.map(async (file) => {
+        const filePath = path.join(directory, file);
+        const content = await promisify(fs.readFile)(filePath, "utf-8");
+        return {
+          type: path.basename(file, path.extname(file)),
+          code: content,
+          filename: file,
+        };
+      }),
+    );
+
+    return NextResponse.json(items, { status: 200 });
+  } catch (error) {
+    console.error(`Error reading directory ${directory}:`, error);
+    return NextResponse.json(
+      { error: `Failed to read directory ${directory}` },
+      { status: 500 },
+    );
+  }
+}
@@ -60,12 +60,12 @@ function Calendar({
        ...classNames,
      }}
      components={{
-        IconLeft: ({ className, ...props }) => (
-          <ChevronLeft className={cn("size-4", className)} {...props} />
-        ),
-        IconRight: ({ className, ...props }) => (
-          <ChevronRight className={cn("size-4", className)} {...props} />
-        ),
+        Chevron: ({ ...props }) =>
+          props.orientation === "left" ? (
+            <ChevronLeft {...props} className="h-4 w-4" />
+          ) : (
+            <ChevronRight {...props} className="h-4 w-4" />
+          ),
      }}
      {...props}
    />
@@ -1,55 +0,0 @@
-"use client";
-
-import { Sparkles, Star } from "lucide-react";
-import { Button } from "../button";
-import { getConfig } from "../lib/utils";
-
-export function ChatHeader() {
-  return (
-    <div className="flex items-center justify-between px-4 pt-2">
-      <ChatAppTitle />
-      <LlamaIndexLinks />
-    </div>
-  );
-}
-
-function ChatAppTitle() {
-  return (
-    <div className="flex items-center gap-2">
-      <Sparkles className="size-4" />
-      <h1 className="font-semibold">{getConfig("APP_TITLE")}</h1>
-    </div>
-  );
-}
-
-function LlamaIndexLinks() {
-  return (
-    <div className="flex items-center justify-end gap-4">
-      <div className="flex items-center gap-2">
-        <a
-          href="https://www.llamaindex.ai/"
-          target="_blank"
-          rel="noopener noreferrer"
-          className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
-        >
-          Built by LlamaIndex
-        </a>
-        <img
-          className="h-[24px] w-[24px] rounded-sm"
-          src="/llama.png"
-          alt="Llama Logo"
-        />
-      </div>
-      <a
-        href="https://github.com/run-llama/LlamaIndexTS"
-        target="_blank"
-        rel="noopener noreferrer"
-      >
-        <Button variant="outline" size="sm">
-          <Star className="mr-2 size-4" />
-          Star on GitHub
-        </Button>
-      </a>
-    </div>
-  );
-}
@@ -8,7 +8,11 @@ import { LlamaCloudSelector } from "./custom/llama-cloud-selector";
 export default function CustomChatInput() {
  const { requestData, isLoading, input } = useChatUI();
  const uploadAPI = getConfig("UPLOAD_API") ?? "";
-  const llamaCloudAPI = getConfig("LLAMA_CLOUD_API") ?? "";
+  const llamaCloudAPI =
+    getConfig("LLAMA_CLOUD_API") ??
+    (process.env.NEXT_PUBLIC_SHOW_LLAMACLOUD_SELECTOR === "true"
+      ? "/api/chat/config/llamacloud"
+      : "");
  const {
    imageUrl,
    setImageUrl,
@@ -6,7 +6,6 @@ import { useEffect, useMemo, useState } from "react";
 import { getConfig } from "../lib/utils";
 import { ResizablePanel, ResizablePanelGroup } from "../resizable";
 import { ChatCanvasPanel } from "./canvas/panel";
-import { ChatHeader } from "./chat-header";
 import { ChatInjection } from "./chat-injection";
 import CustomChatInput from "./chat-input";
 import CustomChatMessages from "./chat-messages";
@@ -14,10 +13,11 @@ import { DynamicEventsErrors } from "./custom/events/dynamic-events-errors";
 import { fetchComponentDefinitions } from "./custom/events/loader";
 import { ComponentDef } from "./custom/events/types";
 import { DevModePanel } from "./dev-mode-panel";
+import { ChatLayout } from "./layout";

 export default function ChatSection() {
  const handler = useChat({
-    api: getConfig("CHAT_API"),
+    api: getConfig("CHAT_API") || "/api/chat",
    onError: (error: unknown) => {
      if (!(error instanceof Error)) throw error;
      let errorMessage: string;
@@ -32,8 +32,7 @@ export default function ChatSection() {
  });
  return (
    <>
-      <div className="flex h-screen w-screen flex-col overflow-hidden">
-        <ChatHeader />
+      <ChatLayout>
        <ChatUI
          handler={handler}
          className="relative flex min-h-0 flex-1 flex-row justify-center gap-4 px-4 py-0"
@@ -44,7 +43,7 @@ export default function ChatSection() {
          </ResizablePanelGroup>
          <DevModePanel />
        </ChatUI>
-      </div>
+      </ChatLayout>
      <ChatInjection />
    </>
  );
@@ -6,7 +6,9 @@ import { getConfig } from "../lib/utils";

 export function ChatStarter({ className }: { className?: string }) {
  const { append, messages, requestData } = useChatUI();
-  const starterQuestions = getConfig("STARTER_QUESTIONS") ?? [];
+  const starterQuestions =
+    getConfig("STARTER_QUESTIONS") ??
+    JSON.parse(process.env.NEXT_PUBLIC_STARTER_QUESTIONS || "[]");

  if (starterQuestions.length === 0 || messages.length > 0) return null;
  return (
@@ -17,7 +17,11 @@ export async function fetchComponentDefinitions(): Promise<{
  components: ComponentDef[];
  errors: string[];
 }> {
-  const endpoint = getConfig("COMPONENTS_API");
+  const endpoint =
+    getConfig("COMPONENTS_API") ??
+    (process.env.NEXT_PUBLIC_USE_COMPONENTS_DIR === "true"
+      ? "/api/components"
+      : undefined);
  if (!endpoint) {
    console.warn("/api/components endpoint is not defined in config");
    return { components: [], errors: [] };
@@ -65,8 +65,14 @@ export function LlamaCloudSelector({
  );

  useEffect(() => {
-    if (!config && getConfig("LLAMA_CLOUD_API")) {
-      fetch(getConfig("LLAMA_CLOUD_API"))
+    const llamaCloudAPI =
+      getConfig("LLAMA_CLOUD_API") ??
+      (process.env.NEXT_PUBLIC_SHOW_LLAMACLOUD_SELECTOR === "true"
+        ? "/api/chat/config/llamacloud"
+        : "");
+
+    if (!config && llamaCloudAPI) {
+      fetch(llamaCloudAPI)
        .then((response) => {
          if (!response.ok) {
            return response.json().then((errorData) => {
@@ -19,7 +19,8 @@ type WorkflowFile = {
 };

 export function DevModePanel() {
-  const devModeEnabled = getConfig("DEV_MODE");
+  const devModeEnabled =
+    getConfig("DEV_MODE") ?? process.env.NEXT_PUBLIC_DEV_MODE === "true";
  if (!devModeEnabled) return null;
  return <DevModePanelComp />;
 }
@@ -0,0 +1,40 @@
+"use client";
+
+import { Sparkles, Star } from "lucide-react";
+
+/**
+ * Built-in chat header: a static "LlamaIndex App" title on the left, and on
+ * the right a "Built by LlamaIndex" link with the llama logo plus a
+ * "Star on GitHub" link to the LlamaIndexTS repository.
+ *
+ * `ChatLayout` passes it as the header fallback, so it is shown whenever no
+ * custom `header` layout component is rendered.
+ */
+export function DefaultHeader() {
+  return (
+    <div className="flex items-center justify-between px-4 pt-2">
+      <div className="flex items-center gap-2">
+        <Sparkles className="size-4" />
+        <h1 className="font-semibold">LlamaIndex App</h1>
+      </div>
+      <div className="flex items-center justify-end gap-4">
+        <div className="flex items-center gap-2">
+          <a
+            href="https://www.llamaindex.ai/"
+            target="_blank"
+            rel="noopener noreferrer"
+            className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
+          >
+            Built by LlamaIndex
+          </a>
+          <img
+            className="h-[24px] w-[24px] rounded-sm"
+            src="/llama.png"
+            alt="Llama Logo"
+          />
+        </div>
+        <a
+          href="https://github.com/run-llama/LlamaIndexTS"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="hover:bg-accent flex items-center gap-2 rounded-md border border-gray-300 px-2 py-1 text-sm"
+        >
+          <Star className="size-4" />
+          Star on GitHub
+        </a>
+      </div>
+    </div>
+  );
+}
@@ -0,0 +1,135 @@
+"use client";
+
+import { Loader2 } from "lucide-react";
+import React, { FunctionComponent, useEffect, useState } from "react";
+import { getConfig } from "../../lib/utils";
+import { DynamicComponentErrorBoundary } from "../custom/events/error-boundary";
+import { parseComponent } from "../custom/events/loader";
+import { DefaultHeader } from "./header";
+
+type LayoutFile = {
+  type: "header" | "footer";
+  code: string;
+  filename: string;
+};
+
+type LayoutComponent = LayoutFile & {
+  component?: FunctionComponent | null;
+  error?: string;
+};
+
+/**
+ * Page shell for the chat UI: renders an optional custom header, the chat
+ * itself (`children`) and an optional custom footer.
+ *
+ * On mount it fetches the layout files and parses them with `parseComponent`,
+ * showing a spinner while doing so. Errors reported while loading, parsing or
+ * rendering the layout components are de-duplicated and listed in a banner at
+ * the top. The header falls back to `DefaultHeader`; the footer has no
+ * fallback.
+ */
+export function ChatLayout({ children }: { children: React.ReactNode }) {
+  const [layoutComponents, setLayoutComponents] = useState<LayoutComponent[]>(
+    [],
+  );
+  const [isRendering, setIsRendering] = useState(false);
+  const [errors, setErrors] = useState<string[]>([]);
+
+  useEffect(() => {
+    const loadLayout = async () => {
+      setIsRendering(true);
+      try {
+        const layoutFiles = await fetchLayoutFiles();
+        if (layoutFiles.length) {
+          const parsed = await parseLayoutComponents(layoutFiles);
+          setLayoutComponents(parsed);
+          setErrors((prev) => [
+            ...prev,
+            ...(parsed.map((c) => c.error).filter(Boolean) as string[]),
+          ]);
+        }
+      } catch (error) {
+        // Surface unexpected failures in the banner instead of leaving an
+        // unhandled promise rejection.
+        setErrors((prev) => [
+          ...prev,
+          error instanceof Error ? error.message : "Unknown error",
+        ]);
+      } finally {
+        // Always leave the loading state; otherwise a failed load would keep
+        // the spinner on screen forever and hide the chat.
+        setIsRendering(false);
+      }
+    };
+
+    loadLayout();
+  }, []);
+
+  const handleError = (error: string) => {
+    setErrors((prev) => [...prev, error]);
+  };
+
+  // Returns the parsed component for the given layout slot, if any
+  const getLayoutCode = (type: "header" | "footer") => {
+    return layoutComponents.find((c) => c.type === type)?.component;
+  };
+
+  if (isRendering) {
+    return (
+      <div className="flex h-screen w-screen flex-col items-center justify-center overflow-hidden">
+        <Loader2 className="text-muted-foreground animate-spin" />
+      </div>
+    );
+  }
+
+  // The same error can be reported more than once; show each message once
+  const uniqueErrors = [...new Set(errors)];
+
+  return (
+    <div className="flex h-screen w-screen flex-col overflow-hidden">
+      {uniqueErrors.length > 0 && (
+        <div className="w-full bg-yellow-100 px-4 py-2 text-black/70">
+          <h2 className="mb-2 font-semibold">
+            Errors happened while rendering the layout:
+          </h2>
+          {uniqueErrors.map((error) => (
+            <div key={error} className="text-sm">
+              {error}
+            </div>
+          ))}
+        </div>
+      )}
+
+      <LayoutRenderer
+        component={getLayoutCode("header")}
+        onError={handleError}
+        fallback={<DefaultHeader />}
+      />
+
+      {children}
+
+      <LayoutRenderer
+        component={getLayoutCode("footer")}
+        onError={handleError}
+      />
+    </div>
+  );
+}
+
+/**
+ * Renders a dynamically loaded layout component inside an error boundary.
+ * When no component is given, `fallback` is returned as-is (which may be
+ * nothing). The same `fallback` is handed to the error boundary.
+ */
+function LayoutRenderer({
+  component,
+  onError,
+  fallback,
+}: {
+  component?: FunctionComponent | null;
+  onError: (error: string) => void;
+  fallback?: React.ReactNode;
+}) {
+  if (component) {
+    const element = React.createElement(component);
+    return (
+      <DynamicComponentErrorBoundary onError={onError} fallback={fallback}>
+        {element}
+      </DynamicComponentErrorBoundary>
+    );
+  }
+  return fallback;
+}
+
+/**
+ * Runs `parseComponent` on every layout file (concurrently) and merges each
+ * parse result into a copy of its file entry.
+ */
+async function parseLayoutComponents(layoutFiles: LayoutFile[]) {
+  const parseOne = async (file: LayoutFile): Promise<LayoutComponent> => {
+    const parsed = await parseComponent(file.code, file.filename);
+    return { ...file, ...parsed };
+  };
+  return Promise.all(layoutFiles.map((file) => parseOne(file)));
+}
+
+/**
+ * Fetches the custom layout files from the endpoint configured as
+ * `LAYOUT_API`.
+ *
+ * @returns The layout files, or an empty array when no endpoint is
+ *   configured, the request fails, the response status is not OK, or the
+ *   payload is not an array.
+ */
+async function fetchLayoutFiles(): Promise<LayoutFile[]> {
+  try {
+    const layoutApi = getConfig("LAYOUT_API");
+    if (!layoutApi) return [];
+    const response = await fetch(layoutApi);
+    if (!response.ok) {
+      // Error responses carry an error object, not a file list. A 404 just
+      // means there is no layout directory, so only warn for other statuses.
+      if (response.status !== 404) {
+        console.warn(
+          "Error fetching layout files: ",
+          `${response.status} ${response.statusText}`,
+        );
+      }
+      return [];
+    }
+    const payload: unknown = await response.json();
+    // Guard against unexpected payload shapes before treating it as a list
+    return Array.isArray(payload) ? (payload as LayoutFile[]) : [];
+  } catch (error) {
+    const errorMessage =
+      error instanceof Error ? error.message : "Unknown error";
+    console.warn("Error fetching layout files: ", errorMessage);
+    return [];
+  }
+}
@@ -1,6 +1,5 @@
 window.LLAMAINDEX = {
  CHAT_API: "/api/chat",
-  APP_TITLE: "Deep Research App",
  LLAMA_CLOUD_API: undefined,
  STARTER_QUESTIONS: [
    "Research about Apple and Tesla revenue",
@@ -1,7 +1,7 @@
 {
  "name": "@llamaindex/server",
  "description": "LlamaIndex Server",
-  "version": "0.2.2",
+  "version": "0.2.4",
  "type": "module",
  "main": "./dist/index.cjs",
  "module": "./dist/index.js",
@@ -19,8 +19,13 @@
  },
  "files": [
    "dist",
-    "server"
+    "server",
+    "project",
+    "bin"
  ],
+  "bin": {
+    "llamaindex-server": "./bin/eject.cjs"
+  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/run-llama/LlamaIndexTS.git",
@@ -28,10 +33,11 @@
  },
  "scripts": {
    "dev": "bunchee --watch",
-    "clean": "rm -rf ./dist ./server next/.next next/out ./temp",
+    "clean": "rm -rf ./dist ./server ./project next/.next next/out ./temp",
    "prebuild": "pnpm clean",
    "build": "bunchee",
-    "postbuild": "pnpm prepare:ts-server && pnpm prepare:py-static",
+    "postbuild": "pnpm prepare:nextjs && pnpm prepare:ts-server && pnpm prepare:py-static",
+    "prepare:nextjs": "cp -r ./next ./project && cp -r ./src/utils ./project/app/api/chat && cp -r ./project-config/* ./project/",
    "prepare:ts-server": "pnpm copy:next-src && pnpm build:css && pnpm build:api",
    "prepare:py-static": "pnpm prepare:static && pnpm build:static && pnpm copy:static",
    "copy:next-src": "cp -r ./next ./server",
@@ -59,7 +65,7 @@
    "@babel/traverse": "^7.27.0",
    "@babel/types": "^7.27.0",
    "@hookform/resolvers": "^5.0.1",
-    "@llamaindex/chat-ui": "0.4.4",
+    "@llamaindex/chat-ui": "0.4.6",
    "@radix-ui/react-accordion": "^1.2.3",
    "@radix-ui/react-alert-dialog": "^1.1.7",
    "@radix-ui/react-aspect-ratio": "^1.1.3",
@@ -97,7 +103,7 @@
    "next": "^15.3.0",
    "next-themes": "^0.4.3",
    "react": "^19.1.0",
-    "react-day-picker": "8.10.1",
+    "react-day-picker": "9.7.0",
    "react-dom": "^19.1.0",
    "react-hook-form": "^7.55.0",
    "react-resizable-panels": "^2.1.7",
@@ -0,0 +1,34 @@
+import { FlatCompat } from "@eslint/eslintrc";
+import { dirname } from "path";
+import { fileURLToPath } from "url";
+
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = dirname(__filename);
+
+const compat = new FlatCompat({
+  baseDirectory: __dirname,
+});
+
+// ESLint flat config: shared presets loaded through FlatCompat, followed by
+// project-specific rule overrides and the list of ignored paths.
+const eslintConfig = [
+  ...compat.extends("next/core-web-vitals", "next/typescript", "prettier"),
+  {
+    // Rules from the presets that are switched off for this project
+    rules: {
+      "@typescript-eslint/no-explicit-any": "off",
+      "@typescript-eslint/no-unused-vars": "off",
+      "react-hooks/exhaustive-deps": "off",
+      "@next/next/no-img-element": "off",
+      "@next/next/no-assign-module-variable": "off",
+    },
+  },
+  {
+    // Build output, dependencies and tool config files are not linted
+    ignores: [
+      "**/.next/**",
+      "**/node_modules/**",
+      "prettier.config.mjs",
+      "eslint.config.mjs",
+      "postcss.config.js",
+    ],
+  },
+];
+
+export default eslintConfig;
@@ -26,6 +26,7 @@ yarn-error.log*

 # local env files
 .env*.local
+.env

 # vercel
 .vercel
@@ -35,5 +36,6 @@ yarn-error.log*
 next-env.d.ts

 output/
+storage/

 !lib/
@@ -0,0 +1,100 @@
+{
+  "name": "nextjs-project",
+  "description": "Next.js project with full feature set of @llamaindex/server",
+  "private": true,
+  "version": "0.0.1",
+  "type": "module",
+  "scripts": {
+    "dev": "next dev",
+    "build": "next build",
+    "start": "next start",
+    "lint": "next lint",
+    "format": "prettier --ignore-unknown --cache --check .",
+    "format:write": "prettier --ignore-unknown --write .",
+    "typecheck": "tsc --noEmit",
+    "generate": "tsx app/api/chat/generate.ts"
+  },
+  "devDependencies": {
+    "@eslint/eslintrc": "^3",
+    "@next/eslint-plugin-next": "^15.3.2",
+    "@tailwindcss/postcss": "^4",
+    "@types/babel__standalone": "^7.1.9",
+    "@types/babel__traverse": "^7.20.7",
+    "@types/node": "^20",
+    "@types/react": "^19",
+    "@types/react-dom": "^19",
+    "eslint": "^9",
+    "eslint-config-next": "^15.1.3",
+    "eslint-config-prettier": "^9.1.0",
+    "eslint-plugin-react-hooks": "^5.2.0",
+    "prettier": "^3.2.5",
+    "prettier-plugin-organize-imports": "^4.1.0",
+    "prettier-plugin-tailwindcss": "^0.6.11",
+    "tailwindcss": "^4",
+    "tsx": "^4.19.3",
+    "tw-animate-css": "1.2.5",
+    "typescript": "^5"
+  },
+  "dependencies": {
+    "@babel/parser": "^7.27.0",
+    "@babel/standalone": "^7.27.0",
+    "@babel/traverse": "^7.27.0",
+    "@babel/types": "^7.27.0",
+    "@hookform/resolvers": "^5.0.1",
+    "@llamaindex/chat-ui": "0.4.5",
+    "@llamaindex/env": "~0.1.30",
+    "@llamaindex/openai": "~0.4.0",
+    "@llamaindex/readers": "~3.1.4",
+    "@llamaindex/tools": "~0.0.11",
+    "@llamaindex/workflow": "~1.1.3",
+    "@radix-ui/react-accordion": "^1.2.3",
+    "@radix-ui/react-alert-dialog": "^1.1.7",
+    "@radix-ui/react-aspect-ratio": "^1.1.3",
+    "@radix-ui/react-avatar": "^1.1.4",
+    "@radix-ui/react-checkbox": "^1.1.5",
+    "@radix-ui/react-collapsible": "^1.1.3",
+    "@radix-ui/react-context-menu": "^2.2.7",
+    "@radix-ui/react-dialog": "^1.1.2",
+    "@radix-ui/react-dropdown-menu": "^2.1.7",
+    "@radix-ui/react-hover-card": "^1.1.7",
+    "@radix-ui/react-label": "^2.1.0",
+    "@radix-ui/react-menubar": "^1.1.7",
+    "@radix-ui/react-navigation-menu": "^1.2.6",
+    "@radix-ui/react-popover": "^1.1.7",
+    "@radix-ui/react-progress": "^1.1.3",
+    "@radix-ui/react-radio-group": "^1.2.4",
+    "@radix-ui/react-scroll-area": "^1.2.4",
+    "@radix-ui/react-select": "^2.1.6",
+    "@radix-ui/react-separator": "^1.1.3",
+    "@radix-ui/react-slider": "^1.2.1",
+    "@radix-ui/react-slot": "^1.1.2",
+    "@radix-ui/react-switch": "^1.1.4",
+    "@radix-ui/react-tabs": "^1.1.3",
+    "@radix-ui/react-toggle": "^1.1.3",
+    "@radix-ui/react-toggle-group": "^1.1.3",
+    "@radix-ui/react-tooltip": "^1.1.4",
+    "ai": "^4.2.0",
+    "class-variance-authority": "^0.7.1",
+    "clsx": "^2.1.1",
+    "cmdk": "^1.1.1",
+    "date-fns": "^4.1.0",
+    "dotenv": "^16.5.0",
+    "embla-carousel-react": "^8.6.0",
+    "input-otp": "^1.4.2",
+    "llamaindex": "~0.11.0",
+    "lucide-react": "^0.460.0",
+    "next": "^15.3.0",
+    "next-themes": "^0.4.3",
+    "react": "^19.1.0",
+    "react-day-picker": "9.7.0",
+    "react-dom": "^19.1.0",
+    "react-hook-form": "^7.55.0",
+    "react-resizable-panels": "^2.1.7",
+    "recharts": "^2.15.2",
+    "sonner": "^2.0.3",
+    "tailwind-merge": "^2.6.0",
+    "vaul": "^1.1.2",
+    "zod": "^3.23.8",
+    "zod-to-json-schema": "^3.23.3"
+  }
+}
@@ -0,0 +1,3 @@
+// Prettier configuration: loads the organize-imports and Tailwind CSS plugins.
+// NOTE(review): prettier-plugin-tailwindcss documents that it should be listed
+// last among plugins — keep this order when adding more.
+export default {
+  plugins: ["prettier-plugin-organize-imports", "prettier-plugin-tailwindcss"],
+};
@@ -16,6 +16,7 @@ export const handleChat = async (
  req: IncomingMessage,
  res: ServerResponse,
  workflowFactory: WorkflowFactory,
+  suggestNextQuestions: boolean,
 ) => {
  try {
    const body = await parseRequestBody(req);
@@ -53,7 +54,9 @@ export const handleChat = async (
            role: "assistant" as MessageType,
            content: completion,
          });
-          await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          if (suggestNextQuestions) {
+            await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          }
        },
      },
    });
@@ -1,4 +1,5 @@
-export * from "./events";
 export * from "./server";
 export * from "./types";
+export * from "./utils/events";
 export { generateEventComponent } from "./utils/gen-ui";
+export * from "./utils/prompts";
@@ -18,13 +18,17 @@ export class LlamaIndexServer {
  app: ReturnType<typeof next>;
  workflowFactory: () => Promise<Workflow> | Workflow;
  componentsDir?: string | undefined;
+  layoutDir: string;
+  suggestNextQuestions: boolean;

  constructor(options: LlamaIndexServerOptions) {
-    const { workflow, ...nextAppOptions } = options;
+    const { workflow, suggestNextQuestions, ...nextAppOptions } = options;
    this.app = next({ dev, dir: nextDir, ...nextAppOptions });
    this.port = nextAppOptions.port ?? parseInt(process.env.PORT || "3000", 10);
    this.workflowFactory = workflow;
    this.componentsDir = options.uiConfig?.componentsDir;
+    this.layoutDir = options.uiConfig?.layoutDir ?? "layout";
+    this.suggestNextQuestions = suggestNextQuestions ?? true;

    if (this.componentsDir) {
      this.createComponentsDir(this.componentsDir);
@@ -35,24 +39,25 @@ export class LlamaIndexServer {

  private modifyConfig(options: LlamaIndexServerOptions) {
    const { uiConfig } = options;
-    const appTitle = uiConfig?.appTitle ?? "LlamaIndex App";
    const starterQuestions = uiConfig?.starterQuestions ?? [];
    const llamaCloudApi =
      uiConfig?.llamaCloudIndexSelector && getEnv("LLAMA_CLOUD_API_KEY")
        ? "/api/chat/config/llamacloud"
        : undefined;
    const componentsApi = this.componentsDir ? "/api/components" : undefined;
+    const layoutApi = this.layoutDir ? "/api/layout" : undefined;
    const devMode = uiConfig?.devMode ?? false;

    // content in javascript format
    const content = `
      window.LLAMAINDEX = {
        CHAT_API: '/api/chat',
-        APP_TITLE: ${JSON.stringify(appTitle)},
        LLAMA_CLOUD_API: ${JSON.stringify(llamaCloudApi)},
        STARTER_QUESTIONS: ${JSON.stringify(starterQuestions)},
        COMPONENTS_API: ${JSON.stringify(componentsApi)},
-        DEV_MODE: ${JSON.stringify(devMode)}
+        LAYOUT_API: ${JSON.stringify(layoutApi)},
+        DEV_MODE: ${JSON.stringify(devMode)},
+        SUGGEST_NEXT_QUESTIONS: ${JSON.stringify(this.suggestNextQuestions)}
      }
    `;
    fs.writeFileSync(configFile, content);
@@ -77,7 +82,12 @@ export class LlamaIndexServer {
        // because of https://github.com/vercel/next.js/discussions/79402 we can't use route.ts here, so we need to call this custom route
        // when calling `pnpm eject`, the user will get an equivalent route at [path to chat route.ts]
        // make sure to keep its semantic in sync with handleChat
-        return handleChat(req, res, this.workflowFactory);
+        return handleChat(
+          req,
+          res,
+          this.workflowFactory,
+          this.suggestNextQuestions,
+        );
      }

      if (
@@ -88,6 +98,10 @@ export class LlamaIndexServer {
        query.componentsDir = this.componentsDir;
      }

+      if (pathname === "/api/layout" && req.method === "GET") {
+        query.layoutDir = this.layoutDir;
+      }
+
      const handle = this.app.getRequestHandler();
      handle(req, res, { ...parsedUrl, query });
    });
@@ -13,9 +13,9 @@ export type WorkflowFactory = (
 export type NextAppOptions = Parameters<typeof next>[0];

 export type UIConfig = {
-  appTitle?: string;
  starterQuestions?: string[];
  componentsDir?: string;
+  layoutDir?: string;
  llamaCloudIndexSelector?: boolean;
  devMode?: boolean;
 };
@@ -23,4 +23,5 @@ export type UIConfig = {
 export type LlamaIndexServerOptions = NextAppOptions & {
  workflow: WorkflowFactory;
  uiConfig?: UIConfig;
+  suggestNextQuestions?: boolean;
 };
@@ -0,0 +1,8 @@
+export * from "./events";
+export * from "./file";
+export * from "./gen-ui";
+export * from "./prompts";
+export * from "./request";
+export * from "./stream";
+export * from "./suggestion";
+export * from "./workflow";
@@ -0,0 +1,14 @@
+// Default prompt template used to generate suggested follow-up questions.
+// The `{conversation}` placeholder is replaced with the chat history text by
+// `generateNextQuestions`, which uses the `NEXT_QUESTION_PROMPT` environment
+// variable instead of this template when that variable is set.
+export const NEXT_QUESTION_PROMPT = `You're a helpful assistant! 
+Your task is to suggest the next question that user might ask. 
+Here is the conversation history
+---------------------
+{conversation}
+---------------------
+Given the conversation history, please give me 3 questions that user might ask next!
+Your answer should be wrapped in three sticks which follows the following format:
+\`\`\`
+<question 1>
+<question 2>
+<question 3>
+\`\`\`
+`;
@@ -1,19 +1,7 @@
+import { getEnv } from "@llamaindex/env";
 import type { DataStreamWriter } from "ai";
 import { type ChatMessage, Settings } from "llamaindex";
-
-const NEXT_QUESTION_PROMPT = `You're a helpful assistant! Your task is to suggest the next question that user might ask. 
-Here is the conversation history
---------------------
-{conversation}
---------------------
-Given the conversation history, please give me 3 questions that user might ask next!
-Your answer should be wrapped in three sticks which follows the following format:
-\`\`\`
-<question 1>
-<question 2>
-<question 3>
-\`\`\`
-`;
+import { NEXT_QUESTION_PROMPT } from "./prompts";

 export const sendSuggestedQuestionsEvent = async (
  streamWriter: DataStreamWriter,
@@ -32,10 +20,8 @@ export async function generateNextQuestions(conversation: ChatMessage[]) {
  const conversationText = conversation
    .map((message) => `${message.role}: ${message.content}`)
    .join("\n");
-  const message = NEXT_QUESTION_PROMPT.replace(
-    "{conversation}",
-    conversationText,
-  );
+  const promptTemplate = getEnv("NEXT_QUESTION_PROMPT") || NEXT_QUESTION_PROMPT;
+  const message = promptTemplate.replace("{conversation}", conversationText);

  try {
    const response = await Settings.llm.complete({ prompt: message });
@@ -19,7 +19,7 @@ import {
  toAgentRunEvent,
  toSourceEvent,
  type SourceEventNode,
-} from "../events";
+} from "./events";
 import { downloadFile } from "./file";

 export async function runWorkflow(
@@ -181,8 +181,8 @@ importers:
        specifier: ^5.0.1
        version: 5.0.1(react-hook-form@7.56.1(react@19.1.0))
      '@llamaindex/chat-ui':
-        specifier: 0.4.4
-        version: 0.4.4(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
+        specifier: 0.4.6
+        version: 0.4.6(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
      '@llamaindex/env':
        specifier: ~0.1.30
        version: 0.1.30
@@ -301,8 +301,8 @@ importers:
        specifier: ^19.1.0
        version: 19.1.0
      react-day-picker:
-        specifier: 8.10.1
-        version: 8.10.1(date-fns@4.1.0)(react@19.1.0)
+        specifier: 9.7.0
+        version: 9.7.0(react@19.1.0)
      react-dom:
        specifier: ^19.1.0
        version: 19.1.0(react@19.1.0)
@@ -402,6 +402,12 @@ importers:
        specifier: ^5.3.2
        version: 5.8.3

+  python/llama-index-server:
+    dependencies:
+      '@llamaindex/server':
+        specifier: workspace:*
+        version: link:../../packages/server
+
 packages:

  '@ai-sdk/provider-utils@2.2.7':
@@ -607,6 +613,9 @@ packages:
    peerDependencies:
      '@bufbuild/protobuf': ^2.2.0

+  '@date-fns/tz@1.2.0':
+    resolution: {integrity: sha512-LBrd7MiJZ9McsOgxqWX7AaxrDjcFVjWH/tIKJd7pnR7McaslGYOP1QmmiBXdJH/H/yLCT+rcQ7FaPBUxRGUtrg==}
+
  '@discoveryjs/json-ext@0.6.3':
    resolution: {integrity: sha512-4B4OijXeVNOPZlYA2oEwWOTkzyltLao+xbotHQeqN++Rv27Y6s818+n2Qkp8q+Fxhn0t/5lA5X1Mxktud8eayQ==}
    engines: {node: '>=14.17.0'}
@@ -1180,8 +1189,8 @@ packages:
      zod:
        optional: true

-  '@llamaindex/chat-ui@0.4.4':
-    resolution: {integrity: sha512-sE3mJxlmAV3eiIaqOnioUNYYBLJJL8sy2mdFP4xXDf3PfkcEn0wT2Om/ButpVgIXHlPG9lONtv8mzw7hWq2Atg==}
+  '@llamaindex/chat-ui@0.4.6':
+    resolution: {integrity: sha512-XvJEv/rv//8vY9Z4RosbmTyPDQFyVaWlQFe0zrJ4inz+aYqHhYtEiSCmQGgPQG+NqWStlTwpOpCye1jy4mWciQ==}
    peerDependencies:
      react: ^18.2.0 || ^19.0.0 || ^19.0.0-rc

@@ -3250,6 +3259,9 @@ packages:
    resolution: {integrity: sha512-BS8PfmtDGnrgYdOonGZQdLZslWIeCGFP9tpan0hi1Co2Zr2NKADsvGYA8XxuG/4UWgJ6Cjtv+YJnB6MM69QGlQ==}
    engines: {node: '>= 0.4'}

+  date-fns-jalali@4.1.0-0:
+    resolution: {integrity: sha512-hTIP/z+t+qKwBDcmmsnmjWTduxCg+5KfdqWQvb2X/8C9+knYY6epN/pfxdDuyVlSVeFz0sM5eEfwIUQ70U4ckg==}
+
  date-fns@4.1.0:
    resolution: {integrity: sha512-Ukq0owbQXxa/U3EGtsdVBkR1w7KOQ5gIBqdH2hkvknzZPYvBxb/aa6E8L7tmjFtkwZBu3UXBbjIgPo/Ez4xaNg==}

@@ -5352,11 +5364,11 @@ packages:
    resolution: {integrity: sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==}
    hasBin: true

-  react-day-picker@8.10.1:
-    resolution: {integrity: sha512-TMx7fNbhLk15eqcMt+7Z7S2KF7mfTId/XJDjKE8f+IUcFn0l08/kI4FiYTL/0yuOLmEcbR4Fwe3GJf/NiiMnPA==}
+  react-day-picker@9.7.0:
+    resolution: {integrity: sha512-urlK4C9XJZVpQ81tmVgd2O7lZ0VQldZeHzNejbwLWZSkzHH498KnArT0EHNfKBOWwKc935iMLGZdxXPRISzUxQ==}
+    engines: {node: '>=18'}
    peerDependencies:
-      date-fns: ^2.28.0 || ^3.0.0
-      react: ^16.8.0 || ^17.0.0 || ^18.0.0
+      react: '>=16.8.0'

  react-dom@19.1.0:
    resolution: {integrity: sha512-Xs1hdnE+DyKgeHJeJznQmYMIBG3TKIHJJT95Q58nHLSrElKlGQqDTR2HQ9fx5CN/Gk6Vh/kupBTDLU11/nDk/g==}
@@ -6806,6 +6818,8 @@ snapshots:
    dependencies:
      '@bufbuild/protobuf': 2.3.0

+  '@date-fns/tz@1.2.0': {}
+
  '@discoveryjs/json-ext@0.6.3': {}

  '@e2b/code-interpreter@1.5.0':
@@ -7205,7 +7219,7 @@ snapshots:
      p-retry: 6.2.1
      zod: 3.24.3

-  '@llamaindex/chat-ui@0.4.4(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
+  '@llamaindex/chat-ui@0.4.6(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
    dependencies:
      '@codemirror/lang-css': 6.3.1
      '@codemirror/lang-html': 6.4.9
@@ -9437,6 +9451,8 @@ snapshots:
      es-errors: 1.3.0
      is-data-view: 1.0.2

+  date-fns-jalali@4.1.0-0: {}
+
  date-fns@4.1.0: {}

  debug@3.2.7:
@@ -11889,9 +11905,11 @@ snapshots:
      minimist: 1.2.8
      strip-json-comments: 2.0.1

-  react-day-picker@8.10.1(date-fns@4.1.0)(react@19.1.0):
+  react-day-picker@9.7.0(react@19.1.0):
    dependencies:
+      '@date-fns/tz': 1.2.0
      date-fns: 4.1.0
+      date-fns-jalali: 4.1.0-0
      react: 19.1.0

  react-dom@19.1.0(react@19.1.0):
@@ -1,3 +1,4 @@
 packages:
  - "packages/*"
  - "packages/server/examples"
+  - "python/*"
@@ -5,6 +5,7 @@
 **/venv
 **/env
 **/llama-index-server.egg-info
+llama_index/server/resources/ui

 # Jupyter files
 **/*.ipynb
@@ -0,0 +1,17 @@
+# @create-llama/llama-index-server
+
+## 0.1.18
+
+### Patch Changes
+
+- b8a1ff6: Show agent widget in UI when making a tool call
+- b8a1ff6: Support citation for query engine tool
+- Updated dependencies [5fe9e17]
+- Updated dependencies [b8a1ff6]
+  - @llamaindex/server@0.2.4
+
+## 0.1.17
+
+### Patch Changes
+
+- 91c35cf: Add suggestNextQuestions config
@@ -0,0 +1,245 @@
+# LlamaIndex Server (Python)
+
+## Overview
+
+The `llama-index-server` package is a FastAPI-based server framework for deploying LlamaIndex Workflows and Agent Workflows as a high-performance API server with an optional chat UI. It provides a complete environment for running LlamaIndex workflows with both API endpoints and a user interface for interaction.
+
+## Package Structure
+
+### Core Components
+- **`llama_index/server/server.py`**: Main `LlamaIndexServer` class extending FastAPI
+- **`llama_index/server/__init__.py`**: Package exports (`LlamaIndexServer`, `UIConfig`, `UIEvent`)
+- **`pyproject.toml`**: Package configuration with dependencies and build settings
+
+### Key Directories
+- **`api/`**: FastAPI routers, models, and request handling
+- **`services/`**: Business logic for file handling, LlamaCloud integration, and UI generation
+- **`tools/`**: Document generation, interpreter tools, and index querying utilities
+- **`gen_ui/`**: AI-powered UI component generation system
+- **`resources/`**: Static assets and bundled UI files
+- **`examples/`**: Sample workflows demonstrating different features
+
+## Core Functionality
+
+### LlamaIndexServer Class
+Main server implementation that extends FastAPI with workflow-specific features:
+- **Workflow Factory Pattern**: Creates workflow instances per request using factory functions
+- **UI Configuration**: Manages chat interface, custom components, and layout directories
+- **File Serving**: Automatically mounts `data/` and `output/` directories
+- **Development Mode**: Enables CORS, verbose logging, and hot reloading
+
+### Chat API (`api/routers/chat.py`)
+- **Endpoint**: `/api/chat` for chat interactions
+- **Streaming Responses**: Real-time workflow execution with Vercel-compatible streaming
+- **Message Handling**: Converts between API and LlamaIndex message formats
+- **Background Tasks**: File downloads and asynchronous processing
+- **LlamaCloud Integration**: Optional index selector for cloud-based retrieval
+
+### Event System (`api/models.py`)
+Structured event types for workflow communication:
+- **`UIEvent`**: Custom UI component rendering with Pydantic data models
+- **`ArtifactEvent`**: Code and document artifacts for Canvas panel display
+- **`SourceNodesEvent`**: Document sources with metadata and file URLs
+- **`AgentRunEvent`**: Agent tool usage and progress tracking
+
+### UI Generation (`gen_ui/main.py`)
+AI-powered component generation using LLM workflows:
+- **`GenUIWorkflow`**: Multi-step process for creating React components
+- **Planning Phase**: Analyzes event schemas to design UI layouts
+- **Aggregation Logic**: Groups events for optimized rendering
+- **Code Generation**: Creates shadcn/ui components with proper imports
+- **Validation**: Ensures generated code uses only supported dependencies
+
+## Development Environment
+
+### Dependencies
+```toml
+# Core FastAPI server with standard extensions
+fastapi[standard]>=0.115.11,<1.0.0
+
+# LlamaIndex core and workflow engine
+llama-index-core>=0.12.28,<1.0.0
+
+# File handling and cloud integration
+llama-index-readers-file>=0.4.6,<1.0.0
+llama-index-indices-managed-llama-cloud>=0.6.3,<1.0.0
+
+# HTTP requests and caching
+requests>=2.32.3,<3.0.0
+cachetools>=5.5.2,<6.0.0
+pydantic-settings>=2.8.1,<3.0.0
+```
+
+### Development Dependencies
+- **Testing**: pytest, pytest-asyncio, pytest-mock for comprehensive testing
+- **Code Quality**: black, ruff, mypy, pylint for code formatting and linting
+- **Documentation**: jupyter, markdown for examples and documentation
+- **Integrations**: e2b-code-interpreter, llama-cloud for extended functionality
+
+### Build System
+- **Backend**: Hatchling for Python package building
+- **Artifacts**: Includes `llama_index/server/resources` for bundled UI assets
+- **Type Checking**: MyPy with strict settings for type safety
+
+## Configuration Options
+
+### Server Configuration
+```python
+LlamaIndexServer(
+    workflow_factory=create_workflow,  # Required: factory function
+    env="dev",                        # Environment: "dev" enables CORS and UI
+    ui_config={                       # Optional UI configuration
+        "enabled": True,              # Enable chat interface
+        "starter_questions": [...],   # Predefined user prompts
+        "component_dir": "components", # Custom UI components directory
+        "layout_dir": "layout",       # Custom layout sections directory
+        "dev_mode": True,             # Enable live code editing
+        "llamacloud_index_selector": False, # LlamaCloud integration
+    },
+    suggest_next_questions=True,      # Auto-generate follow-up questions
+    verbose=True,                     # Enable detailed logging
+    api_prefix="/api",               # API route prefix
+    server_url="http://localhost:8000", # Deployment URL
+)
+```
+
+### Workflow Factory Contract
+```python
+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    # Access to request information for initialization
+    return MyCustomWorkflow(chat_request.messages)
+
+# Workflow input parameters (StartEvent):
+# - user_msg: str - Current user message
+# - chat_history: List[ChatMessage] - Previous conversation messages
+```
+
+## API Endpoints
+
+### Default Routes
+- **`/api/chat`**: Main chat interaction endpoint with streaming responses
+- **`/api/files/data/*`**: Static file serving from data directory
+- **`/api/files/output/*`**: Generated file serving from output directory
+- **`/api/components`**: Custom UI component serving (if configured)
+- **`/api/layout`**: Custom layout component serving (if configured)
+- **`/api/chat/config/llamacloud`**: LlamaCloud configuration (if enabled)
+
+### Development Routes (Dev Mode)
+- **`/api/dev/*`**: Live code editing and hot reloading endpoints
+
+## UI System
+
+### Chat Interface
+When enabled (`ui_config.enabled=True`), provides:
+- **Real-time Chat**: WebSocket-like streaming with message history
+- **Starter Questions**: Configurable prompts to guide users
+- **Canvas Panel**: Dedicated area for code and document artifacts
+- **Custom Components**: React components for workflow-specific events
+- **Custom Layout**: Configurable header/footer sections
+
+### Component Generation
+Automated UI component creation for workflow events:
+- **Event Analysis**: Parses Pydantic schemas to understand data structure
+- **Design Planning**: LLM generates layout descriptions based on event types
+- **Code Generation**: Creates React components using shadcn/ui and Tailwind CSS
+- **Dependency Validation**: Ensures only supported libraries are used
+
+### Supported UI Dependencies
+- **React**: Core framework with hooks and state management
+- **shadcn/ui**: Complete component library (Button, Card, Table, etc.)
+- **Lucide React**: Icon library for visual elements
+- **Tailwind CSS**: Utility-first styling with `cn` helper
+- **LlamaIndex Chat UI**: Markdown rendering and specialized widgets
+
+## File Handling
+
+### Directory Structure
+```
+project/
+├── data/           # Input documents and ingestion files
+├── output/         # Generated files and workflow outputs
+├── components/     # Custom UI components (optional)
+├── layout/         # Custom layout sections (optional)
+└── .ui/           # Downloaded UI static files
+```
+
+### File Serving
+- **Automatic Mounting**: `data/` and `output/` directories served at `/api/files/`
+- **URL Generation**: Metadata-based file URL creation for source nodes
+- **LlamaCloud Integration**: Background downloading of cloud-hosted files
+- **Static Assets**: UI resources bundled with package installation
+
+## Development Features
+
+### Hot Reloading (Beta)
+```python
+# Enable development mode
+app = LlamaIndexServer(
+    workflow_factory=create_workflow,
+    env="dev",                    # Required for dev features
+    ui_config={"dev_mode": True}, # Enable live editing
+)
+```
+- **Live Code Editing**: Modify workflow code in browser interface
+- **Automatic Restart**: FastAPI dev mode integration for instant updates
+- **File Watching**: Monitors `app/workflow.py` for changes
+
+### Logging and Debugging
+- **Verbose Mode**: Detailed request/response logging
+- **Error Handling**: Comprehensive exception catching and reporting
+- **Stream Monitoring**: Real-time event tracking during workflow execution
+
+## Integration Points
+
+### LlamaIndex Core
+- **Workflow Engine**: Full support for Workflow and AgentWorkflow classes
+- **Message Types**: Native ChatMessage and MessageRole compatibility
+- **Node Processing**: Automatic source node extraction and URL generation
+- **Tool Integration**: Function tools and external service connections
+
+### FastAPI Ecosystem
+- **Middleware**: CORS, authentication, and custom request processing
+- **Background Tasks**: Asynchronous file operations and processing
+- **Static Files**: Efficient serving of UI assets and generated content
+- **API Documentation**: Automatic OpenAPI/Swagger documentation generation
+
+### External Services
+- **LlamaCloud**: Cloud-based indexing and retrieval services
+- **File Readers**: Support for various document formats via LlamaIndex readers
+- **Code Interpreters**: Integration with E2B and other execution environments
+
+## Examples and Templates
+
+### Simple Workflow
+Basic agent with tool integration and starter questions for user guidance.
+
+### Agentic RAG
+Document retrieval system with vector indexing, query processing, and source citations.
+
+### Custom Layout
+Branded interface with custom header components and layout customization.
+
+### Development Mode
+Live code editing with hot reloading and separate workflow file organization.
+
+## Best Practices
+
+### Server Setup
+1. **Environment Variables**: Use `.env` files for API keys and configuration
+2. **Development vs Production**: Proper environment separation with `env` parameter
+3. **Resource Management**: Monitor memory usage with large document collections
+4. **Error Handling**: Implement comprehensive logging and exception handling
+
+### Workflow Design
+1. **Factory Pattern**: Use factory functions for stateless workflow creation
+2. **Event Emission**: Leverage `UIEvent` and `ArtifactEvent` for rich user experience
+3. **Message Handling**: Process chat history appropriately in workflow logic
+4. **Tool Integration**: Follow LlamaIndex patterns for external service connections
+
+### UI Development
+1. **Component Organization**: Structure custom components in dedicated directories
+2. **Event Schemas**: Design clear Pydantic models for UI generation
+3. **Layout Consistency**: Use shared layout components across workflows
+4. **Performance**: Consider event aggregation for large data sets
+
+This package provides a comprehensive foundation for deploying production-ready LlamaIndex applications with professional chat interfaces, extensible UI components, and robust API endpoints.
@@ -44,7 +44,6 @@ app = LlamaIndexServer(
    workflow_factory=create_workflow,  # Supports Workflow or AgentWorkflow
    env="dev",  # Enable development mode
    ui_config={ # Configure the chat UI, optional
-        "app_title": "Weather Bot",
        "starter_questions": ["What is the weather in LA?", "Will it rain in SF?"],
    },
    verbose=True
@@ -78,12 +77,13 @@ The LlamaIndexServer accepts the following configuration parameters:
 - `env`: Environment setting ('dev' enables CORS and UI by default)
 - `ui_config`: UI configuration as a dictionary or UIConfig object with options:
  - `enabled`: Whether to enable the chat UI (default: True)
-  - `app_title`: The title of the chat application (default: "LlamaIndex Server")
  - `starter_questions`: List of starter questions for the chat UI (default: None)
  - `ui_path`: Path for downloaded UI static files (default: ".ui")
  - `component_dir`: The directory for custom UI components rendering events emitted by the workflow. The default is None, which does not render custom UI components.
+  - `layout_dir`: The directory for custom layout sections. The default value is `layout`. See [Custom Layout](https://github.com/run-llama/create-llama/blob/main/python/llama-index-server/docs/custom_layout.md) for more details.
  - `llamacloud_index_selector`: Whether to show the LlamaCloud index selector in the chat UI (default: False). Requires `LLAMA_CLOUD_API_KEY` to be set.
  - `dev_mode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/workflow.py`. You might also need to set `env="dev"` and start the server with the reload feature enabled.
+- `suggest_next_questions`: Whether to suggest next questions after the assistant's response (default: True). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable. The default prompt is defined in `llama_index.server.prompts.SUGGEST_NEXT_QUESTION_PROMPT`.
 - `verbose`: Enable verbose logging
 - `api_prefix`: API route prefix (default: "/api")
 - `server_url`: The deployment URL of the server (default is None)
@@ -0,0 +1,22 @@
+# Custom Layout
+
+LlamaIndex Server supports a custom layout for the header and footer. To use a custom layout, initialize the LlamaIndex server with a `layout_dir` that contains your custom layout files.
+
+```python
+server = LlamaIndexServer(
+    workflow_factory=your_workflow,
+    ui_config={
+        "layout_dir": "path/to/layout",
+    },
+    include_ui=True
+)
+```
+
+```
+layout/
+  header.tsx
+  footer.tsx
+```
+
+We currently support a custom header and footer for the chat interface. The syntax for these files is the same as for the event components in the components directory (see [Custom UI Component](./custom_ui_component.md) for more details).
+Note that by default, the default LlamaIndex header is still rendered. It is also used as the fallback if an error occurs while rendering the custom header. Example layout files are generated in the `layout` directory of your project when you create a new project with `create-llama`.
@@ -0,0 +1,40 @@
+"use client";
+
+import { Sparkles, Star } from "lucide-react";
+
+export default function Header() {
+  return (
+    <div className="flex items-center justify-between px-4 pt-2">
+      <div className="flex items-center gap-2">
+        <Sparkles className="size-4" />
+        <h1 className="font-semibold">Artifact Workflow</h1>
+      </div>
+      <div className="flex items-center justify-end gap-4">
+        <div className="flex items-center gap-2">
+          <a
+            href="https://www.llamaindex.ai/"
+            target="_blank"
+            rel="noopener noreferrer"
+            className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
+          >
+            Built by LlamaIndex
+          </a>
+          <img
+            className="h-[24px] w-[24px] rounded-sm"
+            src="/llama.png"
+            alt="Llama Logo"
+          />
+        </div>
+        <a
+          href="https://github.com/run-llama/LlamaIndexTS"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="hover:bg-accent flex items-center gap-2 rounded-md border border-gray-300 px-2 py-1 text-sm"
+        >
+          <Star className="size-4" />
+          Star on GitHub
+        </a>
+      </div>
+    </div>
+  );
+}
@@ -23,12 +23,12 @@ def create_app() -> FastAPI:
    app = LlamaIndexServer(
        workflow_factory=create_workflow,
        ui_config=UIConfig(
-            app_title="Artifact",
            starter_questions=[
                "Write a simple calculator app",
                "Write a guideline on how to use LLM effectively",
            ],
            component_dir="components",
+            layout_dir="layout",
        ),
    )
    return app
@@ -0,0 +1,113 @@
+import os
+from typing import List, Optional
+
+from fastapi import FastAPI
+from llama_index.core.agent.workflow import AgentWorkflow
+from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
+from llama_index.core.settings import Settings
+from llama_index.core.tools import QueryEngineTool, ToolMetadata
+from llama_index.llms.openai import OpenAI
+from llama_index.server import LlamaIndexServer, UIConfig
+from llama_index.server.api.models import ChatRequest
+from llama_index.server.services.llamacloud import LlamaCloudIndex, get_index
+from llama_index.server.tools.index.citation import (
+    CITATION_SYSTEM_PROMPT,
+    enable_citation,
+)
+
+# Please set the following environment variables to use LlamaCloud
+if os.getenv("LLAMA_CLOUD_API_KEY") is None:
+    raise ValueError("LLAMA_CLOUD_API_KEY is not set")
+if os.getenv("LLAMA_CLOUD_PROJECT_NAME") is None:
+    raise ValueError("LLAMA_CLOUD_PROJECT_NAME is not set")
+if os.getenv("LLAMA_CLOUD_INDEX_NAME") is None:
+    raise ValueError("LLAMA_CLOUD_INDEX_NAME is not set")
+
+Settings.llm = OpenAI(model="gpt-4.1")
+
+
+def get_tools(index: LlamaCloudIndex) -> List[QueryEngineTool]:
+    """
+    Get the tools for the given index.
+    """
+
+    chunk_retriever = index.as_retriever(
+        retrieval_mode="chunks",
+        rerank_top_n=15,
+        dense_similarity_top_k=1,
+    )
+    doc_retriever = index.as_retriever(
+        retrieval_mode="files_via_content",
+        files_top_k=1,
+    )
+
+    # You can either create query engine with CitationSynthesizer and NodeCitationProcessor
+    # or use the enable_citation function to enable citation for the query engine.
+    chunk_engine = RetrieverQueryEngine.from_args(
+        retriever=chunk_retriever,
+        llm=Settings.llm,
+    )
+    doc_engine = RetrieverQueryEngine.from_args(
+        retriever=doc_retriever,
+        llm=Settings.llm,
+    )
+
+    chunk_tool = QueryEngineTool(
+        query_engine=chunk_engine,
+        metadata=ToolMetadata(
+            name="chunk_query_engine",
+            description=(
+                "Get answer from specific chunk of a given document. Best used for lower-level questions that require specific information from a given document."
+                "Do NOT use if the answer can be found in the entire document. Use the file_query_engine instead for that purpose"
+            ),
+        ),
+    )
+    doc_tool = QueryEngineTool(
+        query_engine=doc_engine,
+        metadata=ToolMetadata(
+            name="file_query_engine",
+            description=(
+                "Get answer from entire document as context.  Best used for higher-level summarization questions."
+                "Do NOT use if the answer can be found in a specific chunk of a given document. Use the chunk_query_engine instead for that purpose"
+            ),
+        ),
+    )
+
+    return [enable_citation(chunk_tool), enable_citation(doc_tool)]
+
+
+def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
+    index = get_index(chat_request=chat_request)
+    if index is None:
+        raise RuntimeError("Index not found!")
+
+    # Append the citation system prompt to the system prompt
+    system_prompt = """
+    You are a helpful assistant that has access to a knowledge base.
+    """
+    system_prompt += CITATION_SYSTEM_PROMPT
+    return AgentWorkflow.from_tools_or_functions(
+        tools_or_functions=get_tools(index),
+        system_prompt=system_prompt,
+    )
+
+
+def create_app() -> FastAPI:
+    app = LlamaIndexServer(
+        workflow_factory=create_workflow,
+        env="dev",
+        suggest_next_questions=False,
+        ui_config=UIConfig(
+            llamacloud_index_selector=True,  # to select different indexes in the UI
+        ),
+    )
+    return app
+
+
+app = create_app()
+
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
@@ -7,11 +7,12 @@ from llama_index.server import LlamaIndexServer, UIConfig
 def create_app() -> FastAPI:
    app = LlamaIndexServer(
        workflow_factory=create_workflow,
+        suggest_next_questions=True,
+        env="dev",
        ui_config=UIConfig(
-            app_title="Artifact",
            starter_questions=[
-                "Tell me a funny joke.",
-                "Tell me some jokes about AI.",
+                "Tell me a funny joke",
+                "Tell me some jokes about AI",
            ],
            component_dir="components",
            dev_mode=True,  # To show the dev UI, should disable this in production
@@ -1,3 +1,4 @@
+from llama_index.server.api.callbacks.agent_call_tool import AgentCallTool
 from llama_index.server.api.callbacks.base import EventCallback
 from llama_index.server.api.callbacks.llamacloud import LlamaCloudFileDownload
 from llama_index.server.api.callbacks.source_nodes import SourceNodesFromToolCall
@@ -10,4 +11,5 @@ __all__ = [
    "SourceNodesFromToolCall",
    "SuggestNextQuestions",
    "LlamaCloudFileDownload",
+    "AgentCallTool",
 ]
@@ -0,0 +1,26 @@
+import logging
+from typing import Any
+
+from llama_index.core.agent.workflow.workflow_events import ToolCall, ToolCallResult
+from llama_index.server.api.callbacks.base import EventCallback
+from llama_index.server.api.models import AgentRunEvent
+
+logger = logging.getLogger("uvicorn")
+
+
+class AgentCallTool(EventCallback):
+    """
+    Adapter that converts tool call events to agent run events.
+    """
+
+    async def run(self, event: Any) -> Any:
+        if isinstance(event, ToolCall) and not isinstance(event, ToolCallResult):
+            return AgentRunEvent(
+                name="Agent",
+                msg=f"Calling tool: {event.tool_name} with: {event.tool_kwargs}",
+            )
+        return event
+
+    @classmethod
+    def from_default(cls, *args: Any, **kwargs: Any) -> "AgentCallTool":
+        return cls()
@@ -1,31 +1,51 @@
-from typing import Any
+import logging
+from typing import Any, List, Optional

 from llama_index.core.agent.workflow.workflow_events import ToolCallResult
+from llama_index.core.schema import NodeWithScore
 from llama_index.server.api.callbacks.base import EventCallback
 from llama_index.server.api.models import SourceNodesEvent

+logger = logging.getLogger(__name__)
+

 class SourceNodesFromToolCall(EventCallback):
    """
    Extract source nodes from the query tool output.
-
-    Args:
-        query_tool_name: The name of the tool that queries the index.
-                         default is "query_index"
    """

-    def __init__(self, query_tool_name: str = "query_index"):
-        self.query_tool_name = query_tool_name
+    def __init__(self, tool_name: Optional[str] = None):
+        # backward compatibility
+        if tool_name is not None:
+            logger.warning(
+                "tool_name has been deprecated. It's now detected by the tool output."
+            )

-    def transform_tool_call_result(self, event: ToolCallResult) -> SourceNodesEvent:
-        source_nodes = event.tool_output.raw_output.source_nodes
-        return SourceNodesEvent(nodes=source_nodes)
+    def _get_source_nodes(self, event: ToolCallResult) -> Optional[List[NodeWithScore]]:
+        # Skip tool outputs that are errors
+        if event.tool_output.is_error:
+            return None
+        # If result is not error, check if source nodes are in the tool output
+        raw_output = event.tool_output.raw_output
+        if hasattr(raw_output, "source_nodes"):
+            source_nodes = raw_output.source_nodes
+            # Verify if source_nodes is List[NodeWithScore]
+            if isinstance(source_nodes, list) and all(
+                isinstance(node, NodeWithScore) for node in source_nodes
+            ):
+                return source_nodes
+            else:
+                return None
+        else:
+            return None

    async def run(self, event: Any) -> Any:
+        events = [event]
        if isinstance(event, ToolCallResult):
-            if event.tool_name == self.query_tool_name:
-                return event, self.transform_tool_call_result(event)
-        return event
+            source_nodes = self._get_source_nodes(event)
+            if source_nodes is not None:
+                events.append(SourceNodesEvent(nodes=source_nodes))
+        return events

    @classmethod
    def from_default(cls, *args: Any, **kwargs: Any) -> "SourceNodesFromToolCall":
@@ -3,23 +3,17 @@ import os
 from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Union

-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, field_validator

 from llama_index.core.schema import NodeWithScore
 from llama_index.core.types import ChatMessage, MessageRole
 from llama_index.core.workflow import Event
 from llama_index.server.settings import server_settings
+from llama_index.server.utils import llamacloud

 logger = logging.getLogger("uvicorn")


-class ChatConfig(BaseModel):
-    next_question_suggestions: bool = Field(
-        default=True,
-        description="Whether to suggest next questions",
-    )
-
-
 class ChatAPIMessage(BaseModel):
    role: MessageRole
    content: str
@@ -32,7 +26,6 @@ class ChatAPIMessage(BaseModel):
 class ChatRequest(BaseModel):
    messages: List[ChatAPIMessage]
    data: Optional[Any] = None
-    config: Optional[ChatConfig] = ChatConfig()

    @field_validator("messages")
    def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
@@ -111,11 +104,8 @@ class SourceNodes(BaseModel):
        file_name = metadata.get("file_name")

        if file_name and url_prefix:
-            # file_name exists and file server is configured
-            pipeline_id = metadata.get("pipeline_id")
-            if pipeline_id:
-                # file is from LlamaCloud
-                file_name = f"{pipeline_id}${file_name}"
+            if llamacloud.is_llamacloud_file(metadata):
+                file_name = llamacloud.get_local_file_name(metadata)
                return f"{url_prefix}/output/llamacloud/{file_name}"
            is_private = metadata.get("private", "false") == "true"
            if is_private:
@@ -1,9 +1,13 @@
 from llama_index.server.api.routers.chat import chat_router
-from llama_index.server.api.routers.ui import custom_components_router
+from llama_index.server.api.routers.ui import (
+    custom_components_router,
+    custom_layout_router,
+)
 from llama_index.server.api.routers.dev import dev_router

 __all__ = [
    "chat_router",
    "custom_components_router",
+    "custom_layout_router",
    "dev_router",
 ]
@@ -6,7 +6,6 @@ from typing import AsyncGenerator, Callable, Union

 from fastapi import APIRouter, BackgroundTasks, HTTPException
 from fastapi.responses import StreamingResponse
-
 from llama_index.core.agent.workflow.workflow_events import (
    AgentInput,
    AgentSetup,
@@ -14,6 +13,7 @@ from llama_index.core.agent.workflow.workflow_events import (
 )
 from llama_index.core.workflow import StopEvent, Workflow
 from llama_index.server.api.callbacks import (
+    AgentCallTool,
    EventCallback,
    LlamaCloudFileDownload,
    SourceNodesFromToolCall,
@@ -28,6 +28,7 @@ from llama_index.server.services.llamacloud import LlamaCloudFileService
 def chat_router(
    workflow_factory: Callable[..., Workflow],
    logger: logging.Logger,
+    suggest_next_questions: bool = True,
 ) -> APIRouter:
    router = APIRouter(prefix="/chat")

@@ -53,10 +54,11 @@ def chat_router(
            )

            callbacks: list[EventCallback] = [
+                AgentCallTool(),
                SourceNodesFromToolCall(),
                LlamaCloudFileDownload(background_tasks),
            ]
-            if request.config and request.config.next_question_suggestions:
+            if suggest_next_questions:
                callbacks.append(SuggestNextQuestions(request))
            stream_handler = StreamHandler(
                workflow_handler=workflow_handler,
@@ -14,7 +14,23 @@ def custom_components_router(

    @router.get("")
    async def components() -> List[ComponentDefinition]:
-        custom_ui = CustomUI(component_dir=component_dir, logger=logger)
-        return custom_ui.get_components()
+        custom_ui = CustomUI(logger=logger)
+        return custom_ui.get_components(directory=component_dir)
+
+    return router
+
+
+def custom_layout_router(
+    layout_dir: str,
+    logger: logging.Logger,
+) -> APIRouter:
+    """
+    Build the router that serves custom layout components.
+
+    Args:
+        layout_dir: Directory that is scanned for layout component files.
+        logger: Logger handed to `CustomUI`.
+
+    Returns:
+        An `APIRouter` with prefix `/layout` exposing a single GET endpoint.
+    """
+    router = APIRouter(prefix="/layout")
+
+    @router.get("")
+    async def layout() -> List[ComponentDefinition]:
+        # Only components whose type is "header" or "footer" are returned
+        custom_ui = CustomUI(logger=logger)
+        return custom_ui.get_components(
+            directory=layout_dir, filter_types=["header", "footer"]
+        )

     return router
@@ -1,55 +1,87 @@
+import importlib.resources
 import logging
 import shutil
 from pathlib import Path
 from typing import Optional

-import requests
-
-CHAT_UI_VERSION = "0.2.1"
+PACKAGE_NAME = "llama_index.server.resources"
+RESOURCE_DIR_NAME = "ui"


-def download_chat_ui(
+def check_ui_resources() -> None:
+    """
+    Resolve a reference to the bundled UI resources directory.
+
+    Builds the `RESOURCE_DIR_NAME` path inside `PACKAGE_NAME` via
+    `importlib.resources` and discards the result; nothing is listed or copied.
+
+    Raises:
+        Exception: If resolving the reference fails for any reason (the original
+            error is chained as the cause).
+
+    NOTE(review): `joinpath` presumably only builds a path reference and does not
+    verify that the directory exists, so a missing `ui` folder may pass this
+    check - confirm.
+    """
+    try:
+        _ = importlib.resources.files(PACKAGE_NAME).joinpath(RESOURCE_DIR_NAME)
+    except Exception as e:
+        raise Exception("UI resources not found in bundled package") from e
+
+
+def copy_bundled_chat_ui(
    logger: Optional[logging.Logger] = None, target_path: str = ".ui"
 ) -> None:
+    # Check if the UI resources directory exists
+    check_ui_resources()
+
    if logger is None:
        logger = logging.getLogger("uvicorn")
-    path = Path(target_path)
-    temp_dir = _download_package(_get_download_link(CHAT_UI_VERSION))
-    _copy_ui_files(temp_dir, path)
-    logger.info("Chat UI downloaded and copied to static folder")

+    destination_path = Path(target_path)
+    destination_path.mkdir(parents=True, exist_ok=True)

-def _get_download_link(version: str) -> str:
-    """Get the download link for the chat UI from the npm registry."""
-    return f"https://registry.npmjs.org/@llamaindex/server/-/server-{version}.tgz"
-
-
-def _download_package(url: str) -> Path:
-    """Download tar.gz file and extract all files into a temporary directory."""
-    import io
-    import tarfile
-    import tempfile
-
-    response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
-    content = response.content
-
-    temp_dir = Path(tempfile.mkdtemp())
-
-    with tarfile.open(fileobj=io.BytesIO(content), mode="r:gz") as tar:
-        tar.extractall(path=temp_dir)
-
-    return temp_dir
-
-
-def _copy_ui_files(temp_dir: Path, target_path: Path) -> None:
-    """Copy files from the .next directory to the static directory."""
-    target_path.mkdir(parents=True, exist_ok=True)
-    next_dir = temp_dir / "package/dist/static"
-
-    if next_dir.exists():
-        for item in next_dir.iterdir():
-            dest = target_path / item.name
+    try:
+        # Clear the destination directory first to avoid stale files
+        for item in destination_path.iterdir():
            if item.is_dir():
-                shutil.copytree(item, dest, dirs_exist_ok=True)
+                shutil.rmtree(item)
            else:
-                shutil.copy2(item, dest)
+                item.unlink()
+
+        # Get a reference to the source directory using importlib.resources.files (Python 3.9+)
+        source_dir_ref = importlib.resources.files(PACKAGE_NAME).joinpath(
+            RESOURCE_DIR_NAME
+        )
+
+        if not source_dir_ref.is_dir():
+            logger.error(
+                f"Static UI resource directory '{RESOURCE_DIR_NAME}' not found in package '{PACKAGE_NAME}'. Path: {source_dir_ref}"
+            )
+            logger.error(
+                "Ensure the static files are correctly bundled with the package and the path is correct."
+            )
+            return
+
+        for source_item_path_ref in source_dir_ref.iterdir():
+            # Skip __init__.py or other non-static files if present (though less likely needed with direct iteration)
+            if source_item_path_ref.name.startswith(
+                "__"
+            ) or source_item_path_ref.name.endswith(".py"):
+                continue
+
+            dest_item_path = destination_path / source_item_path_ref.name
+
+            # importlib.resources.as_file is needed to get a concrete path for shutil operations
+            with importlib.resources.as_file(
+                source_item_path_ref
+            ) as concrete_source_item_path:
+                if concrete_source_item_path.is_dir():
+                    shutil.copytree(
+                        concrete_source_item_path, dest_item_path, dirs_exist_ok=True
+                    )
+                elif concrete_source_item_path.is_file():
+                    shutil.copy2(concrete_source_item_path, dest_item_path)
+                else:
+                    logger.warning(
+                        f"Skipping resource '{source_item_path_ref.name}' as it's not a file or directory."
+                    )
+
+        logger.info(f"Chat UI files copied from package to '{destination_path}'")
+
+    except FileNotFoundError:
+        logger.error(
+            "Oops! The chat UI files are not found. Please report this issue to the LlamaIndex team."
+        )
+    except Exception as e:
+        logger.error(f"Failed to copy bundled chat UI files: {e}.")
@@ -0,0 +1,52 @@
+# Used by SuggestNextQuestionsService
+# Override this prompt by setting the `NEXT_QUESTION_PROMPT` environment variable
+SUGGEST_NEXT_QUESTION_PROMPT = """You're a helpful assistant! Your task is to suggest the next questions that user might interested in to keep the conversation going.
+Here is the conversation history
+---------------------
+{conversation}
+---------------------
+Given the conversation history, please give me 3 questions that user might ask next!
+Your answer should be wrapped in three sticks without any index numbers and follows the following format:
+```
+<question 1>
+<question 2>
+<question 3>
+```
+"""
+
+# Used as a prompt for synthesizer
+# Override this prompt by setting the `CITATION_PROMPT` environment variable
+CITATION_PROMPT = """
+Context information is below.
+------------------
+{context_str}
+------------------
+The context are multiple text chunks, each text chunk has its own citation_id at the beginning.
+Use the citation_id for citation construction.
+
+Answer the following query with citations:
+------------------
+{query_str}
+------------------
+
+## Citation format
+
+[citation:id]
+
+Where:
+- [citation:] is a matching pattern which is required for all citations.
+- `id` is the `citation_id` provided in the context or previous response.
+
+Example:
+```
+    Here is a response that uses context information [citation:90ca859f-4f32-40ca-8cd0-edfad4fb298b] 
+    and other ideas that don't use context information [citation:17b2cc9a-27ae-4b6d-bede-5ca60fc00ff4] .\n
+    The citation block will be displayed automatically with useful information for the user in the UI [citation:1c606612-e75f-490e-8374-44e79f818d19] .
+```
+
+## Requirements:
+1. Always include citations for every fact from the context information in your response. 
+2. Make sure that the citation_id is correct with the context, don't mix up the citation_id with other information.
+
+Now, you answer the query with citations:
+"""
@@ -13,17 +13,15 @@ from llama_index.core.workflow import Workflow
 from llama_index.server.api.routers import (
    chat_router,
    custom_components_router,
+    custom_layout_router,
    dev_router,
 )
-from llama_index.server.chat_ui import download_chat_ui
+from llama_index.server.chat_ui import copy_bundled_chat_ui
 from llama_index.server.settings import server_settings


 class UIConfig(BaseModel):
    enabled: bool = Field(default=True, description="Whether to enable the chat UI")
-    app_title: str = Field(
-        default="LlamaIndex Server", description="The title of the chat UI"
-    )
    starter_questions: Optional[list[str]] = Field(
        default=None, description="The starter questions for the chat UI"
    )
@@ -37,6 +35,10 @@ class UIConfig(BaseModel):
    component_dir: Optional[str] = Field(
        default=None, description="The directory to custom UI components code"
    )
+    layout_dir: str = Field(
+        default="layout",
+        description="The directory to custom UI layout such as header and footer",
+    )
    dev_mode: bool = Field(
        default=False, description="Whether to enable the UI dev mode"
    )
@@ -46,13 +48,20 @@ class UIConfig(BaseModel):
            {
                "CHAT_API": f"{server_settings.api_url}/chat",
                "STARTER_QUESTIONS": self.starter_questions or [],
-                "LLAMA_CLOUD_API": f"{server_settings.api_url}/chat/config/llamacloud"
-                if self.llamacloud_index_selector and os.getenv("LLAMA_CLOUD_API_KEY")
-                else None,
-                "APP_TITLE": self.app_title,
-                "COMPONENTS_API": f"{server_settings.api_url}/components"
-                if self.component_dir
-                else None,
+                "LLAMA_CLOUD_API": (
+                    f"{server_settings.api_url}/chat/config/llamacloud"
+                    if self.llamacloud_index_selector
+                    and os.getenv("LLAMA_CLOUD_API_KEY")
+                    else None
+                ),
+                "COMPONENTS_API": (
+                    f"{server_settings.api_url}/components"
+                    if self.component_dir
+                    else None
+                ),
+                "LAYOUT_API": (
+                    f"{server_settings.api_url}/layout" if self.layout_dir else None
+                ),
                "DEV_MODE": self.dev_mode,
            },
            indent=2,
@@ -68,11 +77,12 @@ class LlamaIndexServer(FastAPI):
        self,
        workflow_factory: Callable[..., Workflow],
        logger: Optional[logging.Logger] = None,
-        use_default_routers: Optional[bool] = True,
+        use_default_routers: Optional[bool] = None,
        env: Optional[str] = None,
        ui_config: Optional[Union[UIConfig, dict]] = None,
        server_url: Optional[str] = None,
        api_prefix: Optional[str] = None,
+        suggest_next_questions: Optional[bool] = None,
        verbose: bool = False,
        *args: Any,
        **kwargs: Any,
@@ -88,6 +98,7 @@ class LlamaIndexServer(FastAPI):
            ui_config: The configuration for the chat UI.
            server_url: The URL of the server.
            api_prefix: The prefix for the API endpoints.
+            suggest_next_questions: Whether to suggest next questions after the assistant's response.
            verbose: Whether to show verbose logs.
        """
        super().__init__(*args, **kwargs)
@@ -95,7 +106,12 @@ class LlamaIndexServer(FastAPI):
        self.workflow_factory = workflow_factory
        self.logger = logger or logging.getLogger("uvicorn")
        self.verbose = verbose
-        self.use_default_routers = use_default_routers or True
+        self.use_default_routers = (
+            True if use_default_routers is None else use_default_routers
+        )
+        self.suggest_next_questions = (
+            True if suggest_next_questions is None else suggest_next_questions
+        )
        if ui_config is None:
            self.ui_config = UIConfig()
        elif isinstance(ui_config, dict):
@@ -146,6 +162,7 @@ class LlamaIndexServer(FastAPI):
            chat_router(
                self.workflow_factory,
                self.logger,
+                self.suggest_next_questions,
            ),
            prefix=server_settings.api_prefix,
        )
@@ -162,6 +179,15 @@ class LlamaIndexServer(FastAPI):
            prefix=server_settings.api_prefix,
        )

+    def add_layout_router(self) -> None:
+        """
+        Add the layout router.
+
+        The router is built from `ui_config.layout_dir` with this server's logger
+        and is included under `server_settings.api_prefix`.
+        """
+        self.include_router(
+            custom_layout_router(self.ui_config.layout_dir, self.logger),
+            prefix=server_settings.api_prefix,
+        )
+
    def mount_ui(self) -> None:
        """
        Mount the UI.
@@ -173,13 +199,20 @@ class LlamaIndexServer(FastAPI):
                if not os.path.exists(self.ui_config.component_dir):
                    os.makedirs(self.ui_config.component_dir)
                self.add_components_router()
+            # Layout dir
+            if self.ui_config.layout_dir:
+                if not os.path.exists(self.ui_config.layout_dir):
+                    os.makedirs(self.ui_config.layout_dir)
+                self.add_layout_router()
            # UI static files
            if not os.path.exists(self.ui_config.ui_path):
                os.makedirs(self.ui_config.ui_path)
                self.logger.warning(
-                    f"UI files not found, downloading UI to {self.ui_config.ui_path}"
+                    f"UI files not found at {self.ui_config.ui_path}. Copying bundled UI files."
+                )
+                copy_bundled_chat_ui(
+                    logger=self.logger, target_path=self.ui_config.ui_path
                )
-                download_chat_ui(logger=self.logger, target_path=self.ui_config.ui_path)
            self._mount_static_files(
                directory=self.ui_config.ui_path,
                path="/",
@@ -6,31 +6,28 @@ from llama_index.server.api.models import ComponentDefinition


 class CustomUI:
-    def __init__(
-        self, component_dir: str, logger: Optional[logging.Logger] = None
-    ) -> None:
-        self.component_dir = component_dir
+    def __init__(self, logger: Optional[logging.Logger] = None) -> None:
        self.logger = logger or logging.getLogger(__name__)

-    def get_components(self) -> List[ComponentDefinition]:
+    def get_components(
+        self, directory: str, filter_types: Optional[List[str]] = None
+    ) -> List[ComponentDefinition]:
        """
        List all js files in the component directory and return a list of ComponentDefinition objects.
        Ignores files that fail to load and logs the error.
        TSX files take precedence over JSX files when duplicate component names are found.
        """
        components_dict: dict[str, ComponentDefinition] = {}
-        if not os.path.exists(self.component_dir):
-            self.logger.warning(
-                f"Component directory {self.component_dir} does not exist"
-            )
+        if not os.path.exists(directory):
+            self.logger.warning(f"Component directory {directory} does not exist")
            return []
        try:
-            for file in os.listdir(self.component_dir):
+            for file in os.listdir(directory):
                if not file.endswith((".jsx", ".tsx")):
                    continue

                component_name = file.split(".")[0]
-                file_path = os.path.join(self.component_dir, file)
+                file_path = os.path.join(directory, file)
                file_ext = os.path.splitext(file)[1]

                try:
@@ -78,4 +75,11 @@ class CustomUI:
        except Exception as e:
            self.logger.error(f"Error reading component directory: {str(e)}")

-        return list(components_dict.values())
+        result = list(components_dict.values())
+
+        if filter_types:
+            result = [
+                component for component in result if component.type in filter_types
+            ]
+
+        return result
@@ -8,10 +8,12 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union
 import requests
 from fastapi import BackgroundTasks
 from llama_cloud import ManagedIngestionStatus, PipelineFileCreateCustomMetadataValue
+from pydantic import BaseModel
+
 from llama_index.core.schema import NodeWithScore
 from llama_index.server.api.models import SourceNodes
 from llama_index.server.services.llamacloud.index import get_client
-from pydantic import BaseModel
+from llama_index.server.utils import llamacloud

 logger = logging.getLogger("uvicorn")

@@ -33,7 +35,6 @@ class LlamaCloudFile(BaseModel):

 class LlamaCloudFileService:
    LOCAL_STORE_PATH = "output/llamacloud"
-    DOWNLOAD_FILE_NAME_TPL = "{pipeline_id}${filename}"

    @classmethod
    def get_all_projects_with_pipelines(cls) -> List[Dict[str, Any]]:
@@ -155,13 +156,12 @@ class LlamaCloudFileService:
        # Remove duplicates and return
        return set(llama_cloud_files)

-    @classmethod
-    def _get_file_name(cls, name: str, pipeline_id: str) -> str:
-        return cls.DOWNLOAD_FILE_NAME_TPL.format(pipeline_id=pipeline_id, filename=name)
-
    @classmethod
    def _get_file_path(cls, name: str, pipeline_id: str) -> str:
-        return os.path.join(cls.LOCAL_STORE_PATH, cls._get_file_name(name, pipeline_id))
+        file_name = llamacloud.get_local_file_name(
+            llamacloud_file_name=name, pipeline_id=pipeline_id
+        )
+        return os.path.join(cls.LOCAL_STORE_PATH, file_name)

    @classmethod
    def _download_file(cls, url: str, local_file_path: str) -> None:
@@ -6,6 +6,7 @@ from typing import List, Optional, Union
 from llama_index.core.prompts import PromptTemplate
 from llama_index.core.settings import Settings
 from llama_index.server.api.models import ChatAPIMessage
+from llama_index.server.prompts import SUGGEST_NEXT_QUESTION_PROMPT

 logger = logging.getLogger("uvicorn")

@@ -15,28 +16,11 @@ class SuggestNextQuestionsService:
    Suggest the next questions that user might ask based on the conversation history.
    """

-    prompt = PromptTemplate(
-        r"""
-You're a helpful assistant! Your task is to suggest the next questions that user might interested in to keep the conversation going.
-Here is the conversation history
---------------------
-{conversation}
---------------------
-Given the conversation history, please give me 3 questions that user might ask next!
-Your answer should be wrapped in three sticks without any index numbers and follows the following format:
-\`\`\`
-<question 1>
-<question 2>
-<question 3>
-\`\`\`
-"""
-    )
-
    @classmethod
    def get_configured_prompt(cls) -> PromptTemplate:
        prompt = os.getenv("NEXT_QUESTION_PROMPT", None)
        if not prompt:
-            return cls.prompt
+            return PromptTemplate(SUGGEST_NEXT_QUESTION_PROMPT)
        return PromptTemplate(prompt)

    @classmethod
@@ -1,3 +1,4 @@
+from .citation import CitationSynthesizer, NodeCitationProcessor
 from .query import get_query_engine_tool

-__all__ = ["get_query_engine_tool"]
+__all__ = ["get_query_engine_tool", "NodeCitationProcessor", "CitationSynthesizer"]
@@ -0,0 +1,72 @@
+import logging
+from typing import Any, List, Optional
+
+from llama_index.core import QueryBundle
+from llama_index.core.postprocessor.types import BaseNodePostprocessor
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
+from llama_index.core.response_synthesizers import Accumulate
+from llama_index.core.schema import NodeWithScore
+from llama_index.core.tools.query_engine import QueryEngineTool
+from llama_index.server.prompts import CITATION_PROMPT
+
+logger = logging.getLogger(__name__)
+
+
+class NodeCitationProcessor(BaseNodePostprocessor):
+    """
+    Add a new field `citation_id` to the metadata of the node by copying the id from the node.
+    Useful for citation construction.
+    """
+
+    def _postprocess_nodes(
+        self,
+        nodes: List[NodeWithScore],
+        query_bundle: Optional[QueryBundle] = None,
+    ) -> List[NodeWithScore]:
+        """
+        Set `metadata["citation_id"]` to the node's `node_id` on every given node.
+
+        Args:
+            nodes: The scored nodes to tag. Their metadata is modified in place.
+            query_bundle: Not used by this processor.
+
+        Returns:
+            The same `nodes` list that was passed in.
+        """
+        for node_score in nodes:
+            node_score.node.metadata["citation_id"] = node_score.node.node_id
+        return nodes
+
+
+class CitationSynthesizer(Accumulate):
+    """
+    Accumulate synthesizer that uses a citation-aware `text_qa_template`.
+
+    If the caller does not pass `text_qa_template` (or passes None), a template
+    built from `CITATION_PROMPT` is used. All other keyword arguments are
+    forwarded to `Accumulate` unchanged.
+
+    This class does not touch node metadata itself; the `citation_id` field is
+    added by `NodeCitationProcessor`.
+    """
+
+    def __init__(self, **kwargs: Any) -> None:
+        # Pop the template so it is not passed twice to the parent constructor
+        text_qa_template = kwargs.pop("text_qa_template", None)
+        if text_qa_template is None:
+            text_qa_template = PromptTemplate(template=CITATION_PROMPT)
+        super().__init__(text_qa_template=text_qa_template, **kwargs)
+
+
+# Add this prompt to your agent system prompt
+CITATION_SYSTEM_PROMPT = (
+    "\nAnswer the user question using the response from the query tool. "
+    "It's important to respect the citation information in the response. "
+    "Don't mix up the citation_id, keep them at the correct fact."
+)
+
+
+def enable_citation(query_engine_tool: QueryEngineTool) -> QueryEngineTool:
+    """
+    Enable citation for a query engine tool by using CitationSynthesizer and NodeCitationProcessor.
+    Note: This function will override the response synthesizer of your query engine.
+
+    Calling it more than once on the same tool is safe: the citation processor
+    and the description note are each added only once.
+
+    Args:
+        query_engine_tool: The tool whose query engine should produce citations.
+            The tool and its query engine are modified in place.
+
+    Returns:
+        The same tool instance that was passed in.
+
+    Raises:
+        ValueError: If the tool's query engine is not a RetrieverQueryEngine.
+    """
+    query_engine = query_engine_tool.query_engine
+    if not isinstance(query_engine, RetrieverQueryEngine):
+        raise ValueError(
+            "Citation feature requires a RetrieverQueryEngine. Your tool's query engine is a "
+            f"{type(query_engine)}."
+        )
+    # Update the response synthesizer and node postprocessors
+    query_engine._response_synthesizer = CitationSynthesizer()
+    # Build a new list instead of using `+=`, which would mutate the existing
+    # list object in place (it may be shared with whoever built the engine).
+    postprocessors = list(query_engine._node_postprocessors or [])
+    if not any(isinstance(p, NodeCitationProcessor) for p in postprocessors):
+        postprocessors.append(NodeCitationProcessor())
+    query_engine._node_postprocessors = postprocessors
+    query_engine_tool._query_engine = query_engine
+
+    # Update tool metadata (skip if the note is already present)
+    citation_note = "\nThe output will include citations with the format [citation:id] for each chunk of information in the knowledge base."
+    description = query_engine_tool.metadata.description or ""
+    if citation_note not in description:
+        query_engine_tool.metadata.description = description + citation_note
+    return query_engine_tool
@@ -1,9 +1,12 @@
+import logging
 import os
 from typing import Any, Optional

 from llama_index.core.base.base_query_engine import BaseQueryEngine
-from llama_index.core.tools.query_engine import QueryEngineTool
 from llama_index.core.indices.base import BaseIndex
+from llama_index.core.tools.query_engine import QueryEngineTool
+
+logger = logging.getLogger(__name__)


 def create_query_engine(index: BaseIndex, **kwargs: Any) -> BaseQueryEngine:
@@ -38,12 +41,11 @@ def get_query_engine_tool(
    if name is None:
        name = "query_index"
    if description is None:
-        description = (
-            "Use this tool to retrieve information about the text corpus from an index."
-        )
+        description = "Use this tool to retrieve information from a knowledge base. Provide a specific query and can call the tool multiple times if necessary."
    query_engine = create_query_engine(index, **kwargs)
-    return QueryEngineTool.from_defaults(
+    tool = QueryEngineTool.from_defaults(
        query_engine=query_engine,
        name=name,
        description=description,
    )
+    return tool
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Thuc Pham	4c8579b04f	use eject file in linux (#663 )	2025-05-29 09:15:52 +07:00
github-actions[bot]	bb1e82cdae	Release 0.1.18 (#660 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-28 17:57:45 +07:00
Huu Le	f682a1c36e	chore: add project directory to Prettier ignore list (#659 )	2025-05-28 17:50:23 +07:00
Huu Le	b8a1ff6412	feat: Support citation for agentic template (#642 )	2025-05-28 17:28:50 +07:00
Thuc Pham	5fe9e17d3f	feat: support eject to fully customize next folder (#653 )	2025-05-28 17:09:47 +07:00
Marcus Schiesser	15619d81a6	added claude code files	2025-05-27 13:39:57 +07:00
Huu Le	76742da78a	chore: add python release condition (#656 )	2025-05-27 09:25:36 +07:00
github-actions[bot]	693d7a0ea5	Release 0.5.18 (#655 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-26 18:43:41 +07:00
Huu Le	8d59ef0a6b	chore: Add layout_dir config to the generated python code (#654 )	2025-05-26 18:09:31 +07:00
github-actions[bot]	c62f26e31c	Release 0.1.17 (#652 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-26 11:21:23 +07:00
Huu Le	d3f73679b4	chore: add server package path to ESLint ignore list (#651 )	2025-05-26 10:58:40 +07:00
Huu Le	91c35cff33	fix release action didn't run custom version command (#650 )	2025-05-26 10:43:11 +07:00
github-actions[bot]	82ac925224	Release 0.1.17 (#644 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-23 17:10:05 +07:00
thucpn	f24ee8e6f9	fix: missing comma in config	2025-05-23 16:39:27 +07:00
Thuc Pham	3acec88fbc	chore: bump chat-ui (#645 )	2025-05-23 15:18:17 +07:00
Thuc Pham	eee3230e99	feat: support custom layout (#641 )	2025-05-23 14:18:22 +07:00
Marcus Schiesser	d8425e5290	docs: fix type	2025-05-23 13:22:11 +07:00
Huu Le	0bc5a0d882	feat: Add config for suggest next question (#640 ) * Enhance LlamaIndexServer with next question suggestion feature - Added `suggest_next_questions` parameter to the LlamaIndexServer for suggesting follow-up questions after the assistant's response. - Updated README.md to document the new configuration option. - Introduced `SUGGEST_NEXT_QUESTION_PROMPT` in prompts.py for customizable question suggestions. - Bumped version to 0.1.16 in uv.lock to reflect the new feature. * Implement next question suggestion feature in LlamaIndexServer - Added `suggestNextQuestions` option to LlamaIndexServer for suggesting follow-up questions after the assistant's response. - Updated README.md to include the new configuration option. - Modified example workflow to utilize the new feature. - Enhanced chat handler to conditionally send suggested questions based on the new option. * add changeset * remove log * bundle ui instead of download * check test * check test check test check test check test check test check test check test check test check test check test * fix tests * Update artifact path in workflow and clarify README.md text - Changed the artifact path in the GitHub Actions workflow from `python/llama-index-server/dist/` to `dist/`. - Revised README.md to clarify the default prompt used for the `suggest_next_questions` configuration option. * support changeset for python * refactor: update llama-index-server structure and workflows * fix workflows * fix workflows * fix workflows * add changeset * fix cannot release python * Update packages/server/README.md Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com> * Update starter questions in LlamaIndex App and add TODO for suggestion feature in chat API --------- Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de> Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com>	2025-05-23 12:48:45 +07:00