try fixed version

try 1 use case
test with 1 option
2026-07-02 19:14:28 -04:00 · 2025-06-12 13:59:20 +07:00 · 2025-06-12 13:53:17 +07:00 · 2025-06-12 13:46:11 +07:00 · 2025-06-12 12:27:58 +07:00 · 2025-06-09 16:53:49 +07:00
230 changed files with 18971 additions and 5599 deletions
@@ -0,0 +1,5 @@
+---
+"@create-llama/llama-index-server": patch
+---
+
+fix: errors raised from the workflow could not be caught
@@ -12,87 +12,18 @@ on:
      - ".github/workflows/*llama_index_server.yml"

 jobs:
-  e2e-python:
-    name: python
-    timeout-minutes: 60
-    strategy:
-      fail-fast: true
-      matrix:
-        node-version: [20]
-        python-version: ["3.11"]
-        os: [macos-latest, windows-latest, ubuntu-22.04]
-        frameworks: ["fastapi"]
-        datasources: ["--no-files", "--example-file", "--llamacloud"]
-    defaults:
-      run:
-        shell: bash
-    runs-on: ${{ matrix.os }}
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Set up python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install uv
-        run: curl -LsSf https://astral.sh/uv/install.sh | sh
-      - name: Add uv to PATH # Ensure uv is available in subsequent steps
-        run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH
-
-      - uses: pnpm/action-setup@v3
-
-      - name: Setup Node.js ${{ matrix.node-version }}
-        uses: actions/setup-node@v4
-        with:
-          node-version: ${{ matrix.node-version }}
-          cache: "pnpm"
-
-      - name: Install dependencies
-        run: pnpm install
-
-      - name: Install Playwright Browsers
-        run: pnpm exec playwright install --with-deps
-        working-directory: packages/create-llama
-
-      - name: Build create-llama
-        run: pnpm run build
-        working-directory: packages/create-llama
-
-      - name: Install
-        run: pnpm run pack-install
-        working-directory: packages/create-llama
-
-      - name: Run Playwright tests for Python
-        run: pnpm run e2e:python
-        env:
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
-          LLAMA_CLOUD_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY }}
-          FRAMEWORK: ${{ matrix.frameworks }}
-          DATASOURCE: ${{ matrix.datasources }}
-          PYTHONIOENCODING: utf-8
-          PYTHONLEGACYWINDOWSSTDIO: utf-8
-        working-directory: packages/create-llama
-
-      - uses: actions/upload-artifact@v4
-        if: always()
-        with:
-          name: playwright-report-python-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}
-          path: packages/create-llama/playwright-report/
-          overwrite: true
-          retention-days: 30
-
  e2e-typescript:
    name: typescript
    timeout-minutes: 60
    strategy:
      fail-fast: true
      matrix:
-        node-version: [20, 22]
+        node-version: [20]
        python-version: ["3.11"]
-        os: [macos-latest, windows-latest, ubuntu-22.04]
+        os: [macos-latest]
        frameworks: ["nextjs"]
-        datasources: ["--no-files", "--example-file", "--llamacloud"]
+        datasources: ["--llamacloud"]
+        template-types: ["llamaindexserver"]
    defaults:
      run:
        shell: bash
@@ -133,6 +64,21 @@ jobs:
        run: pnpm run pack-install
        working-directory: packages/create-llama

+      - name: Build server
+        run: pnpm run build
+        working-directory: packages/server
+
+      - name: Pack @llamaindex/server package
+        run: |
+          pnpm pack --pack-destination "${{ runner.temp }}"
+          if [ "${{ runner.os }}" == "Windows" ]; then
+            file=$(find "${{ runner.temp }}" -name "llamaindex-server-*.tgz" | head -n 1)
+            mv "$file" "${{ runner.temp }}/llamaindex-server.tgz"
+          else
+            mv ${{ runner.temp }}/llamaindex-server-*.tgz ${{ runner.temp }}/llamaindex-server.tgz
+          fi
+        working-directory: packages/server
+
      - name: Run Playwright tests for TypeScript
        run: pnpm run e2e:typescript
        env:
@@ -140,12 +86,14 @@ jobs:
          LLAMA_CLOUD_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY }}
          FRAMEWORK: ${{ matrix.frameworks }}
          DATASOURCE: ${{ matrix.datasources }}
+          TEMPLATE_TYPE: ${{ matrix.template-types }}
+          SERVER_PACKAGE_PATH: ${{ runner.temp }}/llamaindex-server.tgz
        working-directory: packages/create-llama

      - uses: actions/upload-artifact@v4
        if: always()
        with:
-          name: playwright-report-typescript-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}-node${{ matrix.node-version }}
+          name: playwright-report-typescript-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}-node${{ matrix.node-version }}-${{ matrix.template-types }}
          path: packages/create-llama/playwright-report/
          overwrite: true
          retention-days: 30
@@ -16,6 +16,16 @@ jobs:

      - uses: pnpm/action-setup@v3

+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -31,6 +41,13 @@ jobs:
      - name: Run Prettier
        run: pnpm run format

+      - name: Run build
+        run: pnpm run build
+
+      - name: Run Typecheck for examples
+        run: pnpm run typecheck
+        working-directory: packages/server/examples
+
      - name: Run Python format check
        uses: chartboost/ruff-action@v1
        with:
@@ -17,6 +17,11 @@ jobs:

      - uses: pnpm/action-setup@v3

+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
      - name: Install uv
        uses: astral-sh/setup-uv@v3

@@ -51,8 +56,12 @@ jobs:
        with:
          commit: Release ${{ steps.get-changeset-status.outputs.new-version }}
          title: Release ${{ steps.get-changeset-status.outputs.new-version }}
+          # bump versions
+          version: pnpm new-version
          # build package and call changeset publish
          publish: pnpm release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
@@ -1,138 +0,0 @@
-name: Release llama-index-server
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - "python/llama-index-server/**"
-      - ".github/workflows/release_llama_index_server.yml"
-  pull_request:
-    types:
-      - closed
-
-concurrency: ${{ github.workflow }}-${{ github.ref }}
-
-jobs:
-  release:
-    name: Create Release PR
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'push' && 
-      !startsWith(github.ref, 'refs/heads/release/llama-index-server-v') &&
-      !contains(github.event.head_commit.message, 'Release: llama-index-server v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras --dev
-
-      - name: Setup Git
-        run: |
-          git config --global user.email "github-actions[bot]@users.noreply.github.com"
-          git config --global user.name "github-actions[bot]"
-
-      - name: Bump patch version
-        shell: bash
-        run: |
-          uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version | awk -F. '{$NF = $NF + 1;}1' OFS=.)
-          git add pyproject.toml
-          git commit -m "chore(release): bump llama-index-server version to $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)"
-
-      - name: Get current version
-        id: get_version
-        shell: bash
-        run: |
-          version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Create Release PR
-        uses: peter-evans/create-pull-request@v6
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          title: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            This PR was automatically created to release a new version of the llama-index-server package.
-
-            Version: ${{ steps.get_version.outputs.current_version }}
-
-            Please review the changes and merge to trigger the release.
-          branch: release/llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          base: main
-          labels: release, llama-index-server
-
-  publish:
-    name: Publish to PyPI
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'pull_request' && 
-      github.event.pull_request.merged == true && 
-      startsWith(github.event.pull_request.title, 'Release: llama-index-server') &&
-      startsWith(github.event.pull_request.head.ref, 'release/llama-index-server-v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras
-
-      - name: Get current version
-        id: get_version
-        shell: bash
-        run: |
-          version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Build package
-        shell: bash
-        run: uv build --no-sources
-
-      - name: Publish to PyPI
-        shell: bash
-        run: uv publish --token ${{ secrets.PYPI_TOKEN }}
-
-      - name: Create GitHub Release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          name: "llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            Release of llama-index-server v${{ steps.get_version.outputs.current_version }}
-          draft: false
-          prerelease: false
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -5,6 +5,7 @@ on:

 env:
  PYTHON_VERSION: "3.9"
+  UI_TEST: "true"

 jobs:
  unit-test:
@@ -19,20 +20,27 @@ jobs:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
        with:
-          python-version: ${{ matrix.python-version }}
+          node-version-file: ".nvmrc"
+          cache: "pnpm"

      - name: Install dependencies
        shell: bash
-        run: uv sync --all-extras --dev
+        run: pnpm install && pnpm build

      - name: Run unit tests
        shell: bash
@@ -46,20 +54,20 @@ jobs:
        working-directory: python/llama-index-server
    steps:
      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-
      - name: Install dependencies
-        shell: bash
-        run: uv sync --all-extras --dev
+        run: pnpm install

      - name: Run mypy
        shell: bash
@@ -73,27 +81,56 @@ jobs:
        working-directory: python/llama-index-server
    steps:
      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-        with:
-          enable-cache: true
+      - uses: pnpm/action-setup@v3

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

-      - name: Install build package
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "pnpm"
+
+      - name: Install dependencies
+        run: pnpm install && pnpm build
+
+      - name: Build package
        shell: bash
-        run: uv sync --all-extras
+        run: uv build
+
+      - name: Get the absolute wheel file path and save it to the output
+        shell: bash
+        id: get_whl_path
+        run: |
+          WHL_FILE=$(readlink -f dist/*.whl)
+          echo "whl_file=$WHL_FILE" >> $GITHUB_OUTPUT

      - name: Test import
        shell: bash
-        run: uv run python -c "from llama_index.server import LlamaIndexServer"
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          uv run --with $WHL_FILE python -c "from llama_index.server import LlamaIndexServer"
+
+      - name: Check frontend resources is present
+        shell: bash
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          uv run --with $WHL_FILE python -c "from llama_index.server.chat_ui import check_ui_resources; check_ui_resources()"

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: llama-index-server
-          path: python/llama-index-server/dist/
+          path: dist/
@@ -7,6 +7,9 @@ build/
 .next/
 out/
 packages/server/server/
+packages/server/project/
+**/playwright-report/
+**/test-results/

 # Python
 python/
@@ -0,0 +1,201 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Repository Overview
+
+Create-llama is a monorepo containing CLI tools and server frameworks for building LlamaIndex-powered applications. The repository combines TypeScript/Node.js and Python components in a unified development environment.
+
+## Architecture
+
+### Monorepo Structure
+
+- **`packages/create-llama/`**: Main CLI tool for scaffolding LlamaIndex applications
+- **`packages/server/`**: TypeScript/Next.js server framework (`@llamaindex/server`)
+- **`python/llama-index-server/`**: Python/FastAPI server framework
+- **Root**: Workspace configuration and shared development tools
+
+### Key Technologies
+
+- **Package Manager**: pnpm with workspace configuration
+- **Build Tools**: bunchee (TypeScript), Next.js, hatchling (Python)
+- **Testing**: Playwright for e2e, pytest for Python
+- **Version Management**: changesets for both the TypeScript packages and the Python package (`@create-llama/llama-index-server`)
+
+## Development Commands
+
+### Root Level (Monorepo)
+
+```bash
+pnpm dev          # Start all packages in development mode
+pnpm build        # Build all packages
+pnpm lint         # ESLint across TypeScript packages
+pnpm format       # Prettier formatting
+pnpm e2e          # Run end-to-end tests
+```
+
+### Create-llama Package
+
+```bash
+cd packages/create-llama
+npm run build     # Build CLI using bash script and ncc
+npm run dev       # Watch mode development
+npm run e2e       # Playwright tests for generated projects
+npm run clean     # Clean build artifacts and template caches
+```
+
+### TypeScript Server Package
+
+```bash
+cd packages/server
+pnpm dev          # Watch mode with bunchee
+pnpm build        # Multi-step build: ESM/CJS + Next.js + static assets
+pnpm clean        # Clean all build outputs
+```
+
+### Python Server Package
+
+```bash
+cd python/llama-index-server
+uv run generate   # Index data files
+fastapi dev       # Start development server with hot reload
+pytest            # Run test suite
+```
+
+## Template System
+
+The CLI uses a sophisticated template system in `packages/create-llama/templates/`:
+
+### Organization
+
+- **`types/`**: Base project structures (streaming, reflex, llamaindexserver)
+- **`components/`**: Reusable components across frameworks
+  - `engines/` - Chat and agent engines
+  - `loaders/` - File, web, database loaders
+  - `providers/` - AI model configurations
+  - `vectordbs/` - Vector database integrations
+  - `use-cases/` - Workflow implementations
+
+### Development Workflow
+
+- Templates support multiple frameworks (Next.js, Express, FastAPI)
+- Component system allows mix-and-match functionality
+- E2E tests validate generated projects work correctly
+
+## Server Framework Architecture
+
+### TypeScript Server (`@llamaindex/server`)
+
+- **Core**: `LlamaIndexServer` class wrapping Next.js with workflow support
+- **Frontend**: React-based chat UI with shadcn/ui components
+- **API**: `/api/chat` endpoint with streaming responses
+- **Build Process**: Complex multi-step build including static assets for Python integration
+
+### Python Server (`llama-index-server`)
+
+- **Core**: `LlamaIndexServer` class extending FastAPI
+- **Architecture**: Workflow factory pattern for stateless request handling
+- **UI Generation**: AI-powered React component generation from Pydantic schemas
+- **Development**: Hot reloading support with dev mode
+
+## Common Patterns
+
+### Workflow Integration
+
+Both server frameworks use factory patterns:
+
+```typescript
+const server = new LlamaIndexServer({
+  workflow: (context) => createWorkflow(context)
+});
+```
+
+```python
+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    return MyWorkflow(chat_request.messages)
+```
+
+### Event System
+
+Structured events for UI communication:
+
+- **UIEvent**: Custom components with Pydantic/Zod schemas
+- **ArtifactEvent**: Code/documents for Canvas panel
+- **SourceNodesEvent**: Document sources with metadata
+- **AgentRunEvent**: Tool usage and progress tracking
+
+### File Handling
+
+- Both servers auto-mount `data/` and `output/` directories
+- LlamaCloud integration for remote file access
+- Static file serving through framework-specific methods
+
+## Testing Strategy
+
+### E2E Testing
+
+- Playwright tests in `packages/create-llama/e2e/`
+- Tests both Python and TypeScript generated projects
+- Validates CLI generation and application functionality
+
+### Unit Testing
+
+- Python: pytest with comprehensive API and service tests
+- TypeScript: Integrated testing through build process
+
+## Build Process
+
+### Create-llama CLI
+
+1. TypeScript compilation with bash script
+2. ncc bundling for standalone executable
+3. Template validation and caching
+
+### Server Package Build
+
+1. **prebuild**: Clean directories
+2. **build**: bunchee compilation to ESM/CJS
+3. **postbuild**: Next.js preparation and static asset generation
+4. **prepare:py-static**: Python integration assets
+
+### Release Process
+
+```bash
+pnpm release     # Build all + publish npm packages + Python release
+```
+
+## Development Environment Setup
+
+### Prerequisites
+
+- Node.js >=16.14.0
+- Python with uv package manager
+- pnpm for package management
+
+### Common Workflow
+
+1. Clone repository and run `pnpm install`
+2. For CLI development: work in `packages/create-llama/`
+3. For server development: choose TypeScript or Python package
+4. Use `pnpm dev` for concurrent development across packages
+5. Run `pnpm e2e` to validate changes with generated projects
+
+## Special Considerations
+
+### Template Development
+
+- Changes to templates require rebuilding CLI
+- E2E tests validate template functionality across frameworks
+- Template caching system speeds up repeated builds
+
+### Cross-package Dependencies
+
+- Server package builds static assets for Python integration
+- Version synchronization between TypeScript and Python packages
+- Shared UI components and styling across implementations
+
+### Performance
+
+- CLI uses caching for template operations
+- Server frameworks support streaming responses
+- Background processing for file operations and LlamaCloud integration
@@ -106,25 +106,6 @@ Ok to proceed? (y) y
 You can also pass command line arguments to set up a new project
 non-interactively. For a list of the latest options, call `create-llama --help`.

-### Running in pro mode
-
-If you prefer more advanced customization options, you can run `create-llama` in pro mode using the `--pro` flag.
-
-In pro mode, instead of selecting a predefined use case, you'll be prompted to select each technical component of your project. This allows for greater flexibility in customizing your project, including:
-
- **Vector Store**: Choose from a variety of vector stores for keeping your documents, including MongoDB, Pinecone, Weaviate, Qdrant and Chroma.
- **Tools**: Choose from a variety of agent tools (functions called by the LLM), such as:
-  - Code Interpreter: Executes Python code in a secure Jupyter notebook environment
-  - Artifact Code Generator: Generates code artifacts that can be run in a sandbox
-  - OpenAPI Action: Facilitates requests to a provided OpenAPI schema
-  - Image Generator: Creates images based on text descriptions
-  - Web Search: Performs web searches to retrieve up-to-date information
- **Data Sources**: Integrate various data sources into your chat application, including local files, websites, or database-retrieved data.
- **Backend Options**: Besides using Next.js or FastAPI, you can also select to use Express for a more traditional Node.js application.
- **Observability**: Choose from a variety of LLM observability tools, including LlamaTrace and Traceloop.
-
-Pro mode is ideal for developers who want fine-grained control over their project's configuration and are comfortable with more technical setup options.
-
 ## LlamaIndex Documentation

 - [TS/JS docs](https://ts.llamaindex.ai/)
@@ -57,6 +57,9 @@ export default tseslint.config(
      "**/out/**",
      "**/node_modules/**",
      "**/build/**",
+      "packages/server/server/**",
+      "packages/server/project/**",
+      "packages/server/bin/**",
    ],
  },
 );
@@ -13,7 +13,8 @@
  },
  "license": "MIT",
  "workspaces": [
-    "packages/*"
+    "packages/*",
+    "python/*"
  ],
  "scripts": {
    "dev": "pnpm -r dev",
@@ -24,8 +25,10 @@
    "format:write": "prettier --ignore-unknown --write .",
    "prepare": "husky",
    "new-snapshot": "pnpm -r build && changeset version --snapshot",
-    "new-version": "pnpm -r build && changeset version",
-    "release": "pnpm -r build && changeset publish",
+    "new-version-python": "pnpm --filter @create-llama/llama-index-server new-version",
+    "new-version": "pnpm -r build && changeset version && pnpm new-version-python",
+    "release-python": "pnpm --filter @create-llama/llama-index-server release",
+    "release": "pnpm -r build && changeset publish && pnpm release-python",
    "release-snapshot": "pnpm -r build && changeset publish --tag snapshot"
  },
  "devDependencies": {
@@ -1,5 +1,68 @@
 # create-llama

+## 0.5.21
+
+### Patch Changes
+
+- af9ad3c: feat: show document artifact after generating report
+- a543a27: feat: bump chat-ui with inline artifact
+
+## 0.5.20
+
+### Patch Changes
+
+- 3ff0a18: fix: default header padding
+
+## 0.5.19
+
+### Patch Changes
+
+- 5fe9e17: support eject to fully customize next folder
+- b8a1ff6: Support citation for agentic template (Python)
+
+## 0.5.18
+
+### Patch Changes
+
+- 8d59ef0: Add layout_dir config to the generated python code
+
+## 0.5.17
+
+### Patch Changes
+
+- eee3230: feat: support custom layout
+
+## 0.5.16
+
+### Patch Changes
+
+- 6f75d4a: fix: unsupported language in code gen workflow
+- d0618fa: Fix LlamaCloud generate script issue
+
+## 0.5.15
+
+### Patch Changes
+
+- 527075c: Enable dev mode that allows updating code directly in the UI
+
+## 0.5.14
+
+### Patch Changes
+
+- 1df8cfb: Split artifacts use case to document generator and code generator
+- 1b5a519: chore: improve dev experience with nodemon
+- b3eb0ba: Fix typing check issue
+- 556f33c: fix chromadb dependency issue
+- 2451539: fix: remove dead generated ai code
+- 7a70390: Deprecate pro mode
+
+## 0.5.13
+
+### Patch Changes
+
+- f4ca602: Add artifact use case for Typescript template
+- f4ca602: Update typescript use cases to use the new workflow engine
+
 ## 0.5.12

 ### Patch Changes
@@ -0,0 +1,108 @@
+# create-llama Package
+
+## Overview
+
+The `create-llama` package is a CLI tool for creating LlamaIndex-powered applications with one command. It's designed as a project generator that scaffolds various types of RAG (Retrieval-Augmented Generation) applications using different frameworks, databases, and AI model providers.
+
+## Package Structure
+
+### Core Files
+
+- **`index.ts`**: Main CLI entry point using Commander.js for argument parsing
+- **`create-app.ts`**: Core application creation logic and orchestration
+- **`package.json`**: Package configuration with binary entry point at `./dist/index.js`
+
+### Key Directories
+
+- **`helpers/`**: Utility functions for package management, file operations, and configuration
+- **`questions/`**: Interactive prompts for user configuration
+- **`templates/`**: Project templates for different frameworks and use cases
+- **`e2e/`**: End-to-end tests using Playwright
+
+## Core Functionality
+
+### CLI Interface
+
+The tool accepts numerous command-line options including:
+
+- Framework selection (`--framework`: nextjs, express, fastapi)
+- Template type (`--template`: streaming, multiagent, reflex, llamaindexserver)
+- Model providers (OpenAI, Anthropic, Groq, Ollama, etc.)
+- Vector databases (none, mongo, pg, pinecone, milvus, etc.)
+- Data sources (files, web URLs, databases)
+- Tools and observability options
+
+### Application Generation Flow
+
+1. **Project validation**: Checks project name validity and directory permissions
+2. **Interactive questioning**: Prompts user for configuration if not provided via CLI
+3. **Template installation**: Copies and configures appropriate templates
+4. **Environment setup**: Creates `.env` files with API keys and configuration
+5. **Dependencies**: Installs packages using detected/specified package manager
+6. **Post-install actions**: Can run the app, open VSCode, or install dependencies
+
+### Template System
+
+Templates are organized by:
+
+- **Framework**: NextJS (frontend), Express (Node backend), FastAPI (Python backend)
+- **Type**: Streaming chat, multiagent workflows, Reflex UI, LlamaIndex server
+- **Components**: Engines, loaders, providers, UI components, observability
+
+### Helper Functions
+
+Key helper modules include:
+
+- **Installation**: Package manager detection and dependency installation
+- **Data sources**: File copying, web scraping, database connection setup
+- **Providers**: Model provider configuration (OpenAI, Anthropic, etc.)
+- **Tools**: Integration with external tools (Wikipedia, weather, code generation)
+- **Environment**: `.env` file generation with API keys and settings
+
+## Development Commands
+
+### Build & Development
+
+- `npm run build`: Build the CLI using bash script
+- `npm run dev`: Watch mode development build
+- `npm run clean`: Clean build artifacts and temporary files
+
+### Testing
+
+- `npm run e2e`: Run all end-to-end tests
+- `npm run e2e:python`: Test Python-specific templates
+- `npm run e2e:typescript`: Test TypeScript-specific templates
+
+### Package Management
+
+- `npm run pack-install`: Create and install local package for testing
+
+## Architecture Notes
+
+### Model Configuration
+
+The tool supports multiple AI providers with a unified `ModelConfig` interface that includes:
+
+- Provider selection and API key management
+- Model and embedding model specification
+- Dimension configuration for embeddings
+
+### Data Source Handling
+
+Flexible data source configuration supporting:
+
+- Local files and directories
+- Web URLs with configurable crawling depth
+- Database connections with custom queries
+- Automatic file downloading and copying
+
+### Template Flexibility
+
+Templates use a component-based system allowing mix-and-match of:
+
+- Different frameworks (NextJS, Express, FastAPI)
+- Various vector databases
+- Multiple observability tools
+- Configurable tools and integrations
+
+This package serves as the foundation for rapidly prototyping and deploying LlamaIndex applications across different technology stacks and use cases.
@@ -3,7 +3,7 @@ import { exec } from "child_process";
 import fs from "fs";
 import path from "path";
 import util from "util";
-import { TemplateFramework, TemplateVectorDB } from "../../helpers/types";
+import { TemplateFramework, TemplateType, TemplateUseCase, TemplateVectorDB } from "../../helpers/types";
 import { RunCreateLlamaOptions, createTestDir, runCreateLlama } from "../utils";

 const execAsync = util.promisify(exec);
@@ -11,123 +11,193 @@ const execAsync = util.promisify(exec);
 const templateFramework: TemplateFramework = process.env.FRAMEWORK
  ? (process.env.FRAMEWORK as TemplateFramework)
  : "fastapi";
+const templateType: TemplateType = process.env.TEMPLATE_TYPE
+  ? (process.env.TEMPLATE_TYPE as TemplateType)
+  : "llamaindexserver";
+const useCases: TemplateUseCase[] = [
+  "agentic_rag",
+  "deep_research",
+  "financial_report",
+  "code_generator",
+  "document_generator",
+];
 const dataSource: string = process.env.DATASOURCE
  ? process.env.DATASOURCE
  : "--example-file";

-// TODO: add support for other templates
+test.describe("Mypy check", () => {
+  test.describe.configure({ retries: 0 });

-if (
-  dataSource === "--example-file" // XXX: this test provides its own data source - only trigger it on one data source (usually the CI matrix will trigger multiple data sources)
-) {
-  // vectorDBs, tools, and data source combinations to test
-  const vectorDbs: TemplateVectorDB[] = [
-    "mongo",
-    "pg",
-    "pinecone",
-    "milvus",
-    "astra",
-    "qdrant",
-    "chroma",
-    "weaviate",
-  ];
+    // Test for streaming template
+    test.describe("StreamingTemplate", () => {
+      test.skip(templateType !== "streaming", `skipping streaming test for ${templateType}`);
+      if (
+        dataSource === "--example-file" // XXX: this test provides its own data source - only trigger it on one data source (usually the CI matrix will trigger multiple data sources)
+      ) {
+        // vectorDBs, tools, and data source combinations to test
+        const vectorDbs: TemplateVectorDB[] = [
+          "mongo",
+          "pg",
+          "pinecone",
+          "milvus",
+          "astra",
+          "qdrant",
+          "chroma",
+          "weaviate",
+        ];
+        const toolOptions = [
+          "wikipedia.WikipediaToolSpec",
+          "google.GoogleSearchToolSpec",
+          "document_generator",
+          "artifact",
+        ];

-  const toolOptions = [
-    "wikipedia.WikipediaToolSpec",
-    "google.GoogleSearchToolSpec",
-    "document_generator",
-    "artifact",
-  ];
+        const dataSources = [
+          "--example-file",
+          "--web-source https://www.example.com",
+          "--db-source mysql+pymysql://user:pass@localhost:3306/mydb",
+        ];

-  const dataSources = [
-    "--example-file",
-    "--web-source https://www.example.com",
-    "--db-source mysql+pymysql://user:pass@localhost:3306/mydb",
-  ];
+        const observabilityOptions = ["llamatrace", "traceloop"];

-  const observabilityOptions = ["llamatrace", "traceloop"];
+        // Test vector databases
+        for (const vectorDb of vectorDbs) {
+          test(`vectorDB: ${vectorDb} ${templateType}`, async () => {
+            const cwd = await createTestDir();
+            const { pyprojectPath } = await createAndCheckLlamaProject({
+              options: {
+                cwd,
+                templateType: "streaming",
+                templateFramework,
+                dataSource: "--example-file",
+                vectorDb,
+                tools: "none",
+                port: 3000,
+                postInstallAction: "none",
+                templateUI: undefined,
+                appType: "--no-frontend",
+                llamaCloudProjectName: undefined,
+                llamaCloudIndexName: undefined,
+                observability: undefined,
+              },
+            });

-  test.describe("Mypy check", () => {
-    test.describe.configure({ retries: 0 });
+            const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
+            if (vectorDb !== "none") {
+              if (vectorDb === "pg") {
+                expect(pyprojectContent).toContain(
+                  "llama-index-vector-stores-postgres",
+                );
+              } else {
+                expect(pyprojectContent).toContain(
+                  `llama-index-vector-stores-${vectorDb}`,
+                );
+              }
+            }
+          });
+        }

-    // Test vector databases
-    for (const vectorDb of vectorDbs) {
-      test(`Mypy check for vectorDB: ${vectorDb}`, async () => {
+        // Test tools
+        for (const tool of toolOptions) {
+          test(`tool: ${tool} ${templateType}`, async () => {
+            const cwd = await createTestDir();
+            const { pyprojectPath } = await createAndCheckLlamaProject({
+              options: {
+                cwd,
+                templateType: "streaming",
+                templateFramework,
+                dataSource: "--example-file",
+                vectorDb: "none",
+                tools: tool,
+                port: 3000,
+                postInstallAction: "none",
+                templateUI: undefined,
+                appType: "--no-frontend",
+                llamaCloudProjectName: undefined,
+                llamaCloudIndexName: undefined,
+                observability: undefined,
+              },
+            });
+
+            const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
+            if (tool === "wikipedia.WikipediaToolSpec") {
+              expect(pyprojectContent).toContain("wikipedia");
+            }
+            if (tool === "google.GoogleSearchToolSpec") {
+              expect(pyprojectContent).toContain("google");
+            }
+          });
+        }
+
+        // Test data sources
+        for (const dataSource of dataSources) {
+          test(`data source: ${dataSource} ${templateType}`, async () => {
+          const dataSourceType = dataSource.split(" ")[0];
+            const cwd = await createTestDir();
+            const { pyprojectPath } = await createAndCheckLlamaProject({
+              options: {
+                cwd,
+                templateType: "streaming",
+                templateFramework,
+                dataSource,
+                vectorDb: "none",
+                tools: "none",
+                port: 3000,
+                postInstallAction: "none",
+                templateUI: undefined,
+                appType: "--no-frontend",
+                llamaCloudProjectName: undefined,
+                llamaCloudIndexName: undefined,
+                observability: undefined,
+              },
+            });
+
+            const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
+            if (dataSource.includes("--web-source")) {
+              expect(pyprojectContent).toContain("llama-index-readers-web");
+            }
+            if (dataSource.includes("--db-source")) {
+              expect(pyprojectContent).toContain("llama-index-readers-database");
+            }
+          });
+        }
+
+        // Test observability options (one Playwright test per option)
+        for (const observability of observabilityOptions) {
+          test(`observability: ${observability} ${templateType}`, async () => {
+            const cwd = await createTestDir();
+            // Project creation also runs uv sync and mypy; any failure throws and fails this test
+            await createAndCheckLlamaProject({
+              options: {
+                cwd,
+                templateType: "streaming",
+                templateFramework,
+                dataSource: "--example-file",
+                vectorDb: "none",
+                tools: "none",
+                port: 3000,
+                postInstallAction: "none",
+                templateUI: undefined,
+                appType: "--no-frontend",
+                llamaCloudProjectName: undefined,
+                llamaCloudIndexName: undefined,
+                observability,
+              },
+            });
+          });
+        }
+      }
+    });
+
+    test.describe("LlamaIndexServer", () => { // sync callback; the async work lives inside each test()
+      test.skip(templateType !== "llamaindexserver", `skipping llamaindexserver test for ${templateType}`);
+      test.skip(dataSource !== "--example-file", `skipping llamaindexserver test for ${dataSource}`);
+      for (const useCase of useCases) test(`use case: ${useCase} ${templateType}`, async () => {
         const cwd = await createTestDir();
-        const { pyprojectPath } = await createAndCheckLlamaProject({
+        await createAndCheckLlamaProject({
           options: {
             cwd,
-            templateType: "streaming",
-            templateFramework,
-            dataSource: "--example-file",
-            vectorDb,
-            tools: "none",
-            port: 3000,
-            postInstallAction: "none",
-            templateUI: undefined,
-            appType: "--no-frontend",
-            llamaCloudProjectName: undefined,
-            llamaCloudIndexName: undefined,
-            observability: undefined,
-          },
-        });
-
-        const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
-        if (vectorDb !== "none") {
-          if (vectorDb === "pg") {
-            expect(pyprojectContent).toContain(
-              "llama-index-vector-stores-postgres",
-            );
-          } else {
-            expect(pyprojectContent).toContain(
-              `llama-index-vector-stores-${vectorDb}`,
-            );
-          }
-        }
-      });
-    }
-
-    // Test tools
-    for (const tool of toolOptions) {
-      test(`Mypy check for tool: ${tool}`, async () => {
-        const cwd = await createTestDir();
-        const { pyprojectPath } = await createAndCheckLlamaProject({
-          options: {
-            cwd,
-            templateType: "streaming",
-            templateFramework,
-            dataSource: "--example-file",
-            vectorDb: "none",
-            tools: tool,
-            port: 3000,
-            postInstallAction: "none",
-            templateUI: undefined,
-            appType: "--no-frontend",
-            llamaCloudProjectName: undefined,
-            llamaCloudIndexName: undefined,
-            observability: undefined,
-          },
-        });
-
-        const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
-        if (tool === "wikipedia.WikipediaToolSpec") {
-          expect(pyprojectContent).toContain("wikipedia");
-        }
-        if (tool === "google.GoogleSearchToolSpec") {
-          expect(pyprojectContent).toContain("google");
-        }
-      });
-    }
-
-    // Test data sources
-    for (const dataSource of dataSources) {
-      const dataSourceType = dataSource.split(" ")[0];
-      test(`Mypy check for data source: ${dataSourceType}`, async () => {
-        const cwd = await createTestDir();
-        const { pyprojectPath } = await createAndCheckLlamaProject({
-          options: {
-            cwd,
-            templateType: "streaming",
+            templateType: "llamaindexserver",
             templateFramework,
             dataSource,
             vectorDb: "none",
@@ -139,110 +209,77 @@ if (
             llamaCloudProjectName: undefined,
             llamaCloudIndexName: undefined,
             observability: undefined,
+            useCase,
           },
         });
+      });
+    });

-        const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
-        if (dataSource.includes("--web-source")) {
-          expect(pyprojectContent).toContain("llama-index-readers-web");
-        }
-        if (dataSource.includes("--db-source")) {
-          expect(pyprojectContent).toContain("llama-index-readers-database");
-        }
-      });
-    }
+    async function createAndCheckLlamaProject({
+      options,
+    }: {
+      options: RunCreateLlamaOptions;
+    }): Promise<{ pyprojectPath: string; projectPath: string }> {
+      const result = await runCreateLlama(options);
+      const name = result.projectName;
+      const projectPath = path.join(options.cwd, name);

-    // Test observability options
-    for (const observability of observabilityOptions) {
-      test(`Mypy check for observability: ${observability}`, async () => {
-        const cwd = await createTestDir();
+      // Check if the app folder exists
+      expect(fs.existsSync(projectPath)).toBeTruthy();

-        const { pyprojectPath } = await createAndCheckLlamaProject({
-          options: {
-            cwd,
-            templateType: "streaming",
-            templateFramework,
-            dataSource: "--example-file",
-            vectorDb: "none",
-            tools: "none",
-            port: 3000,
-            postInstallAction: "none",
-            templateUI: undefined,
-            appType: "--no-frontend",
-            llamaCloudProjectName: undefined,
-            llamaCloudIndexName: undefined,
-            observability,
-          },
-        });
-      });
-    }
-  });
-}
+      // Check if pyproject.toml exists
+      const pyprojectPath = path.join(projectPath, "pyproject.toml");
+      expect(fs.existsSync(pyprojectPath)).toBeTruthy();

-async function createAndCheckLlamaProject({
-  options,
-}: {
-  options: RunCreateLlamaOptions;
-}): Promise<{ pyprojectPath: string; projectPath: string }> {
-  const result = await runCreateLlama(options);
-  const name = result.projectName;
-  const projectPath = path.join(options.cwd, name);
+      // Modify environment for the command
+      const commandEnv = {
+        ...process.env,
+      };

-  // Check if the app folder exists
-  expect(fs.existsSync(projectPath)).toBeTruthy();
+      console.log("Running uv venv...");
+      try {
+        const { stdout: venvStdout, stderr: venvStderr } = await execAsync(
+          "uv venv",
+          { cwd: projectPath, env: commandEnv },
+        );
+        console.log("uv venv stdout:", venvStdout);
+        console.error("uv venv stderr:", venvStderr);
+      } catch (error) {
+        console.error("Error running uv venv:", error);
+        throw error; // Re-throw error to fail the test
+      }

-  // Check if pyproject.toml exists
-  const pyprojectPath = path.join(projectPath, "pyproject.toml");
-  expect(fs.existsSync(pyprojectPath)).toBeTruthy();
+      console.log("Running uv sync...");
+      try {
+        const { stdout: syncStdout, stderr: syncStderr } = await execAsync(
+          "uv sync --all-extras",
+          { cwd: projectPath, env: commandEnv },
+        );
+        console.log("uv sync stdout:", syncStdout);
+        console.error("uv sync stderr:", syncStderr);
+      } catch (error) {
+        console.error("Error running uv sync:", error);
+        throw error; // Re-throw error to fail the test
+      }

-  // Modify environment for the command
-  const commandEnv = {
-    ...process.env,
-  };
+      console.log("Running uv run mypy ....");
+      try {
+        const { stdout: mypyStdout, stderr: mypyStderr } = await execAsync(
+          "uv run mypy .",
+          { cwd: projectPath, env: commandEnv },
+        );
+        console.log("uv run mypy stdout:", mypyStdout);
+        console.error("uv run mypy stderr:", mypyStderr);
+        // Assuming mypy success means no output or specific success message
+        // Adjust checks based on actual expected mypy output
+      } catch (error) {
+        console.error("Error running mypy:", error);
+        throw error;
+      }

-  console.log("Running uv venv...");
-  try {
-    const { stdout: venvStdout, stderr: venvStderr } = await execAsync(
-      "uv venv",
-      { cwd: projectPath, env: commandEnv },
-    );
-    console.log("uv venv stdout:", venvStdout);
-    console.error("uv venv stderr:", venvStderr);
-  } catch (error) {
-    console.error("Error running uv venv:", error);
-    throw error; // Re-throw error to fail the test
+      // If we reach this point without throwing an error, the test passes
+      expect(true).toBeTruthy();
+
+    return { pyprojectPath, projectPath };
  }
-
-  console.log("Running uv sync...");
-  try {
-    const { stdout: syncStdout, stderr: syncStderr } = await execAsync(
-      "uv sync --all-extras",
-      { cwd: projectPath, env: commandEnv },
-    );
-    console.log("uv sync stdout:", syncStdout);
-    console.error("uv sync stderr:", syncStderr);
-  } catch (error) {
-    console.error("Error running uv sync:", error);
-    throw error; // Re-throw error to fail the test
-  }
-
-  console.log("Running uv run mypy ....");
-  try {
-    const { stdout: mypyStdout, stderr: mypyStderr } = await execAsync(
-      "uv run mypy .",
-      { cwd: projectPath, env: commandEnv },
-    );
-    console.log("uv run mypy stdout:", mypyStdout);
-    console.error("uv run mypy stderr:", mypyStderr);
-    // Assuming mypy success means no output or specific success message
-    // Adjust checks based on actual expected mypy output
-  } catch (error) {
-    console.error("Error running mypy:", error);
-    throw error;
-  }
-
-  // If we reach this point without throwing an error, the test passes
-  expect(true).toBeTruthy();
-
-  return { pyprojectPath, projectPath };
-}
+});
@@ -1,5 +1,5 @@
 import { expect, test } from "@playwright/test";
-import { ChildProcess } from "child_process";
+import { ChildProcess, execSync } from "child_process";
 import fs from "fs";
 import path from "path";
 import type {
@@ -12,21 +12,31 @@ import { createTestDir, runCreateLlama, type AppType } from "../utils";
 const templateFramework: TemplateFramework = process.env.FRAMEWORK
  ? (process.env.FRAMEWORK as TemplateFramework)
  : "fastapi";
-const dataSource: string = "--example-file";
+const dataSource: string = process.env.DATASOURCE
+  ? (process.env.DATASOURCE as string)
+  : "--example-file";
+const llamaCloudProjectName = "create-llama";
+const llamaCloudIndexName = "e2e-test";
+
 const templateUI: TemplateUI = "shadcn";
 const templatePostInstallAction: TemplatePostInstallAction = "runApp";
 const appType: AppType = "--frontend";
 const userMessage = "Write a blog post about physical standards for letters";
-const templateUseCases = ["financial_report", "agentic_rag", "deep_research"];
+const templateUseCases = [
+  "agentic_rag",
+  // "financial_report",
+  // "deep_research",
+  // "code_generator",
+];
+const ejectDir = "next";

 for (const useCase of templateUseCases) {
  test.describe(`Test use case ${useCase} ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
    test.skip(
-      process.platform !== "linux" ||
-        process.env.DATASOURCE === "--no-files" ||
-        templateFramework === "express",
+      dataSource === "--no-files" || templateFramework === "express",
      "The llamaindexserver template currently only works with nextjs, fastapi. We also only run on Linux to speed up tests.",
    );
+    const useLlamaParse = dataSource === "--llamacloud";
    let port: number;
    let cwd: string;
    let name: string;
@@ -48,6 +58,9 @@ for (const useCase of templateUseCases) {
        templateUI,
        appType,
        useCase,
+        llamaCloudProjectName,
+        llamaCloudIndexName,
+        useLlamaParse,
      });
      name = result.projectName;
      appProcess = result.appProcess;
@@ -98,6 +111,28 @@ for (const useCase of templateUseCases) {
      expect(response.ok()).toBeTruthy();
    });

+    test("Should successfully eject, install dependencies and build without errors", async () => {
+      test.skip(
+        templateFramework !== "nextjs" ||
+          useCase !== "code_generator" ||
+          dataSource === "--llamacloud",
+        "Eject test only applies to Next.js framework, code generator use case, and non-llamacloud",
+      );
+
+      // Run eject command
+      execSync("npm run eject", { cwd: path.join(cwd, name) });
+
+      // Verify next directory exists
+      const nextDirExists = fs.existsSync(path.join(cwd, name, ejectDir));
+      expect(nextDirExists).toBeTruthy();
+
+      // Install dependencies in next directory
+      execSync("npm install", { cwd: path.join(cwd, name, ejectDir) });
+
+      // Run build
+      execSync("npm run build", { cwd: path.join(cwd, name, ejectDir) });
+    });
+
    // clean processes
    test.afterAll(async () => {
      appProcess?.kill();
@@ -3,7 +3,12 @@ import { exec } from "child_process";
 import fs from "fs";
 import path from "path";
 import util from "util";
-import { TemplateFramework, TemplateVectorDB } from "../../helpers/types";
+import {
+  TemplateFramework,
+  TemplateType,
+  TemplateUseCase,
+  TemplateVectorDB,
+} from "../../helpers/types";
 import { createTestDir, runCreateLlama } from "../utils";

 const execAsync = util.promisify(exec);
@@ -11,6 +16,16 @@ const execAsync = util.promisify(exec);
 const templateFramework: TemplateFramework = process.env.FRAMEWORK
  ? (process.env.FRAMEWORK as TemplateFramework)
  : "nextjs";
+const templateType: TemplateType = process.env.TEMPLATE_TYPE
+  ? (process.env.TEMPLATE_TYPE as TemplateType)
+  : "llamaindexserver";
+const useCases: TemplateUseCase[] = [
+  "agentic_rag",
+  "deep_research",
+  "financial_report",
+  "code_generator",
+  "document_generator",
+];
 const dataSource: string = process.env.DATASOURCE
  ? process.env.DATASOURCE
  : "--example-file";
@@ -29,77 +44,118 @@ const vectorDbs: TemplateVectorDB[] = [
 ];

 test.describe("Test resolve TS dependencies", () => {
+  test.describe.configure({ retries: 0 });
+
  // Test vector DBs without LlamaParse
  for (const vectorDb of vectorDbs) {
-    const optionDescription = `vectorDb: ${vectorDb}, dataSource: ${dataSource}`;
+    const optionDescription = `templateType: ${templateType}, vectorDb: ${vectorDb}, dataSource: ${dataSource}`;

    test(`Vector DB test - ${optionDescription}`, async () => {
-      await runTest(vectorDb, false);
+      // skip vectordb test for llamaindexserver
+      test.skip(
+        templateType === "llamaindexserver",
+        "skipping vectorDB test for llamaindexserver",
+      );
+
+      await runTest({
+        templateType: templateType,
+        useLlamaParse: false, // Disable LlamaParse for vectorDB test
+        vectorDb: vectorDb,
+      });
    });
  }

-  // Test LlamaParse with vectorDB 'none'
-  test(`LlamaParse test - vectorDb: none, dataSource: ${dataSource}, llamaParse: true`, async () => {
-    await runTest("none", true);
-  });
-
-  async function runTest(
-    vectorDb: TemplateVectorDB | "none",
-    useLlamaParse: boolean,
-  ) {
-    const cwd = await createTestDir();
-
-    const result = await runCreateLlama({
-      cwd: cwd,
-      templateType: "streaming",
-      templateFramework: templateFramework,
-      dataSource: dataSource,
-      vectorDb: vectorDb,
-      port: 3000,
-      postInstallAction: "none",
-      templateUI: undefined,
-      appType: templateFramework === "nextjs" ? "" : "--no-frontend",
-      llamaCloudProjectName: undefined,
-      llamaCloudIndexName: undefined,
-      tools: undefined,
-      useLlamaParse: useLlamaParse,
-    });
-    const name = result.projectName;
-
-    // Check if the app folder exists
-    const appDir = path.join(cwd, name);
-    const dirExists = fs.existsSync(appDir);
-    expect(dirExists).toBeTruthy();
-
-    // Install dependencies using pnpm
-    try {
-      const { stderr: installStderr } = await execAsync(
-        "pnpm install --prefer-offline --ignore-workspace",
-        {
-          cwd: appDir,
-        },
-      );
-    } catch (error) {
-      console.error("Error installing dependencies:", error);
-      throw error;
-    }
-
-    // Run tsc type check and capture the output
-    try {
-      const { stdout, stderr } = await execAsync(
-        "pnpm exec tsc -b --diagnostics",
-        {
-          cwd: appDir,
-        },
-      );
-      // Check if there's any error output
-      expect(stderr).toBeFalsy();
-
-      // Log the stdout for debugging purposes
-      console.log("TypeScript type-check output:", stdout);
-    } catch (error) {
-      console.error("Error running tsc:", error);
-      throw error;
+  // No vectorDB, with LlamaParse and useCase
+  // Only need to test use case with example data source
+  if (dataSource === "--example-file") {
+    for (const useCase of useCases) {
+      const optionDescription = `templateType: ${templateType}, useCase: ${useCase}`;
+      test.describe(`useCase test - ${optionDescription}`, () => {
+        test.skip(
+          templateType === "streaming",
+          "Skipping use case test for streaming template.",
+        );
+        test(`no llamaParse - ${optionDescription}`, async () => {
+          await runTest({
+            templateType: templateType,
+            useLlamaParse: false,
+            useCase: useCase,
+          });
+        });
+        // Skip the LlamaParse variant for use cases that don't use an index.
+        if (useCase !== "code_generator" && useCase !== "document_generator") {
+          test(`llamaParse - ${optionDescription}`, async () => {
+            await runTest({
+              templateType: templateType,
+              useLlamaParse: true,
+              useCase: useCase,
+            });
+          });
+        }
+      });
    }
  }
 });
+
+async function runTest(options: {
+  templateType: TemplateType;
+  useLlamaParse: boolean;
+  useCase?: TemplateUseCase;
+  vectorDb?: TemplateVectorDB;
+}) {
+  const cwd = await createTestDir();
+
+  const result = await runCreateLlama({
+    cwd: cwd,
+    templateType: options.templateType,
+    templateFramework: templateFramework,
+    dataSource: dataSource,
+    vectorDb: options.vectorDb ?? "none",
+    port: 3000,
+    postInstallAction: "none",
+    templateUI: undefined,
+    appType: templateFramework === "nextjs" ? "" : "--no-frontend",
+    llamaCloudProjectName: undefined,
+    llamaCloudIndexName: undefined,
+    tools: undefined,
+    useLlamaParse: options.useLlamaParse,
+    useCase: options.useCase,
+  });
+  const name = result.projectName;
+
+  // Check if the app folder exists
+  const appDir = path.join(cwd, name);
+  const dirExists = fs.existsSync(appDir);
+  expect(dirExists).toBeTruthy();
+
+  // Install dependencies using pnpm
+  try {
+    const { stderr: installStderr } = await execAsync(
+      "pnpm install --prefer-offline --ignore-workspace",
+      {
+        cwd: appDir,
+      },
+    );
+  } catch (error) {
+    console.error("Error installing dependencies:", error);
+    throw error;
+  }
+
+  // Run tsc type check and capture the output
+  try {
+    const { stdout, stderr } = await execAsync(
+      "pnpm exec tsc -b --diagnostics",
+      {
+        cwd: appDir,
+      },
+    );
+    // Check if there's any error output
+    expect(stderr).toBeFalsy();
+
+    // Log the stdout for debugging purposes
+    console.log("TypeScript type-check output:", stdout);
+  } catch (error) {
+    console.error("Error running tsc:", error);
+    throw error;
+  }
+}
@@ -18,6 +18,7 @@ import {
  ModelConfig,
  TemplateDataSource,
  TemplateFramework,
+  TemplateUseCase,
  TemplateVectorDB,
 } from "./types";
 import { installTSTemplate } from "./typescript";
@@ -60,6 +61,7 @@ async function generateContextData(
  vectorDb?: TemplateVectorDB,
  llamaCloudKey?: string,
  useLlamaParse?: boolean,
+  useCase?: TemplateUseCase,
 ) {
  if (packageManager) {
    const runGenerate = `${cyan(
@@ -96,7 +98,12 @@ async function generateContextData(
        }
      } else {
        console.log(`Running ${runGenerate} to generate the context data.`);
-        await callPackageManager(packageManager, true, ["run", "generate"]);
+        const shouldRunGenerate =
+          useCase !== "code_generator" && useCase !== "document_generator"; // Artifact use case doesn't use index.
+
+        if (shouldRunGenerate) {
+          await callPackageManager(packageManager, true, ["run", "generate"]);
+        }
        return;
      }
    }
@@ -224,6 +231,7 @@ export const installTemplate = async (
        props.vectorDb,
        props.llamaCloudKey,
        props.useLlamaParse,
+        props.useCase,
      );
    }

@@ -5,6 +5,7 @@ import { parse, stringify } from "smol-toml";
 import terminalLink from "terminal-link";
 import { isUvAvailable, tryUvSync } from "./uv";

+import { isCI } from "ci-info";
 import { assetRelocator, copy } from "./copy";
 import { templatesDir } from "./dir";
 import { Tool } from "./tools";
@@ -94,6 +95,10 @@ const getAdditionalDependencies = (
        name: "llama-index-vector-stores-chroma",
        version: ">=0.4.0,<0.5.0",
      });
+      dependencies.push({
+        name: "onnxruntime",
+        version: "<1.22.0",
+      });
      break;
    }
    case "weaviate": {
@@ -263,7 +268,7 @@ const getAdditionalDependencies = (
    if (observability === "traceloop") {
      dependencies.push({
        name: "traceloop-sdk",
-        version: ">=0.15.11,<0.16.0",
+        version: ">=0.15.11",
      });
    }
    if (observability === "llamatrace") {
@@ -274,6 +279,19 @@ const getAdditionalDependencies = (
    }
  }

+  // If the app template is llamaindexserver, we are running in CI, and SERVER_PACKAGE_PATH is set,
+  // add the local llama-index-server package (from that path) to the dependencies
+  if (
+    templateType === "llamaindexserver" &&
+    isCI &&
+    process.env.SERVER_PACKAGE_PATH
+  ) {
+    dependencies.push({
+      name: "llama-index-server",
+      version: `@file://${process.env.SERVER_PACKAGE_PATH}`,
+    });
+  }
+
  return dependencies;
 };

@@ -565,13 +583,19 @@ const installLlamaIndexServerTemplate = async ({

  await copy("*.py", path.join(root, "app"), {
    parents: true,
-    cwd: path.join(templatesDir, "components", "workflows", "python", useCase),
+    cwd: path.join(templatesDir, "components", "use-cases", "python", useCase),
  });

  // Copy custom UI component code
  await copy(`*`, path.join(root, "components"), {
    parents: true,
-    cwd: path.join(templatesDir, "components", "ui", "workflows", useCase),
+    cwd: path.join(templatesDir, "components", "ui", "use-cases", useCase),
+  });
+
+  // Copy layout components to layout folder in root
+  await copy("*", path.join(root, "layout"), {
+    parents: true,
+    cwd: path.join(templatesDir, "components", "ui", "layout"),
  });

  if (useLlamaParse) {
@@ -602,7 +626,7 @@ const installLlamaIndexServerTemplate = async ({
  // Copy README.md
  await copy("README-template.md", path.join(root), {
    parents: true,
-    cwd: path.join(templatesDir, "components", "workflows", "python", useCase),
+    cwd: path.join(templatesDir, "components", "use-cases", "python", useCase),
    rename: assetRelocator,
  });
 };
@@ -673,6 +697,7 @@ export const installPythonTemplate = async ({
    dataSources,
    tools,
    template,
+    observability,
  );

  await addDependencies(root, addOnDependencies);
@@ -58,7 +58,8 @@ export type TemplateUseCase =
  | "extractor"
  | "contract_review"
  | "agentic_rag"
-  | "artifacts";
+  | "code_generator"
+  | "document_generator";
 // Config for both file and folder
 export type FileSourceConfig =
  | {
@@ -31,23 +31,30 @@ const installLlamaIndexServerTemplate = async ({
    process.exit(1);
  }

-  await copy("workflow.ts", path.join(root, "src", "app"), {
-    parents: true,
+  await copy("**", path.join(root), {
    cwd: path.join(
      templatesDir,
      "components",
-      "workflows",
+      "use-cases",
      "typescript",
      useCase,
    ),
+    rename: assetRelocator,
  });

-  // copy workflow UI components to output/components folder
+  // copy workflow UI components to components folder in root
  await copy("*", path.join(root, "components"), {
    parents: true,
-    cwd: path.join(templatesDir, "components", "ui", "workflows", useCase),
+    cwd: path.join(templatesDir, "components", "ui", "use-cases", useCase),
  });

+  // copy layout components to layout folder in root
+  await copy("*", path.join(root, "layout"), {
+    parents: true,
+    cwd: path.join(templatesDir, "components", "ui", "layout"),
+  });
+
+  // Override generate.ts when LlamaCloud is used as the vector DB
  if (vectorDb === "llamacloud") {
    await copy("generate.ts", path.join(root, "src"), {
      parents: true,
@@ -74,18 +81,14 @@ const installLlamaIndexServerTemplate = async ({
      rename: () => "data.ts",
    });
  }
-  // Copy README.md
-  await copy("README-template.md", path.join(root), {
-    parents: true,
-    cwd: path.join(
-      templatesDir,
-      "components",
-      "workflows",
-      "typescript",
-      useCase,
-    ),
-    rename: assetRelocator,
-  });
+
+  // Simplify use case code
+  if (useCase === "code_generator" || useCase === "document_generator") {
+    // Artifact use case doesn't use index.
+    // We don't need data.ts, generate.ts
+    await fs.rm(path.join(root, "src", "app", "data.ts"));
+    // TODO: Remove generate index in generate.ts and package.json if possible
+  }
 };

 const installLegacyTSTemplate = async ({
@@ -390,7 +393,7 @@ const providerDependencies: {
  [key in ModelProvider]?: Record<string, string>;
 } = {
  openai: {
-    "@llamaindex/openai": "^0.2.0",
+    "@llamaindex/openai": "~0.4.0",
  },
  gemini: {
    "@llamaindex/google": "^0.2.0",
@@ -516,7 +519,7 @@ async function updatePackageJson({
  if (backend) {
    packageJson.dependencies = {
      ...packageJson.dependencies,
-      "@llamaindex/readers": "^2.0.0",
+      "@llamaindex/readers": "~3.1.4",
    };

    if (vectorDb && vectorDb in vectorDbDependencies) {
@@ -546,6 +549,16 @@ async function updatePackageJson({
    };
  }

+  // if having custom server package tgz file, use it for testing @llamaindex/server
+  const serverPackagePath = process.env.SERVER_PACKAGE_PATH;
+  if (serverPackagePath && template === "llamaindexserver") {
+    const relativePath = path.relative(process.cwd(), serverPackagePath);
+    packageJson.dependencies = {
+      ...packageJson.dependencies,
+      "@llamaindex/server": `file:${relativePath}`,
+    };
+  }
+
  await fs.writeFile(
    packageJsonFile,
    JSON.stringify(packageJson, null, 2) + os.EOL,
@@ -196,7 +196,7 @@ const program = new Command(packageJson.name)
    "--pro",
    `

-  Allow interactive selection of all features.
+  Deprecated: Allow interactive selection of all features.
 `,
    false,
  )
@@ -1,6 +1,6 @@
 {
  "name": "create-llama",
-  "version": "0.5.12",
+  "version": "0.5.21",
  "description": "Create LlamaIndex-powered apps with one command",
  "keywords": [
    "rag",
@@ -6,7 +6,7 @@ const defaults: Omit<QuestionArgs, "modelConfig"> = {
  framework: "nextjs",
  ui: "shadcn",
  frontend: false,
-  llamaCloudKey: "",
+  llamaCloudKey: undefined,
  useLlamaParse: false,
  communityProjectConfig: undefined,
  llamapack: "",
@@ -1,4 +1,5 @@
 import ciInfo from "ci-info";
+import { bold, yellow } from "picocolors";
 import { getCIQuestionResults } from "./ci";
 import { askProQuestions } from "./questions";
 import { askSimpleQuestions } from "./simple";
@@ -13,6 +14,12 @@ export const askQuestions = async (
    return await getCIQuestionResults(args);
  } else if (args.pro) {
    // TODO: refactor pro questions to return a result object
+    console.log(
+      yellow(
+        `Pro mode is deprecated. Please use the new templates using the ${bold("LlamaIndexServer")} by not specifying pro mode.`,
+      ),
+    );
+
    await askProQuestions(args);
    return args as unknown as QuestionResults;
  }
@@ -10,7 +10,8 @@ type AppType =
  | "agentic_rag"
  | "financial_report"
  | "deep_research"
-  | "artifacts";
+  | "code_generator"
+  | "document_generator";

 type SimpleAnswers = {
  appType: AppType;
@@ -47,10 +48,14 @@ export const askSimpleQuestions = async (
            "Researches and analyzes provided documents from multiple perspectives, generating a comprehensive report with citations to support key findings and insights.",
        },
        {
-          title: "Artifacts",
-          value: "artifacts",
-          description:
-            "Build your own Vercel's v0 or OpenAI's canvas-styled UI.",
+          title: "Code Generator",
+          value: "code_generator",
+          description: "Build a Vercel v0 styled code generator.",
+        },
+        {
+          title: "Document Generator",
+          value: "document_generator",
+          description: "Build a OpenAI canvas-styled document generator.",
        },
      ],
    },
@@ -62,35 +67,35 @@ export const askSimpleQuestions = async (

  let useLlamaCloud = false;

-  if (appType !== "artifacts") {
-    const { language: newLanguage } = await prompts(
-      {
-        type: "select",
-        name: "language",
-        message: "What language do you want to use?",
-        choices: [
-          { title: "Python (FastAPI)", value: "fastapi" },
-          { title: "Typescript (NextJS)", value: "nextjs" },
-        ],
-      },
-      questionHandlers,
-    );
-    language = newLanguage;
-  }
-
-  const { useLlamaCloud: newUseLlamaCloud } = await prompts(
+  const { language: newLanguage } = await prompts(
    {
-      type: "toggle",
-      name: "useLlamaCloud",
-      message: "Do you want to use LlamaCloud services?",
-      initial: false,
-      active: "Yes",
-      inactive: "No",
-      hint: "see https://www.llamaindex.ai/enterprise for more info",
+      type: "select",
+      name: "language",
+      message: "What language do you want to use?",
+      choices: [
+        { title: "Python (FastAPI)", value: "fastapi" },
+        { title: "Typescript (NextJS)", value: "nextjs" },
+      ],
    },
    questionHandlers,
  );
-  useLlamaCloud = newUseLlamaCloud;
+  language = newLanguage;
+
+  if (appType !== "code_generator" && appType !== "document_generator") {
+    const { useLlamaCloud: newUseLlamaCloud } = await prompts(
+      {
+        type: "toggle",
+        name: "useLlamaCloud",
+        message: "Do you want to use LlamaCloud services?",
+        initial: false,
+        active: "Yes",
+        inactive: "No",
+        hint: "see https://www.llamaindex.ai/enterprise for more info",
+      },
+      questionHandlers,
+    );
+    useLlamaCloud = newUseLlamaCloud;
+  }

  if (useLlamaCloud && !llamaCloudKey) {
    // Ask for LlamaCloud API key, if not set
@@ -153,7 +158,13 @@ const convertAnswers = async (
      tools: [],
      modelConfig: MODEL_GPT41,
    },
-    artifacts: {
+    code_generator: {
+      template: "llamaindexserver",
+      dataSources: [],
+      tools: [],
+      modelConfig: MODEL_GPT41,
+    },
+    document_generator: {
      template: "llamaindexserver",
      dataSources: [],
      tools: [],
@@ -1,5 +1,9 @@
-import { Document, LLamaCloudFileService, VectorStoreIndex } from "llamaindex";
-import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
+import {
+  Document,
+  LLamaCloudFileService,
+  LlamaCloudIndex,
+  VectorStoreIndex,
+} from "llamaindex";
 import { DocumentFile } from "../streaming/annotations";
 import { parseFile, storeFile } from "./helper";
 import { runPipeline } from "./pipeline";
@@ -10,8 +10,9 @@ dependencies = [
    "python-dotenv>=1.0.0",
    "pydantic<2.10",
    "llama-index>=0.12.1",
+    "llama-parse>=0.6.21,<0.7.0",
    "cachetools>=5.3.3",
-    "reflex>=0.6.2.post1",
+    "reflex==0.7.10",
 ]

 [project.scripts]
@@ -11,8 +11,9 @@ dependencies = [
    "python-dotenv>=1.0.0",
    "pydantic<2.10",
    "llama-index>=0.12.1",
+    "llama-parse>=0.6.21,<0.7.0",
    "cachetools>=5.3.3",
-    "reflex>=0.6.2.post1",
+    "reflex==0.7.10",
 ]

 [project.scripts]
@@ -0,0 +1,40 @@
+"use client";
+
+import { Sparkles, Star } from "lucide-react";
+
+export default function Header() {
+  return (
+    <div className="flex items-center justify-between p-2 px-4">
+      <div className="flex items-center gap-2">
+        <Sparkles className="size-4" />
+        <h1 className="font-semibold">LlamaIndex App</h1>
+      </div>
+      <div className="flex items-center justify-end gap-4">
+        <div className="flex items-center gap-2">
+          <a
+            href="https://www.llamaindex.ai/"
+            target="_blank"
+            rel="noopener noreferrer"
+            className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
+          >
+            Built by LlamaIndex
+          </a>
+          <img
+            className="h-[24px] w-[24px] rounded-sm"
+            src="/llama.png"
+            alt="Llama Logo"
+          />
+        </div>
+        <a
+          href="https://github.com/run-llama/LlamaIndexTS"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="hover:bg-accent flex items-center gap-2 rounded-md border border-gray-300 px-2 py-1 text-sm"
+        >
+          <Star className="size-4" />
+          Star on GitHub
+        </a>
+      </div>
+    </div>
+  );
+}
@@ -113,11 +113,6 @@ function ArtifactWorkflowCard({ event }) {
              state === "plan" && "bg-blue-200",
              state === "generate" && "bg-violet-200",
            )}
-            indicatorClassName={cn(
-              "transition-all duration-500",
-              state === "plan" && "bg-blue-500",
-              state === "generate" && "bg-violet-500",
-            )}
          />
        </div>
      </Card>
@@ -0,0 +1,132 @@
+import { Badge } from "@/components/ui/badge";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Progress } from "@/components/ui/progress";
+import { Skeleton } from "@/components/ui/skeleton";
+import { cn } from "@/lib/utils";
+import { Markdown } from "@llamaindex/chat-ui/widgets";
+import { ListChecks, Loader2, Wand2 } from "lucide-react";
+import { useEffect, useState } from "react";
+
+// Presentation metadata for each workflow state that renders a card.
+// Keys are the event `state` values; a state without an entry here (for
+// example "completed") makes ArtifactWorkflowCard render nothing.
+//   icon      - lucide-react icon component shown in the card header
+//   badgeText - label rendered inside the header badge
+//   gradient  - Tailwind gradient stops appended to `bg-gradient-to-br`
+//   progress  - value passed to the <Progress> bar
+//   iconBg    - Tailwind classes for the round icon container
+//   badge     - Tailwind classes for the badge
+const STAGE_META = {
+  plan: {
+    icon: ListChecks,
+    badgeText: "Step 1/2: Planning",
+    gradient: "from-blue-100 via-blue-50 to-white",
+    progress: 33,
+    iconBg: "bg-blue-100 text-blue-600",
+    badge: "bg-blue-100 text-blue-700",
+  },
+  generate: {
+    icon: Wand2,
+    badgeText: "Step 2/2: Generating",
+    gradient: "from-violet-100 via-violet-50 to-white",
+    progress: 66,
+    iconBg: "bg-violet-100 text-violet-600",
+    badge: "bg-violet-100 text-violet-700",
+  },
+};
+
+function ArtifactWorkflowCard({ event }) {
+  const [visible, setVisible] = useState(event?.state !== "completed");
+  const [fade, setFade] = useState(false);
+
+  useEffect(() => {
+    if (event?.state === "completed") {
+      setVisible(false);
+    } else {
+      setVisible(true);
+      setFade(false);
+    }
+  }, [event?.state]);
+
+  if (!event || !visible) return null;
+
+  const { state, requirement } = event;
+  const meta = STAGE_META[state];
+
+  if (!meta) return null;
+
+  return (
+    <div className="flex min-h-[180px] w-full items-center justify-center py-2">
+      <Card
+        className={cn(
+          "w-full rounded-xl shadow-md transition-all duration-500",
+          "border-0",
+          fade && "pointer-events-none opacity-0",
+          `bg-gradient-to-br ${meta.gradient}`,
+        )}
+        style={{
+          boxShadow:
+            "0 2px 12px 0 rgba(80, 80, 120, 0.08), 0 1px 3px 0 rgba(80, 80, 120, 0.04)",
+        }}
+      >
+        <CardHeader className="flex flex-row items-center gap-2 px-3 pb-1 pt-2">
+          <div
+            className={cn(
+              "flex items-center justify-center rounded-full p-1",
+              meta.iconBg,
+            )}
+          >
+            <meta.icon className="h-5 w-5" />
+          </div>
+          <CardTitle className="flex items-center gap-2 text-base font-semibold">
+            <Badge className={cn("ml-1", meta.badge, "px-2 py-0.5 text-xs")}>
+              {meta.badgeText}
+            </Badge>
+          </CardTitle>
+        </CardHeader>
+        <CardContent className="px-3 py-1">
+          {state === "plan" && (
+            <div className="flex flex-col items-center gap-2 py-2">
+              <Loader2 className="mb-1 h-6 w-6 animate-spin text-blue-400" />
+              <div className="text-center text-sm font-medium text-blue-900">
+                Analyzing your request...
+              </div>
+              <Skeleton className="mt-1 h-3 w-1/2 rounded-full" />
+            </div>
+          )}
+          {state === "generate" && (
+            <div className="flex flex-col gap-2 py-2">
+              <div className="flex items-center gap-1">
+                <Loader2 className="h-4 w-4 animate-spin text-violet-400" />
+                <span className="text-sm font-medium text-violet-900">
+                  Working on the requirement:
+                </span>
+              </div>
+              <div className="max-h-24 overflow-auto rounded-lg border border-violet-200 bg-violet-50 px-2 py-1 text-xs">
+                {requirement ? (
+                  <Markdown content={requirement} />
+                ) : (
+                  <span className="italic text-violet-400">
+                    No requirements available yet.
+                  </span>
+                )}
+              </div>
+            </div>
+          )}
+        </CardContent>
+        <div className="px-3 pb-2 pt-1">
+          <Progress
+            value={meta.progress}
+            className={cn(
+              "h-1 rounded-full bg-gray-200",
+              state === "plan" && "bg-blue-200",
+              state === "generate" && "bg-violet-200",
+            )}
+          />
+        </div>
+      </Card>
+    </div>
+  );
+}
+
+export default function Component({ events }) {
+  const aggregateEvents = () => {
+    if (!events || events.length === 0) return null;
+    return events[events.length - 1];
+  };
+
+  const event = aggregateEvents();
+
+  return <ArtifactWorkflowCard event={event} />;
+}
@@ -3,9 +3,12 @@ from typing import Optional
 from app.index import get_index
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.settings import Settings
-from llama_index.llms.openai import OpenAI
 from llama_index.server.api.models import ChatRequest
 from llama_index.server.tools.index import get_query_engine_tool
+from llama_index.server.tools.index.citation import (
+    CITATION_SYSTEM_PROMPT,
+    enable_citation,
+)


 def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
@@ -14,9 +17,16 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow
        raise RuntimeError(
            "Index not found! Please run `uv run generate` to index the data first."
        )
-    query_tool = get_query_engine_tool(index=index)
+    # Create a query tool with citations enabled
+    query_tool = enable_citation(get_query_engine_tool(index=index))
+
+    # Define the system prompt for the agent
+    # Append the citation system prompt to the system prompt
+    system_prompt = """You are a helpful assistant"""
+    system_prompt += CITATION_SYSTEM_PROMPT
+
    return AgentWorkflow.from_tools_or_functions(
        tools_or_functions=[query_tool],
-        llm=Settings.llm or OpenAI(model="gpt-4o-mini"),
-        system_prompt="You are a helpful assistant.",
+        llm=Settings.llm,
+        system_prompt=system_prompt,
    )
@@ -0,0 +1,65 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
+
+## Getting Started
+
+First, set up the environment with uv:
+
+> **_Note:_** This step is not needed if you are using the dev-container.
+
+```shell
+uv sync
+```
+
+Then check the parameters that have been pre-configured in the `.env` file in this directory.
+Make sure you have set the `OPENAI_API_KEY` for the LLM.
+
+Then, run the development server:
+
+```shell
+uv run fastapi dev
+```
+
+Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
+
+To start the app optimized for **production**, run:
+
+```
+uv run fastapi run
+```
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
+
+## Use Case
+AI-powered code generator that can help you generate an app with a chat interface, code editor and app preview.
+
+To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
+
+You can start by sending a request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```
+curl --location 'localhost:8000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Create a calculator app" }] }'
+```
+
+## Customize the UI
+
+To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
+
+You can also generate new UI code for the workflow using an LLM by running the following command:
+
+```
+uv run generate_ui
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
+- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
+- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
+
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -6,6 +6,7 @@ from llama_index.core.chat_engine.types import ChatMessage
 from llama_index.core.llms import LLM
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.prompts import PromptTemplate
+from llama_index.llms.openai import OpenAI
 from llama_index.core.workflow import (
    Context,
    Event,
@@ -26,6 +27,15 @@ from llama_index.server.api.utils import get_last_artifact
 from pydantic import BaseModel, Field


+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    workflow = CodeArtifactWorkflow(
+        llm=OpenAI(model="gpt-4.1"),
+        chat_request=chat_request,
+        timeout=120.0,
+    )
+    return workflow
+
+
 class Requirement(BaseModel):
    next_step: Literal["answering", "coding"]
    language: Optional[str] = None
@@ -23,7 +23,18 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.models import ChatRequest, SourceNodesEvent, UIEvent
+from llama_index.server.api.models import (
+    ArtifactEvent,
+    ArtifactType,
+    ChatRequest,
+    SourceNodesEvent,
+    UIEvent,
+    Artifact,
+    DocumentArtifactData,
+    DocumentArtifactSource,
+)
+import time
+from llama_index.server.utils.stream import write_response_to_stream
 from pydantic import BaseModel, Field

 logger = logging.getLogger("uvicorn")
@@ -365,8 +376,31 @@ class DeepResearchWorkflow(Workflow):
            user_request=self.user_request,
            stream=self.stream,
        )
+
+        final_response = await write_response_to_stream(res, ctx)
+
+        ctx.write_event_to_stream(
+            ArtifactEvent(
+                data=Artifact(
+                    type=ArtifactType.DOCUMENT,
+                    created_at=int(time.time()),
+                    data=DocumentArtifactData(
+                        title="DeepResearch Report",
+                        content=final_response,
+                        type="markdown",
+                        sources=[
+                            DocumentArtifactSource(
+                                id=node.id_,
+                            )
+                            for node in self.context_nodes
+                        ],
+                    ),
+                ),
+            )
+        )
+
        return StopEvent(
-            result=res,
+            result="",
        )


@@ -33,12 +33,9 @@ You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides

 ## Use Case

-We have prepared two artifact workflows:
+AI-powered document generator that can help you generate documents with a chat interface and simple markdown editor.

- [Code Workflow](app/code_workflow.py): To generate code and display it in the UI like Vercel's v0.
- [Document Workflow](app/document_workflow.py): Generate and update a document like OpenAI's canvas.
-
-Modify the factory method in [`workflow.py`](app/workflow.py) to decide which artifact workflow to use. Without any changes the Code Workflow is used.
+To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).

 You can start by sending an request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:

@@ -4,6 +4,7 @@ from typing import Any, Literal, Optional

 from llama_index.core.chat_engine.types import ChatMessage
 from llama_index.core.llms import LLM
+from llama_index.llms.openai import OpenAI
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.prompts import PromptTemplate
 from llama_index.core.workflow import (
@@ -26,6 +27,15 @@ from llama_index.server.api.utils import get_last_artifact
 from pydantic import BaseModel, Field


+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    workflow = DocumentArtifactWorkflow(
+        llm=OpenAI(model="gpt-4.1"),
+        chat_request=chat_request,
+        timeout=120.0,
+    )
+    return workflow
+
+
 class DocumentRequirement(BaseModel):
    type: Literal["markdown", "html"]
    title: str
@@ -41,6 +41,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "What standards for a letter exist?" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -1,4 +1,4 @@
-import { agent } from "llamaindex";
+import { agent } from "@llamaindex/workflow";
 import { getIndex } from "./data";

 export const workflowFactory = async (reqBody: any) => {
@@ -0,0 +1,39 @@
+import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
+import "dotenv/config";
+import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
+import { initSettings } from "./app/settings";
+
+/**
+ * Index everything under the `data` directory into a vector store index whose
+ * storage context uses the `storage` directory as its persist directory.
+ */
+async function generateDatasource() {
+  console.log(`Generating storage context...`);
+
+  // Storage context that the index is built against.
+  const persistedContext = await storageContextFromDefaults({
+    persistDir: "storage",
+  });
+
+  // Read every document found in the `data` directory.
+  const loadedDocs = await new SimpleDirectoryReader().loadData("data");
+
+  // Build the index from the documents using that storage context.
+  await VectorStoreIndex.fromDocuments(loadedDocs, {
+    storageContext: persistedContext,
+  });
+
+  console.log("Storage context successfully generated.");
+}
+
+// CLI entry point. The first argument selects the action:
+//   "ui"         - nothing to generate for this project; report and exit
+//   "datasource" - generate the datasource
+//   other        - warn, then generate the datasource anyway
+(async () => {
+  const [command] = process.argv.slice(2);
+
+  initSettings();
+
+  if (command === "ui") {
+    console.error("This project doesn't use any custom UI.");
+    return;
+  }
+
+  if (command !== "datasource") {
+    console.error(
+      `Unrecognized command: ${command}. Generating datasource by default.`,
+    );
+  }
+  await generateDatasource();
+})();
@@ -0,0 +1,61 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
+
+## Getting Started
+
+First, install the dependencies:
+
+```
+npm install
+```
+
+Second, run the development server:
+
+```
+npm run dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the chat UI.
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://ts.llamaindex.ai/docs/llamaindex/modules/llms) in the [settings file](src/app/settings.ts).
+
+## Custom UI Components
+
+We have a custom component located in `components/ui_event.jsx`. This is used to display the state of artifact workflows in the UI. You can regenerate a new UI component from the workflow event schema by running the following command:
+
+```
+npm run generate:ui
+```
+
+## Use Case
+
+AI-powered code generator that can help you generate an app with a chat interface, code editor and app preview.
+
+To update the workflow, you can modify the code in [`workflow.ts`](src/app/workflow.ts).
+
+You can start by sending a request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```shell
+curl --location 'localhost:3000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Create a calculator app" }] }'
+```
+
+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
+- [LlamaIndexTS Documentation](https://ts.llamaindex.ai/docs/llamaindex) - learn about LlamaIndex (Typescript features).
+- [Workflows Introduction](https://ts.llamaindex.ai/docs/llamaindex/modules/workflows) - learn about LlamaIndexTS workflows.
+
+You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
@@ -0,0 +1,337 @@
+import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
+import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
+
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+
+import { z } from "zod";
+
+// Schema of the plan parsed from the LLM's JSON block in the planning step.
+//   next_step   - "coding" routes to artifact generation, "answering" to the
+//                 answer-synthesis step
+//   language    - optional, may be null
+//   file_name   - optional, may be null
+//   requirement - required description of what the next step must do
+export const RequirementSchema = z.object({
+  next_step: z.enum(["answering", "coding"]),
+  language: z.string().nullable().optional(),
+  file_name: z.string().nullable().optional(),
+  requirement: z.string(),
+});
+
+// Static type inferred from RequirementSchema.
+export type Requirement = z.infer<typeof RequirementSchema>;
+
+// Schema of the UI event payload the workflow sends to report its progress.
+// `type` is always "ui_event"; `data.state` is the current stage and
+// `data.requirement` is optional (the generate step sets it).
+export const UIEventSchema = z.object({
+  type: z.literal("ui_event"),
+  data: z.object({
+    state: z
+      .enum(["plan", "generate", "completed"])
+      .describe(
+        "The current state of the workflow: 'plan', 'generate', or 'completed'.",
+      ),
+    requirement: z
+      .string()
+      .optional()
+      .describe(
+        "An optional requirement creating or updating a code, if applicable.",
+      ),
+  }),
+});
+
+// Static type inferred from UIEventSchema.
+export type UIEvent = z.infer<typeof UIEventSchema>;
+// Emitted by the start handler: carries the user input plus, when a previous
+// artifact exists, that artifact serialized as JSON in `context`.
+const planEvent = workflowEvent<{
+  userInput: MessageContent;
+  context?: string | undefined;
+}>();
+
+// Emitted by the planning step when the plan's next_step is "coding".
+const generateArtifactEvent = workflowEvent<{
+  requirement: Requirement;
+}>();
+
+// Emitted by the planning step when the plan's next_step is not "coding";
+// carries no data.
+const synthesizeAnswerEvent = workflowEvent<object>();
+
+// Progress event sent through `sendEvent`; payload follows UIEventSchema.
+const uiEvent = workflowEvent<UIEvent>();
+
+export function workflowFactory(reqBody: any) {
+  const llm = Settings.llm;
+
+  const { withState, getContext } = createStatefulMiddleware(() => {
+    return {
+      memory: new ChatMemoryBuffer({ llm }),
+      lastArtifact: extractLastArtifact(reqBody),
+    };
+  });
+  const workflow = withState(createWorkflow());
+
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    // Prepare chat history
+    const { state } = getContext();
+    // Put user input to the memory
+    if (!userInput) {
+      throw new Error("Missing user input to start the workflow");
+    }
+    state.memory.set(chatHistory);
+    state.memory.put({ role: "user", content: userInput });
+
+    return planEvent.with({
+      userInput: userInput,
+      context: state.lastArtifact
+        ? JSON.stringify(state.lastArtifact)
+        : undefined,
+    });
+  });
+
+  workflow.handle([planEvent], async ({ data: planData }) => {
+    const { sendEvent } = getContext();
+    const { state } = getContext();
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "plan",
+        },
+      }),
+    );
+    const user_msg = planData.userInput;
+    const context = planData.context
+      ? `## The context is: \n${planData.context}\n`
+      : "";
+    const prompt = `
+You are a product analyst responsible for analyzing the user's request and providing the next step for code or document generation.
+You are helping user with their code artifact. To update the code, you need to plan a coding step.
+
+Follow these instructions:
+1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
+2. The next step must be one of the following two options:
+    - "coding": To make the changes to the current code.
+    - "answering": If you don't need to update the current code or need clarification from the user.
+Important: Avoid telling the user to update the code themselves, you are the one who will update the code (by planning a coding step).
+3. If the next step is "coding", you may specify the language ("typescript" or "python") and file_name if known, otherwise set them to null. 
+4. The requirement must be provided clearly what is the user request and what need to be done for the next step in details
+    as precise and specific as possible, don't be stingy with in the requirement.
+5. If the next step is "answering", set language and file_name to null, and the requirement should describe what to answer or explain to the user.
+6. Be concise; only return the requirements for the next step.
+7. The requirements must be in the following format:
+    \`\`\`json
+    {
+        "next_step": "answering" | "coding",
+        "language": "typescript" | "python" | null,
+        "file_name": string | null,
+        "requirement": string
+    }
+    \`\`\`
+
+## Example 1:
+User request: Create a calculator app.
+You should return:
+\`\`\`json
+{
+    "next_step": "coding",
+    "language": "typescript",
+    "file_name": "calculator.tsx",
+    "requirement": "Generate code for a calculator app that has a simple UI with a display and button layout. The display should show the current input and the result. The buttons should include basic operators, numbers, clear, and equals. The calculation should work correctly."
+}
+\`\`\`
+
+## Example 2:
+User request: Explain how the game loop works.
+Context: You have already generated the code for a snake game.
+You should return:
+\`\`\`json
+{
+    "next_step": "answering",
+    "language": null,
+    "file_name": null,
+    "requirement": "The user is asking about the game loop. Explain how the game loop works."
+}
+\`\`\`
+
+${context}
+
+Now, plan the user's next step for this request:
+${user_msg}
+`;
+
+    const response = await llm.complete({
+      prompt,
+    });
+    // parse the response to Requirement
+    // 1. use regex to find the json block
+    const jsonBlock = response.text.match(/```json\s*([\s\S]*?)\s*```/);
+    if (!jsonBlock) {
+      throw new Error("No JSON block found in the response.");
+    }
+    const requirement = RequirementSchema.parse(JSON.parse(jsonBlock[1]));
+    state.memory.put({
+      role: "assistant",
+      content: `The plan for next step: \n${response.text}`,
+    });
+
+    if (requirement.next_step === "coding") {
+      return generateArtifactEvent.with({
+        requirement,
+      });
+    } else {
+      return synthesizeAnswerEvent.with({});
+    }
+  });
+
+  // Handles `generateArtifactEvent`: asks the LLM to write or update code for the
+  // planned requirement, publishes any returned code as a "code" artifact, and
+  // then hands over to the answer-synthesis step.
+  workflow.handle([generateArtifactEvent], async ({ data: planData }) => {
+    const { sendEvent, state } = getContext();
+
+    // Tell the UI that generation has started and what is being generated.
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "generate",
+          requirement: planData.requirement.requirement,
+        },
+      }),
+    );
+
+    const previousArtifact = state.lastArtifact
+      ? JSON.stringify(state.lastArtifact)
+      : "There is no previous artifact";
+    const requirementText = planData.requirement.requirement;
+
+    // The values are interpolated directly. Substituting them with
+    // String.prototype.replace and a string replacement would interpret `$&`,
+    // `$$`, `$'` etc. inside the code/requirement, and a literal "{requirement}"
+    // inside the previous artifact would be replaced instead of the placeholder.
+    const prompt = `
+        You are a skilled developer who can help user with coding.
+        You are given a task to generate or update a code for a given requirement.
+
+        ## Follow these instructions:
+        **1. Carefully read the user's requirements.** 
+           If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
+           If the previous code is provided:
+           + Carefully analyze the code with the request to make the right changes.
+           + Avoid making a lot of changes from the previous code if the request is not to write the code from scratch again.
+        **2. For code requests:**
+           - If the user does not specify a framework or language, default to a React component using the Next.js framework.
+           - For Next.js, use Shadcn UI components, Typescript, @types/node, @types/react, @types/react-dom, PostCSS, and TailwindCSS.
+           The import pattern should be:
+           \`\`\`typescript
+           import { ComponentName } from "@/components/ui/component-name"
+           import { Markdown } from "@llamaindex/chat-ui"
+           import { cn } from "@/lib/utils"
+           \`\`\`
+           - Ensure the code is idiomatic, production-ready, and includes necessary imports.
+           - Only generate code relevant to the user's request—do not add extra boilerplate.
+        **3. Don't be verbose on response**
+           - No other text or comments only return the code which wrapped by \`\`\`language\`\`\` block.
+           - If the user's request is to update the code, only return the updated code.
+        **4. Only the following languages are allowed: "typescript", "python".**
+        **5. If there is no code to update, return the reason without any code block.**
+           
+        ## Example:
+        \`\`\`typescript
+        import React from "react";
+        import { Button } from "@/components/ui/button";
+        import { cn } from "@/lib/utils";
+
+        export default function MyComponent() {
+        return (
+           <div className="flex flex-col items-center justify-center h-screen">
+              <Button>Click me</Button>
+           </div>
+        );
+        }
+        \`\`\`
+
+        The previous code is:
+        ${previousArtifact}
+
+        Now, i have to generate the code for the following requirement:
+        ${requirementText}
+      `;
+
+    const response = await llm.complete({
+      prompt,
+    });
+
+    // Extract the code from the response. The match is greedy on purpose so that
+    // fences nested inside the generated code stay part of the captured body.
+    const codeMatch = response.text.match(/```(\w+)([\s\S]*)```/);
+    if (!codeMatch) {
+      // No code block means the model returned a reason instead of code (see
+      // instruction 5 of the prompt). Keep it in memory so the synthesis step can
+      // relay that reason to the user.
+      state.memory.put({
+        role: "assistant",
+        content: response.text,
+      });
+      return synthesizeAnswerEvent.with({});
+    }
+
+    const code = codeMatch[2].trim();
+
+    // Put the generated code to the memory
+    state.memory.put({
+      role: "assistant",
+      content: `Updated the code: \n${response.text}`,
+    });
+
+    // To show the Canvas panel for the artifact
+    sendEvent(
+      artifactEvent.with({
+        type: "artifact",
+        data: {
+          type: "code",
+          created_at: Date.now(),
+          data: {
+            language: planData.requirement.language || "",
+            file_name: planData.requirement.file_name || "",
+            code,
+          },
+        },
+      }),
+    );
+
+    return synthesizeAnswerEvent.with({});
+  });
+
+  // Final step: streams an explanation of the work back to the user and stops
+  // the workflow with the full answer text.
+  workflow.handle([synthesizeAnswerEvent], async () => {
+    const { sendEvent, state } = getContext();
+
+    const history = await state.memory.getMessages();
+    // The instruction is appended after the history as a trailing system message.
+    const instruction = {
+      role: "system" as const,
+      content: `
+        You are a helpful assistant who is responsible for explaining the work to the user.
+        Based on the conversation history, provide an answer to the user's question. 
+        The user has access to the code so avoid mentioning the whole code again in your response.
+      `,
+    };
+
+    const answerStream = await llm.chat({
+      messages: [...history, instruction],
+      stream: true,
+    });
+
+    // Mark the workflow as completed in the UI before the answer is streamed.
+    sendEvent(
+      uiEvent.with({ type: "ui_event", data: { state: "completed" } }),
+    );
+
+    let fullAnswer = "";
+    for await (const part of answerStream) {
+      fullAnswer += part.delta;
+      sendEvent(
+        agentStreamEvent.with({
+          delta: part.delta,
+          response: "",
+          currentAgentName: "assistant",
+          raw: part,
+        }),
+      );
+    }
+
+    return stopAgentEvent.with({ result: fullAnswer });
+  });
+
+  return workflow;
+}
@@ -31,7 +31,7 @@ You can configure [LLM model](https://ts.llamaindex.ai/docs/llamaindex/modules/l

 ## Custom UI Components

-For Deep Research, we have a custom component located in `components/deep_research_event.jsx`. This is used to display the results of the deep research workflow in a more user-friendly way
+For Deep Research, we have a custom component located in `components/ui_event.jsx`. This is used to display the results of the deep research workflow in a more user-friendly way.

 ### Generate a new UI Component from workflow event

@@ -53,6 +53,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Compare the financial performance of Apple and Tesla" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as `@llamaindex/server`.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -0,0 +1,436 @@
+import { artifactEvent, toSourceEvent } from "@llamaindex/server";
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import {
+  ChatMemoryBuffer,
+  LlamaCloudIndex,
+  MessageContent,
+  Metadata,
+  MetadataMode,
+  NodeWithScore,
+  PromptTemplate,
+  Settings,
+  VectorStoreIndex,
+  extractText,
+} from "llamaindex";
+import { randomUUID } from "node:crypto";
+import { z } from "zod";
+import { getIndex } from "./data";
+
+// Workflow factory: loads the index for the request's `data` payload and builds
+// a deep-research workflow on top of it.
+export const workflowFactory = async (reqBody: any) =>
+  getWorkflow(await getIndex(reqBody?.data));
+
+// workflow configs
+const MAX_QUESTIONS = 6; // max number of questions to research, research will stop when this number is reached
+const TOP_K = 10; // number of nodes to retrieve from the vector store
+
+// Prompt for the planning step. It is formatted by `createResearchPlan` with
+// four variables: {context_str} (collected text), {conversation_context} (the
+// chat history rendered as "role: content" lines), {enhanced_prompt} (extra
+// guidance produced by `enhancedPrompt`) and {user_request}.
+const createPlanResearchPrompt = new PromptTemplate({
+  template: `
+You are a professor who is guiding a researcher to research a specific request/problem.
+Your task is to decide on a research plan for the researcher.
+
+The possible actions are:
+ Provide a list of questions for the researcher to investigate, with the purpose of clarifying the request.
+ Write a report if the researcher has already gathered enough research on the topic and can resolve the initial request.
+ Cancel the research if most of the answers from researchers indicate there is insufficient information to research the request. Do not attempt more than 3 research iterations or too many questions.
+
+The workflow should be:
+ Always begin by providing some initial questions for the researcher to investigate.
+ Analyze the provided answers against the initial topic/request. If the answers are insufficient to resolve the initial request, provide additional questions for the researcher to investigate.
+ If the answers are sufficient to resolve the initial request, instruct the researcher to write a report.
+
+Here are the context: 
+<Collected information>
+{context_str}
+</Collected information>
+
+<Conversation context>
+{conversation_context}
+</Conversation context>
+
+{enhanced_prompt}
+
+Now, provide your decision in the required format for this user request:
+<User request>
+{user_request}
+</User request>
+`,
+  templateVars: [
+    "context_str",
+    "conversation_context",
+    "enhanced_prompt",
+    "user_request",
+  ],
+});
+
+// Prompt for answering a single research question. It is formatted by
+// `answerQuestion` with {context_str} (the <Citation id='...'> blocks built by
+// `contextStr`) and {question}; the model is told to cite as [citation:id].
+const researchPrompt = new PromptTemplate({
+  template: `
+You are a researcher who is in the process of answering the question.
+The purpose is to answer the question based on the collected information, without using prior knowledge or making up any new information.
+Always add citations to the sentence/point/paragraph using the id of the provided content.
+The citation should follow this format: [citation:id] where id is the id of the content.
+
+E.g:
+If we have a context like this:
+<Citation id='abc-xyz'>
+Baby llama is called cria
+</Citation id='abc-xyz'>
+
+And your answer uses the content, then the citation should be:
+- Baby llama is called cria [citation:abc-xyz]
+
+ Here is the provided context for the question:
+<Collected information>
+{context_str}
+</Collected information>
+
+No prior knowledge, just use the provided context to answer the question: {question}
+`,
+  templateVars: ["context_str", "question"],
+});
+
+// System prompt appended to the chat history by the report step before the
+// final report is streamed.
+const WRITE_REPORT_PROMPT = `
+You are a researcher writing a report based on a user request and the research context.
+You have researched various perspectives related to the user request.
+The report should provide a comprehensive outline covering all important points from the researched perspectives.
+Create a well-structured outline for the research report that covers all the answers.
+
+# IMPORTANT when writing in markdown format:
+ Use tables or figures where appropriate to enhance presentation.
+ Preserve all citation syntax (the \`[citation:id]()\` parts in the provided context). Keep these citations in the final report - no separate reference section is needed.
+ Do not add links, a table of contents, or a references section to the report.
+`;
+
+// workflow events
+// A question to research, identified by a generated id.
+type ResearchQuestion = { questionId: string; question: string };
+// A researched question together with the answer produced for it.
+type ResearchResult = ResearchQuestion & { answer: string };
+
+// planResearchEvent: triggers (another round of) planning; carries no payload.
+const planResearchEvent = workflowEvent<{}>();
+// researchEvent: asks the research step to answer one question.
+const researchEvent = workflowEvent<ResearchQuestion>();
+// reportEvent: triggers writing of the final report; carries no payload.
+const reportEvent = workflowEvent<{}>();
+
+// Schema of the progress payload sent to the UI. The `.describe()` texts
+// document each field for whoever consumes the schema.
+export const UIEventSchema = z
+  .object({
+    event: z
+      .enum(["retrieve", "analyze", "answer"])
+      .describe(
+        "The type of event. DeepResearch has 3 main stages:\n1. retrieve: Retrieve the context from the vector store\n2. analyze: Analyze the context and generate a research questions to answer\n3. answer: Answer the provided questions. Each question has a unique id, when the state is done, the event will have the answer for the question.",
+      ),
+    state: z
+      .enum(["pending", "inprogress", "done", "error"])
+      .describe("The state for each event"),
+    id: z.string().optional().describe("The id of the question"),
+    question: z
+      .string()
+      .optional()
+      .describe("The question generated by the LLM"),
+    answer: z.string().optional().describe("The answer generated by the LLM"),
+  })
+  .describe("DeepResearchEvent");
+
+// Static type derived from the schema above.
+type UIEventData = z.infer<typeof UIEventSchema>;
+
+// Workflow event that wraps a UIEventData payload under the "ui_event" type tag.
+const uiEvent = workflowEvent<{
+  type: "ui_event";
+  data: UIEventData;
+}>();
+
+// workflow definition
+export function getWorkflow(index: VectorStoreIndex | LlamaCloudIndex) {
+  const retriever = index.asRetriever({ similarityTopK: TOP_K });
+  const { withState, getContext } = createStatefulMiddleware(() => {
+    return {
+      memory: new ChatMemoryBuffer({
+        llm: Settings.llm,
+        chatHistory: [],
+      }),
+      contextNodes: [] as NodeWithScore<Metadata>[],
+      userRequest: "" as MessageContent,
+      totalQuestions: 0,
+      researchResults: [] as ResearchResult[],
+    };
+  });
+  const workflow = withState(createWorkflow());
+
+  // Entry point: records the user's request, retrieves context nodes for it and
+  // then triggers the planning step.
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    const { sendEvent, state } = getContext();
+    if (!userInput) {
+      throw new Error("Invalid input");
+    }
+
+    // Both retrieval notifications share one shape; only the state differs.
+    const reportRetrieval = (status: "inprogress" | "done") =>
+      sendEvent(
+        uiEvent.with({
+          type: "ui_event",
+          data: { event: "retrieve", state: status },
+        }),
+      );
+
+    state.memory.set(chatHistory);
+    state.memory.put({ role: "user", content: userInput });
+    state.userRequest = userInput;
+
+    reportRetrieval("inprogress");
+    const nodes = await retriever.retrieve({ query: userInput });
+    // Publish the retrieved nodes as sources before marking retrieval as done.
+    sendEvent(toSourceEvent(nodes));
+    reportRetrieval("done");
+
+    state.contextNodes.push(...nodes);
+
+    return planResearchEvent.with({});
+  });
+
+  // Planning step: asks the LLM whether to research more questions, write the
+  // report or cancel, then dispatches the matching follow-up events.
+  workflow.handle([planResearchEvent], async () => {
+    const { sendEvent, state, stream } = getContext();
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { event: "analyze", state: "inprogress" },
+      }),
+    );
+
+    const { decision, researchQuestions, cancelReason } =
+      await createResearchPlan(
+        state.memory,
+        state.contextNodes
+          .map((node) => node.node.getContent(MetadataMode.NONE))
+          .join("\n"),
+        enhancedPrompt(state.totalQuestions),
+        state.userRequest,
+      );
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { event: "analyze", state: "done" },
+      }),
+    );
+    if (decision === "cancel") {
+      sendEvent(
+        uiEvent.with({
+          type: "ui_event",
+          data: { event: "analyze", state: "done" },
+        }),
+      );
+      // NOTE(review): this branch ends with a stream event rather than a
+      // stopAgentEvent - confirm the caller still terminates the run here.
+      return agentStreamEvent.with({
+        delta: cancelReason ?? "Research cancelled without any reason.",
+        response: cancelReason ?? "Research cancelled without any reason.",
+        currentAgentName: "",
+        raw: null,
+      });
+    }
+    if (decision === "research" && researchQuestions.length > 0) {
+      state.totalQuestions += researchQuestions.length;
+      state.memory.put({
+        role: "assistant",
+        content:
+          "We need to find answers to the following questions:\n" +
+          // researchQuestions holds { questionId, question } objects; join the
+          // question texts, otherwise each entry renders as "[object Object]".
+          researchQuestions.map(({ question }) => question).join("\n"),
+      });
+
+      // state.researchResults accumulates across planning rounds, so the target
+      // is "results so far + questions of this round". Comparing against this
+      // round's count alone would end the wait too early (or never) from the
+      // second round on.
+      const expectedResults =
+        state.researchResults.length + researchQuestions.length;
+
+      researchQuestions.forEach(({ questionId: id, question }) => {
+        sendEvent(
+          uiEvent.with({
+            type: "ui_event",
+            data: { event: "answer", state: "pending", id, question },
+          }),
+        );
+        sendEvent(researchEvent.with({ questionId: id, question }));
+      });
+
+      // Wait until every question of this round has been answered, then plan
+      // again with the new answers in memory.
+      await stream
+        .until(() => state.researchResults.length >= expectedResults)
+        .toArray();
+      return planResearchEvent.with({});
+    }
+    // Reached for "write", or for "research" with an empty question list.
+    state.memory.put({
+      role: "assistant",
+      content: "No more idea to analyze. We should report the answers.",
+    });
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { event: "analyze", state: "done" },
+      }),
+    );
+    return reportEvent.with({});
+  });
+
+  // Research step: answers one question from the collected context, records the
+  // result in state and memory, and reports progress to the UI.
+  workflow.handle([researchEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    const { questionId, question } = data;
+
+    // Fields shared by the "inprogress" and "done" notifications.
+    const progress = {
+      event: "answer" as const,
+      id: questionId,
+      question,
+    };
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { ...progress, state: "inprogress" },
+      }),
+    );
+
+    const citedContext = contextStr(state.contextNodes);
+    const answer = await answerQuestion(citedContext, question);
+
+    state.researchResults.push({ questionId, question, answer });
+    state.memory.put({
+      role: "assistant",
+      content: `<Question>${question}</Question>\n<Answer>${answer}</Answer>`,
+    });
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { ...progress, state: "done", answer },
+      }),
+    );
+  });
+
+  // Report step: streams the final report, publishes it as a document artifact
+  // and stops the workflow with the report text.
+  workflow.handle([reportEvent], async () => {
+    const { sendEvent, state } = getContext();
+    const chatHistory = await state.memory.getAllMessages();
+    const messages = chatHistory.concat([
+      {
+        role: "system",
+        content: WRITE_REPORT_PROMPT,
+      },
+      {
+        role: "user",
+        content:
+          "Write a report addressing the user request based on the research provided the context",
+      },
+    ]);
+
+    const responseStream = await Settings.llm.chat({ messages, stream: true });
+    let response = "";
+    for await (const chunk of responseStream) {
+      response += chunk.delta;
+      sendEvent(
+        agentStreamEvent.with({
+          delta: chunk.delta,
+          response,
+          currentAgentName: "",
+          // Attach the chunk that produced this delta, not the whole stream
+          // iterator, matching how the other workflows fill `raw`.
+          raw: chunk,
+        }),
+      );
+    }
+
+    // Open the generated report in Canvas
+    sendEvent(
+      artifactEvent.with({
+        type: "artifact",
+        data: {
+          type: "document",
+          created_at: Date.now(),
+          data: {
+            title: "DeepResearch Report",
+            content: response,
+            type: "markdown",
+            sources: state.contextNodes.map((node) => ({
+              id: node.node.id_,
+            })),
+          },
+        },
+      }),
+    );
+
+    return stopAgentEvent.with({
+      result: response,
+    });
+  });
+
+  return workflow;
+}
+
+// Asks the LLM for the next research decision.
+//
+// Parameters:
+//   memory         - chat memory; its messages become the conversation context
+//   contextStr     - collected text inserted into the prompt
+//   enhancedPrompt - extra guidance inserted into the prompt (may be empty)
+//   userRequest    - the original user request
+// Returns the parsed plan, with each research question wrapped as
+// { questionId, question } using a freshly generated UUID.
+// Throws if the LLM response is not valid JSON.
+const createResearchPlan = async (
+  memory: ChatMemoryBuffer,
+  contextStr: string,
+  enhancedPrompt: string,
+  userRequest: MessageContent,
+) => {
+  const chatHistory = await memory.getMessages();
+
+  // Message content can be structured (multi-part). extractText flattens it to
+  // plain text; plain interpolation would render it as "[object Object]".
+  const conversationContext = chatHistory
+    .map((message) => `${message.role}: ${extractText(message.content)}`)
+    .join("\n");
+
+  const prompt = createPlanResearchPrompt.format({
+    context_str: contextStr,
+    conversation_context: conversationContext,
+    enhanced_prompt: enhancedPrompt,
+    user_request: extractText(userRequest),
+  });
+
+  const responseFormat = z.object({
+    decision: z.enum(["research", "write", "cancel"]),
+    researchQuestions: z.array(z.string()),
+    cancelReason: z.string().optional(),
+  });
+
+  const result = await Settings.llm.complete({ prompt, responseFormat });
+  const plan = JSON.parse(result.text) as z.infer<typeof responseFormat>;
+
+  return {
+    ...plan,
+    // Tolerate a response that omits the list (e.g. for "write" or "cancel")
+    // instead of failing with a TypeError on `.map`.
+    researchQuestions: (plan.researchQuestions ?? []).map((question) => ({
+      questionId: randomUUID(),
+      question,
+    })),
+  };
+};
+
+// Renders the context nodes as newline-separated <Citation id='...'> blocks so
+// that answers can cite them by node id.
+const contextStr = (contextNodes: NodeWithScore<Metadata>[]) => {
+  const citations: string[] = [];
+  for (const { node } of contextNodes) {
+    const id = node.id_;
+    const body = node.getContent(MetadataMode.NONE);
+    citations.push(`<Citation id='${id}'>\n${body}</Citation id='${id}'>`);
+  }
+  return citations.join("\n");
+};
+
+// Extra planning guidance based on how many questions were researched so far:
+// a kick-off hint at zero, a wrap-up hint once MAX_QUESTIONS is reached, and
+// nothing in between.
+const enhancedPrompt = (totalQuestions: number) =>
+  totalQuestions === 0
+    ? "The student has no questions to research. Let start by providing some questions for the student to research."
+    : totalQuestions >= MAX_QUESTIONS
+      ? `The student has researched ${totalQuestions} questions. Should proceeding writing report or cancel the research if the answers are not enough to write a report.`
+      : "";
+
+// Answers one research question with the LLM, using only the given context
+// string, and returns the completion text.
+const answerQuestion = async (contextStr: string, question: string) => {
+  const { text } = await Settings.llm.complete({
+    prompt: researchPrompt.format({ context_str: contextStr, question }),
+  });
+  return text;
+};
@@ -0,0 +1,61 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
+
+## Getting Started
+
+First, install the dependencies:
+
+```
+npm install
+```
+
+Second, run the development server:
+
+```
+npm run dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the chat UI.
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://ts.llamaindex.ai/docs/llamaindex/modules/llms) in the [settings file](src/app/settings.ts).
+
+## Custom UI Components
+
+We have a custom component located in `components/ui_event.jsx`. This is used to display the state of artifact workflows in UI. You can regenerate a new UI component from the workflow event schema by running the following command:
+
+```
+npm run generate:ui
+```
+
+## Use Case
+
+AI-powered document generator that can help you generate documents with a chat interface and simple markdown editor.
+
+To update the workflow, you can modify the code in [`workflow.ts`](app/workflow.ts).
+
+You can start by sending a request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```shell
+curl --location 'localhost:3000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Compare the financial performance of Apple and Tesla" }] }'
+```
+
+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as `@llamaindex/server`.
+
+```bash
+npm run eject
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
+- [LlamaIndexTS Documentation](https://ts.llamaindex.ai/docs/llamaindex) - learn about LlamaIndex (Typescript features).
+- [Workflows Introduction](https://ts.llamaindex.ai/docs/llamaindex/modules/workflows) - learn about LlamaIndexTS workflows.
+
+You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
@@ -0,0 +1,315 @@
+import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
+import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
+
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+
+import { z } from "zod";
+
+// Shape of the plan produced by the planning step: the document type, its
+// title and a free-text requirement for the generator.
+export const DocumentRequirementSchema = z.object({
+  type: z.enum(["markdown", "html"]),
+  title: z.string(),
+  requirement: z.string(),
+});
+
+export type DocumentRequirement = z.infer<typeof DocumentRequirementSchema>;
+
+// Progress event sent to the UI while the workflow runs.
+export const UIEventSchema = z.object({
+  type: z.literal("ui_event"),
+  data: z.object({
+    state: z
+      .enum(["plan", "generate", "completed"])
+      .describe(
+        "The current state of the workflow: 'plan', 'generate', or 'completed'.",
+      ),
+    requirement: z
+      .string()
+      .optional()
+      .describe(
+        "An optional requirement creating or updating a document, if applicable.",
+      ),
+  }),
+});
+
+export type UIEvent = z.infer<typeof UIEventSchema>;
+
+// Starts planning; `context` is the serialized previous artifact, when any.
+const planEvent = workflowEvent<{
+  userInput: MessageContent;
+  context?: string | undefined;
+}>();
+
+// Asks the generation step to produce a document for the planned requirement.
+const generateArtifactEvent = workflowEvent<{
+  requirement: DocumentRequirement;
+}>();
+
+// Asks the final step to explain the result; carries the raw LLM output of the
+// generation step alongside the requirement.
+const synthesizeAnswerEvent = workflowEvent<{
+  requirement: DocumentRequirement;
+  generatedArtifact: string;
+}>();
+
+// Workflow event carrying a UIEvent payload.
+const uiEvent = workflowEvent<UIEvent>();
+
+export function workflowFactory(reqBody: any) {
+  const llm = Settings.llm;
+
+  const { withState, getContext } = createStatefulMiddleware(() => {
+    return {
+      memory: new ChatMemoryBuffer({ llm }),
+      lastArtifact: extractLastArtifact(reqBody),
+    };
+  });
+  const workflow = withState(createWorkflow());
+
+  // Entry point: seeds the memory with the chat history and the new user
+  // message, then starts planning with the previous artifact as context.
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    const { state } = getContext();
+
+    if (!userInput) {
+      throw new Error("Missing user input to start the workflow");
+    }
+
+    state.memory.set(chatHistory);
+    state.memory.put({ role: "user", content: userInput });
+
+    // Only pass a context when a previous artifact exists.
+    const context = state.lastArtifact
+      ? JSON.stringify(state.lastArtifact)
+      : undefined;
+
+    return planEvent.with({ userInput, context });
+  });
+
+  // Planning step: turns the user's request (plus any previous artifact) into a
+  // structured document requirement and forwards it to the generation step.
+  workflow.handle([planEvent], async ({ data: planData }) => {
+    const { sendEvent, state } = getContext();
+    sendEvent(uiEvent.with({ type: "ui_event", data: { state: "plan" } }));
+
+    const request = planData.userInput;
+    const contextSection = planData.context
+      ? `## The context is: \n${planData.context}\n`
+      : "";
+    const prompt = `
+         You are a documentation analyst responsible for analyzing the user's request and providing requirements for document generation or update.
+         Follow these instructions:
+         1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
+         2. From the user's request, provide requirements for the next step of the document generation or update.
+         3. Do not be verbose; only return the requirements for the next step of the document generation or update.
+         4. Only the following document types are allowed: "markdown", "html".
+         5. The requirement should be in the following format:
+            \`\`\`json
+            {
+                "type": "markdown" | "html",
+                "title": string,
+                "requirement": string
+            }
+            \`\`\`
+
+         ## Example:
+         User request: Create a project guideline document.
+         You should return:
+         \`\`\`json
+         {
+             "type": "markdown",
+             "title": "Project Guideline",
+             "requirement": "Generate a Markdown document that outlines the project goals, deliverables, and timeline. Include sections for introduction, objectives, deliverables, and timeline."
+         }
+         \`\`\`
+
+         User request: Add a troubleshooting section to the guideline.
+         You should return:
+         \`\`\`json
+         {
+             "type": "markdown",
+             "title": "Project Guideline",
+             "requirement": "Add a 'Troubleshooting' section at the end of the document with common issues and solutions."
+         }
+         \`\`\`
+
+         ${contextSection}
+
+         Now, please plan for the user's request:
+         ${request}
+        `;
+
+    const completion = await llm.complete({ prompt });
+
+    // The plan is expected inside a fenced json block; anything else is an error.
+    const fenced = completion.text.match(/```json\s*([\s\S]*?)\s*```/);
+    if (!fenced) {
+      throw new Error("No JSON block found in the response.");
+    }
+    const parsedPlan = JSON.parse(fenced[1]);
+    const requirement = DocumentRequirementSchema.parse(parsedPlan);
+
+    state.memory.put({
+      role: "assistant",
+      content: `Planning for the document generation: \n${completion.text}`,
+    });
+
+    return generateArtifactEvent.with({ requirement });
+  });
+
+  // Generation step: asks the LLM to write or update the document for the
+  // planned requirement, publishes a returned document as an artifact, and then
+  // hands over to the answer-synthesis step.
+  workflow.handle(
+    [generateArtifactEvent],
+    async ({ data: { requirement } }) => {
+      const { sendEvent, state } = getContext();
+
+      sendEvent(
+        uiEvent.with({
+          type: "ui_event",
+          data: {
+            state: "generate",
+            requirement: requirement.requirement,
+          },
+        }),
+      );
+
+      const previousArtifact = state.lastArtifact
+        ? JSON.stringify(state.lastArtifact)
+        : "";
+      const requirementStr = JSON.stringify(requirement);
+
+      const prompt = `
+         You are a skilled technical writer who can help users with documentation.
+         You are given a task to generate or update a document for a given requirement.
+
+         ## Follow these instructions:
+         **1. Carefully read the user's requirements.**
+            If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
+            If the previous document is provided:
+            + Carefully analyze the document with the request to make the right changes.
+            + Avoid making unnecessary changes from the previous document if the request is not to rewrite it from scratch.
+         **2. For document requests:**
+            - If the user does not specify a type, default to Markdown.
+            - Ensure the document is clear, well-structured, and grammatically correct.
+            - Only generate content relevant to the user's request—do not add extra boilerplate.
+         **3. Do not be verbose in your response.**
+            - No other text or comments; only return the document content wrapped by the appropriate code block (\`\`\`markdown or \`\`\`html).
+            - If the user's request is to update the document, only return the updated document.
+         **4. Only the following types are allowed: "markdown", "html".**
+         **5. If there is no change to the document, return the reason without any code block.**
+
+         ## Example:
+         \`\`\`markdown
+         # Project Guideline
+         
+         ## Introduction
+         ...
+         \`\`\`
+
+         The previous content is:
+         ${previousArtifact}
+
+         Now, please generate the document for the following requirement:
+         ${requirementStr}
+      `;
+
+      const response = await llm.complete({
+        prompt,
+      });
+
+      // Extract the document from the response. The match is greedy so fences
+      // nested inside the document stay part of the captured content.
+      const docMatch = response.text.match(/```(markdown|html)([\s\S]*)```/);
+      const generatedContent = response.text;
+
+      if (docMatch) {
+        const content = docMatch[2].trim();
+        const docType = docMatch[1] as "markdown" | "html";
+
+        // Put the generated document to the memory
+        state.memory.put({
+          role: "assistant",
+          content: `Generated document: \n${response.text}`,
+        });
+
+        // To show the Canvas panel for the artifact
+        sendEvent(
+          artifactEvent.with({
+            type: "artifact",
+            data: {
+              type: "document",
+              created_at: Date.now(),
+              data: {
+                title: requirement.title,
+                content: content,
+                type: docType,
+              },
+            },
+          }),
+        );
+      } else {
+        // No fenced document means the model returned a reason instead (see
+        // instruction 5 of the prompt). The synthesis step answers from memory,
+        // so the reason has to be stored for it to be explained to the user.
+        state.memory.put({
+          role: "assistant",
+          content: response.text,
+        });
+      }
+
+      return synthesizeAnswerEvent.with({
+        requirement,
+        generatedArtifact: generatedContent,
+      });
+    },
+  );
+
+  // Final step: streams a short explanation of what was done (or why nothing
+  // changed) and stops the workflow with the full answer text.
+  workflow.handle([synthesizeAnswerEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+
+    const history = await state.memory.getMessages();
+    // The instruction is appended after the history as a trailing system message.
+    const instruction = {
+      role: "system" as const,
+      content: `
+                Your responsibility is to explain the work to the user.
+                If there is no document to update, explain the reason.
+                If the document is updated, just summarize what changed. Don't need to include the whole document again in the response.
+                `,
+    };
+
+    const answerStream = await llm.chat({
+      messages: [...history, instruction],
+      stream: true,
+    });
+
+    // Mark the workflow as completed in the UI before the answer is streamed.
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "completed",
+          requirement: data.requirement.requirement,
+        },
+      }),
+    );
+
+    let fullAnswer = "";
+    for await (const part of answerStream) {
+      fullAnswer += part.delta;
+      sendEvent(
+        agentStreamEvent.with({
+          delta: part.delta,
+          response: "",
+          currentAgentName: "assistant",
+          raw: part,
+        }),
+      );
+    }
+
+    return stopAgentEvent.with({ result: fullAnswer });
+  });
+
+  return workflow;
+}
@@ -41,6 +41,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Generate a financial report that compares the financial performance of Apple and Tesla" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can run `npm run eject`. It will create a normal Next.js project with the same functionality as `@llamaindex/server`.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -0,0 +1,318 @@
+import { toAgentRunEvent, toSourceEvent } from "@llamaindex/server";
+import {
+  callTools,
+  chatWithTools,
+  documentGenerator,
+  interpreter,
+} from "@llamaindex/tools";
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import {
+  BaseToolWithCall,
+  ChatMemoryBuffer,
+  ChatMessage,
+  Metadata,
+  NodeWithScore,
+  Settings,
+  ToolCall,
+  ToolCallLLM,
+} from "llamaindex";
+import { getIndex } from "./data";
+
+/**
+ * Build the financial report workflow for a single chat request.
+ *
+ * @param reqBody - Request body; its optional `data` field is forwarded to `getIndex`.
+ * @returns The workflow created by `getWorkflow` with the query, code
+ *   interpreter and document generator tools.
+ * @throws Error when the `E2B_API_KEY` environment variable is not set.
+ */
+export async function workflowFactory(reqBody: any) {
+  // Validate the configuration first so that a missing key fails fast,
+  // before any time is spent loading the index.
+  const e2bApiKey = process.env.E2B_API_KEY;
+  if (!e2bApiKey) {
+    throw new Error("E2B_API_KEY is required to use the code interpreter tool");
+  }
+
+  const index = await getIndex(reqBody?.data);
+
+  const queryEngineTool = index.queryTool({
+    metadata: {
+      name: "query_document",
+      description: `This tool can retrieve information about Apple and Tesla financial data`,
+    },
+    includeSourceNodes: true,
+  });
+
+  // The guard above narrows the key to a string, no non-null assertion needed.
+  const codeInterpreterTool = interpreter({
+    apiKey: e2bApiKey,
+  });
+  const documentGeneratorTool = documentGenerator();
+
+  return getWorkflow(
+    queryEngineTool,
+    codeInterpreterTool,
+    documentGeneratorTool,
+  );
+}
+
+// workflow events
+// Chat messages passed to the LLM step that decides what to do next.
+const inputEvent = workflowEvent<{ input: ChatMessage[] }>();
+// Tool calls that the research step runs with the query engine tool.
+const researchEvent = workflowEvent<{ toolCalls: ToolCall[] }>();
+// Either a chat message sent by the research step or a list of tool calls
+// that the analysis step runs with the code interpreter tool.
+const analyzeEvent = workflowEvent<{ input: ChatMessage | ToolCall[] }>();
+// Tool calls that the report step runs with the document generator tool.
+const reportGenerationEvent = workflowEvent<{ toolCalls: ToolCall[] }>();
+
+const DEFAULT_SYSTEM_PROMPT = `
+You are a financial analyst who are given a set of tools to help you.
+It's good to using appropriate tools for the user request and always use the information from the tools, don't make up anything yourself.
+For the query engine tool, you should break down the user request into a list of queries and call the tool with the queries.
+`;
+
+// workflow definition
+export function getWorkflow(
+  queryEngineTool: BaseToolWithCall,
+  codeInterpreterTool: BaseToolWithCall,
+  documentGeneratorTool: BaseToolWithCall,
+) {
+  const llm = Settings.llm as ToolCallLLM;
+  if (!llm.supportToolCall) {
+    throw new Error("LLM is not a ToolCallLLM");
+  }
+  const { withState, getContext } = createStatefulMiddleware(() => ({
+    memory: new ChatMemoryBuffer({ llm, chatHistory: [] }),
+  }));
+
+  const workflow = withState(createWorkflow());
+
+  // Add steps
+  // Entry step: load the previous conversation into memory, append the
+  // system prompt and the new user message, then hand over to the LLM step.
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { memory } = getContext().state;
+    const { userInput, chatHistory: previousMessages = [] } = data;
+    if (!userInput) {
+      throw new Error("Invalid input");
+    }
+
+    memory.set(previousMessages);
+    memory.put({ role: "system", content: DEFAULT_SYSTEM_PROMPT });
+    memory.put({ role: "user", content: userInput });
+
+    return inputEvent.with({ input: await memory.getMessages() });
+  });
+
+  // LLM step: let the model either answer directly or pick a tool, and route
+  // to the step that owns the requested tool.
+  workflow.handle([inputEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+
+    const availableTools = [
+      codeInterpreterTool,
+      documentGeneratorTool,
+      queryEngineTool,
+    ];
+    const llmReply = await chatWithTools(llm, availableTools, data.input);
+
+    // No tool requested: stream the answer to the client and finish.
+    if (!llmReply.hasToolCall()) {
+      let answer = "";
+      const stream = llmReply.responseGenerator;
+      if (stream) {
+        for await (const part of stream) {
+          answer += part.delta;
+          sendEvent(
+            agentStreamEvent.with({
+              delta: part.delta,
+              response: answer,
+              currentAgentName: "LLM", // Or derive from context if needed
+              raw: part.raw,
+            }),
+          );
+        }
+      }
+      return stopAgentEvent.with({ result: answer });
+    }
+
+    // Several different tools requested at once: note the restriction in
+    // memory and ask the LLM again.
+    if (llmReply.hasMultipleTools()) {
+      state.memory.put({
+        role: "assistant",
+        content:
+          "Calling different tools is not allowed. Please only use multiple calls of the same tool.",
+      });
+      return inputEvent.with({ input: await state.memory.getMessages() });
+    }
+
+    // Remember the tool call message before dispatching to the tool's step.
+    if (llmReply.toolCallMessage) {
+      state.memory.put(llmReply.toolCallMessage);
+    }
+
+    const requestedTool = llmReply.getToolNames()[0];
+    if (requestedTool === codeInterpreterTool.metadata.name) {
+      return analyzeEvent.with({
+        input: llmReply.toolCalls,
+      });
+    }
+    if (requestedTool === documentGeneratorTool.metadata.name) {
+      return reportGenerationEvent.with({
+        toolCalls: llmReply.toolCalls,
+      });
+    }
+    if (requestedTool === queryEngineTool.metadata.name) {
+      return researchEvent.with({
+        toolCalls: llmReply.toolCalls,
+      });
+    }
+    throw new Error(`Unknown tool: ${requestedTool}`);
+  });
+
+  // Research step: run the query engine tool calls, store the tool messages
+  // in memory, publish any source nodes and then request an analysis.
+  workflow.handle([researchEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    const { toolCalls } = data;
+
+    sendEvent(
+      toAgentRunEvent({
+        agent: "Researcher",
+        text: "Researching data",
+        type: "text",
+      }),
+    );
+
+    // Forwards tool progress to the client; shown as a progress item when
+    // more than one call is running.
+    const reportProgress = (text: string, step: number) => {
+      sendEvent(
+        toAgentRunEvent({
+          agent: "Researcher",
+          text,
+          type: toolCalls.length > 1 ? "progress" : "text",
+          current: step,
+          total: toolCalls.length,
+        }),
+      );
+    };
+
+    const toolMsgs = await callTools({
+      tools: [queryEngineTool],
+      toolCalls,
+      writeEvent: (text, step) => reportProgress(text, step),
+    });
+    toolMsgs.forEach((toolMsg) => {
+      state.memory.put(toolMsg);
+    });
+
+    // Collect the source nodes attached to the tool results, skipping
+    // messages that carry none.
+    const sourcesNodes: NodeWithScore<Metadata>[] = toolMsgs
+      .flatMap((msg) => (msg.options as any)?.toolResult?.result?.sourceNodes)
+      .filter(Boolean);
+
+    if (sourcesNodes.length > 0) {
+      sendEvent(toSourceEvent(sourcesNodes));
+    }
+
+    // Research is done: ask the analysis step to look at the data.
+    return analyzeEvent.with({
+      input: {
+        role: "assistant",
+        content:
+          "I have finished researching the data, please analyze the data.",
+      },
+    });
+  });
+
+  /**
+   * Analysis step. The input is either a list of code interpreter tool calls
+   * issued by the LLM step, or a chat message from the research step that
+   * starts a dedicated analysis chat.
+   */
+  workflow.handle([analyzeEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    sendEvent(
+      toAgentRunEvent({
+        agent: "Analyst",
+        text: "Analyzing data",
+        type: "text",
+      }),
+    );
+
+    let toolCalls: ToolCall[] = [];
+    if (!Array.isArray(data.input)) {
+      // Sent by the research step: run a separate LLM chat for the analysis
+      const analysisPrompt = `
+      You are an expert in analyzing financial data.
+      You are given a set of financial data to analyze. Your task is to analyze the financial data and return a report.
+      Your response should include a detailed analysis of the financial data, including any trends, patterns, or insights that you find.
+      Construct the analysis in textual format; including tables would be great!
+      Don't need to synthesize the data, just analyze and provide your findings.
+      `;
+
+      // Copy of the chat history extended with the analysis system prompt
+      // and the message from the research step; memory itself is unchanged.
+      const history = await state.memory.getMessages();
+      const analysisMessages = [
+        ...history,
+        { role: "system", content: analysisPrompt },
+        data.input,
+      ] as ChatMessage[];
+      const analysisReply = await chatWithTools(
+        llm,
+        [codeInterpreterTool],
+        analysisMessages,
+      );
+
+      if (!analysisReply.hasToolCall()) {
+        // Plain answer: store it and go straight back to the LLM step
+        state.memory.put(await analysisReply.asFullResponse());
+        return inputEvent.with({ input: await state.memory.getMessages() });
+      }
+
+      state.memory.put(analysisReply.toolCallMessage!);
+      toolCalls = analysisReply.toolCalls;
+    } else {
+      // Sent by the LLM step: the input already is the list of tool calls
+      toolCalls = data.input;
+    }
+
+    // Run the code interpreter when there is something to execute
+    if (toolCalls.length > 0) {
+      const toolMsgs = await callTools({
+        tools: [codeInterpreterTool],
+        toolCalls,
+        writeEvent: (text, step) => {
+          sendEvent(
+            toAgentRunEvent({
+              agent: "Analyst",
+              text,
+              type: toolCalls.length > 1 ? "progress" : "text",
+              current: step,
+              total: toolCalls.length,
+            }),
+          );
+        },
+      });
+      toolMsgs.forEach((toolMsg) => {
+        state.memory.put(toolMsg);
+      });
+    }
+
+    // Continue with the next LLM input cycle on the updated history
+    return inputEvent.with({ input: await state.memory.getMessages() });
+  });
+
+  // Report step: run the document generator tool calls, store the tool
+  // messages in memory and return to the LLM step.
+  workflow.handle([reportGenerationEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    const { toolCalls } = data;
+
+    // Forwards tool progress to the client; shown as a progress item when
+    // more than one call is running.
+    const reportProgress = (text: string, step: number) => {
+      sendEvent(
+        toAgentRunEvent({
+          agent: "Reporter",
+          text,
+          type: toolCalls.length > 1 ? "progress" : "text",
+          current: step,
+          total: toolCalls.length,
+        }),
+      );
+    };
+
+    const toolMsgs = await callTools({
+      tools: [documentGeneratorTool],
+      toolCalls,
+      writeEvent: (text, step) => reportProgress(text, step),
+    });
+    for (const toolMsg of toolMsgs) {
+      state.memory.put(toolMsg);
+    }
+
+    // After report generation, trigger the next LLM input cycle
+    return inputEvent.with({ input: await state.memory.getMessages() });
+  });
+
+  return workflow;
+}
@@ -0,0 +1,39 @@
+import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
+import "dotenv/config";
+import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
+import { initSettings } from "./app/settings";
+
+/**
+ * Index the documents found in the `data` directory, using a storage
+ * context whose persist directory is `storage`.
+ */
+async function generateDatasource() {
+  console.log(`Generating storage context...`);
+
+  // Storage context backed by the "storage" directory
+  const storageContext = await storageContextFromDefaults({
+    persistDir: "storage",
+  });
+
+  // Load the documents from the "data" directory
+  const documents = await new SimpleDirectoryReader().loadData("data");
+
+  // Build the vector index from the documents inside that storage context
+  await VectorStoreIndex.fromDocuments(documents, { storageContext });
+
+  console.log("Storage context successfully generated.");
+}
+
+// CLI entry point: "ui" is rejected because this project has no custom UI;
+// every other command (recognized or not) generates the datasource.
+(async () => {
+  const [command] = process.argv.slice(2);
+
+  initSettings();
+
+  if (command === "ui") {
+    console.error("This project doesn't use any custom UI.");
+    return;
+  }
+
+  if (command !== "datasource") {
+    console.error(
+      `Unrecognized command: ${command}. Generating datasource by default.`,
+    );
+  }
+  await generateDatasource();
+})();
@@ -12,11 +12,12 @@ from llama_index.server.services.llamacloud.generate import (
    load_to_llamacloud,
 )

+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()


-def generate_datasource():
+def generate_index():
    init_settings()
    logger.info("Generate index for the provided data")

@@ -27,5 +28,26 @@ def generate_datasource():
    load_to_llamacloud(index, logger=logger)


-if __name__ == "__main__":
-    generate_datasource()
+def generate_ui_for_workflow():
+    """
+    Generate UI for UIEventData event in app/workflow.py
+    """
+    import asyncio
+    from llama_index.llms.openai import OpenAI
+    from main import COMPONENT_DIR
+
+    # To generate UI components for additional event types,
+    # import the corresponding data model (e.g., MyCustomEventData)
+    # and run the generate_ui_for_workflow function with the imported model.
+    # Make sure the output filename of the generated UI component matches the event type (here `ui_event`)
+    try:
+        from app.workflow import UIEventData  # type: ignore
+    except ImportError:
+        raise ImportError("Couldn't generate UI component for the current workflow.")
+    from llama_index.server.gen_ui import generate_event_component
+
+    # works also well with Claude 3.7 Sonnet or Gemini Pro 2.5
+    llm = OpenAI(model="gpt-4.1")
+    code = asyncio.run(generate_event_component(event_cls=UIEventData, llm=llm))
+    with open(f"{COMPONENT_DIR}/ui_event.jsx", "w") as f:
+        f.write(code)
@@ -1,3 +1,5 @@
+import { OpenAI } from "@llamaindex/openai";
+import { generateEventComponent } from "@llamaindex/server";
 import * as dotenv from "dotenv";
 import "dotenv/config";
 import * as fs from "fs/promises";
@@ -88,7 +90,7 @@ async function loadAndIndex() {
  console.log(`Successfully uploaded documents to LlamaCloud!`);
 }

-(async () => {
+async function generateDatasource() {
  try {
    checkRequiredEnvVars();
    initSettings();
@@ -97,4 +99,39 @@ async function loadAndIndex() {
  } catch (error) {
    console.error("Error generating storage.", error);
  }
+}
+
+/**
+ * Generate a UI component for the workflow's `UIEventSchema` with an LLM and
+ * write it to `components/ui_event.jsx`.
+ *
+ * @throws Error when `./app/workflow` does not export a `UIEventSchema`.
+ */
+async function generateUi() {
+  // Also works well with Claude 3.5 Sonnet and Google Gemini 2.5 Pro
+  const llm = new OpenAI({ model: "gpt-4.1" });
+
+  const workflowModule = await import("./app/workflow");
+  const UIEventSchema = (workflowModule as any).UIEventSchema;
+  if (!UIEventSchema) {
+    throw new Error(
+      "To generate the UI, you must define a UIEventSchema in your workflow.",
+    );
+  }
+
+  const generatedCode = await generateEventComponent(UIEventSchema, llm);
+  // writeFile does not create missing directories, so make sure the target
+  // directory exists (no-op when it is already there).
+  await fs.mkdir("components", { recursive: true });
+  // Write the generated code to components/ui_event.jsx
+  await fs.writeFile("components/ui_event.jsx", generatedCode);
+}
+
+// CLI entry point: dispatch on the first command line argument and fall
+// back to generating the datasource for anything unrecognized.
+(async () => {
+  const [command] = process.argv.slice(2);
+
+  initSettings();
+
+  switch (command) {
+    case "datasource":
+      await generateDatasource();
+      break;
+    case "ui":
+      await generateUi();
+      break;
+    default:
+      console.error(
+        'Invalid command. Please use "datasource" or "ui". Running "datasource" by default.',
+      );
+      await generateDatasource(); // Default behavior or could throw an error
+  }
 })();
@@ -1,4 +1,4 @@
-import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
+import { LlamaCloudIndex } from "llamaindex";

 type LlamaCloudDataSourceParams = {
  llamaCloudPipeline?: {
@@ -1,4 +1,4 @@
-import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
+import { LlamaCloudIndex } from "llamaindex";

 type LlamaCloudDataSourceParams = {
  llamaCloudPipeline?: {
@@ -1,15 +0,0 @@
-from app.code_workflow import CodeArtifactWorkflow
-
-# from app.document_workflow import DocumentArtifactWorkflow to generate documents
-from llama_index.core.workflow import Workflow
-from llama_index.llms.openai import OpenAI
-from llama_index.server.api.models import ChatRequest
-
-
-def create_workflow(chat_request: ChatRequest) -> Workflow:
-    workflow = CodeArtifactWorkflow(
-        llm=OpenAI(model="gpt-4.1"),
-        chat_request=chat_request,
-        timeout=120.0,
-    )
-    return workflow
@@ -1,447 +0,0 @@
-import { toSourceEvent, toStreamGenerator } from "@llamaindex/server";
-import {
-  AgentInputData,
-  AgentWorkflowContext,
-  ChatMemoryBuffer,
-  ChatResponseChunk,
-  HandlerContext,
-  LlamaCloudIndex,
-  Metadata,
-  MetadataMode,
-  NodeWithScore,
-  PromptTemplate,
-  Settings,
-  StartEvent,
-  StopEvent as StopEventBase,
-  ToolCallLLM,
-  VectorStoreIndex,
-  Workflow,
-  WorkflowEvent,
-} from "llamaindex";
-import { randomUUID } from "node:crypto";
-import { z } from "zod";
-import { getIndex } from "./data";
-
-// workflow factory
-export const workflowFactory = async (reqBody: any) => {
-  const index = await getIndex(reqBody?.data);
-  return new DeepResearchWorkflow(index);
-};
-
-// workflow configs
-const MAX_QUESTIONS = 6; // max number of questions to research, research will stop when this number is reached
-const TIMEOUT = 360; // timeout in seconds
-const TOP_K = 10; // number of nodes to retrieve from the vector store
-
-const createPlanResearchPrompt = new PromptTemplate({
-  template: `
-You are a professor who is guiding a researcher to research a specific request/problem.
-Your task is to decide on a research plan for the researcher.
-
-The possible actions are:
-+ Provide a list of questions for the researcher to investigate, with the purpose of clarifying the request.
-+ Write a report if the researcher has already gathered enough research on the topic and can resolve the initial request.
-+ Cancel the research if most of the answers from researchers indicate there is insufficient information to research the request. Do not attempt more than 3 research iterations or too many questions.
-
-The workflow should be:
-+ Always begin by providing some initial questions for the researcher to investigate.
-+ Analyze the provided answers against the initial topic/request. If the answers are insufficient to resolve the initial request, provide additional questions for the researcher to investigate.
-+ If the answers are sufficient to resolve the initial request, instruct the researcher to write a report.
-
-Here are the context: 
-<Collected information>
-{context_str}
-</Collected information>
-
-<Conversation context>
-{conversation_context}
-</Conversation context>
-
-{enhanced_prompt}
-
-Now, provide your decision in the required format for this user request:
-<User request>
-{user_request}
-</User request>
-`,
-  templateVars: [
-    "context_str",
-    "conversation_context",
-    "enhanced_prompt",
-    "user_request",
-  ],
-});
-
-const researchPrompt = new PromptTemplate({
-  template: `
-You are a researcher who is in the process of answering the question.
-The purpose is to answer the question based on the collected information, without using prior knowledge or making up any new information.
-Always add citations to the sentence/point/paragraph using the id of the provided content.
-The citation should follow this format: [citation:id] where id is the id of the content.
-
-E.g:
-If we have a context like this:
-<Citation id='abc-xyz'>
-Baby llama is called cria
-</Citation id='abc-xyz'>
-
-And your answer uses the content, then the citation should be:
- Baby llama is called cria [citation:abc-xyz]
-
- Here is the provided context for the question:
-<Collected information>
-{context_str}
-</Collected information>
-
-No prior knowledge, just use the provided context to answer the question: {question}
-`,
-  templateVars: ["context_str", "question"],
-});
-
-const WRITE_REPORT_PROMPT = `
-You are a researcher writing a report based on a user request and the research context.
-You have researched various perspectives related to the user request.
-The report should provide a comprehensive outline covering all important points from the researched perspectives.
-Create a well-structured outline for the research report that covers all the answers.
-
-# IMPORTANT when writing in markdown format:
-+ Use tables or figures where appropriate to enhance presentation.
-+ Preserve all citation syntax (the \`[citation:id]()\` parts in the provided context). Keep these citations in the final report - no separate reference section is needed.
-+ Do not add links, a table of contents, or a references section to the report.
-`;
-
-// workflow events
-type ResearchQuestion = { questionId: string; question: string };
-type ResearchResult = ResearchQuestion & { answer: string };
-
-class PlanResearchEvent extends WorkflowEvent<{}> {}
-class ResearchEvent extends WorkflowEvent<ResearchQuestion[]> {}
-class ReportEvent extends WorkflowEvent<{}> {}
-class StopEvent extends StopEventBase<AsyncGenerator<ChatResponseChunk>> {}
-
-export const UIEventSchema = z
-  .object({
-    event: z
-      .enum(["retrieve", "analyze", "answer"])
-      .describe(
-        "The type of event. DeepResearch has 3 main stages:\n1. retrieve: Retrieve the context from the vector store\n2. analyze: Analyze the context and generate a research questions to answer\n3. answer: Answer the provided questions. Each question has a unique id, when the state is done, the event will have the answer for the question.",
-      ),
-    state: z
-      .enum(["pending", "inprogress", "done", "error"])
-      .describe("The state for each event"),
-    id: z.string().optional().describe("The id of the question"),
-    question: z
-      .string()
-      .optional()
-      .describe("The question generated by the LLM"),
-    answer: z.string().optional().describe("The answer generated by the LLM"),
-  })
-  .describe("DeepResearchEvent");
-
-type UIEventData = z.infer<typeof UIEventSchema>;
-
-class UIEvent extends WorkflowEvent<{
-  type: "ui_event";
-  data: UIEventData;
-}> {}
-
-// workflow definition
-class DeepResearchWorkflow extends Workflow<
-  AgentWorkflowContext,
-  AgentInputData,
-  string
-> {
-  #llm = Settings.llm as ToolCallLLM;
-  #index?: VectorStoreIndex | LlamaCloudIndex;
-
-  userRequest: string = "";
-  totalQuestions: number = 0;
-  contextNodes: NodeWithScore<Metadata>[] = [];
-  memory: ChatMemoryBuffer = new ChatMemoryBuffer({ llm: Settings.llm });
-
-  constructor(index: VectorStoreIndex | LlamaCloudIndex) {
-    super({ timeout: TIMEOUT });
-    this.#index = index;
-    this.addWorkflowSteps();
-  }
-
-  addWorkflowSteps() {
-    this.addStep(
-      {
-        inputs: [StartEvent<AgentInputData>],
-        outputs: [PlanResearchEvent],
-      },
-      this.handleStartWorkflow,
-    );
-    this.addStep(
-      {
-        inputs: [PlanResearchEvent],
-        outputs: [ResearchEvent, ReportEvent, StopEvent],
-      },
-      this.handlePlanResearch,
-    );
-    this.addStep(
-      {
-        inputs: [ResearchEvent],
-        outputs: [PlanResearchEvent],
-      },
-      this.handleResearch,
-    );
-    this.addStep(
-      {
-        inputs: [ReportEvent],
-        outputs: [StopEvent],
-      },
-      this.handleReport,
-    );
-  }
-
-  async initWorkflow(data: AgentInputData) {
-    const { userInput, chatHistory = [] } = data;
-    if (!userInput) throw new Error("Invalid input");
-
-    this.userRequest = userInput;
-
-    await this.memory.set(chatHistory);
-    await this.memory.put({ role: "user", content: userInput });
-  }
-
-  handleStartWorkflow = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: StartEvent<AgentInputData>,
-  ): Promise<PlanResearchEvent> => {
-    await this.initWorkflow(ev.data);
-
-    ctx.sendEvent(
-      new UIEvent({
-        type: "ui_event",
-        data: { event: "retrieve", state: "inprogress" },
-      }),
-    );
-
-    const retrievedNodes = await this.retriever.retrieve(this.userRequest);
-
-    ctx.sendEvent(toSourceEvent(retrievedNodes));
-
-    ctx.sendEvent(
-      new UIEvent({
-        type: "ui_event",
-        data: { event: "retrieve", state: "done" },
-      }),
-    );
-
-    this.contextNodes = retrievedNodes;
-
-    return new PlanResearchEvent({});
-  };
-
-  handlePlanResearch = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: PlanResearchEvent,
-  ): Promise<ResearchEvent | ReportEvent | StopEvent> => {
-    ctx.sendEvent(
-      new UIEvent({
-        type: "ui_event",
-        data: { event: "analyze", state: "inprogress" },
-      }),
-    );
-
-    const { decision, researchQuestions, cancelReason } =
-      await this.createResearchPlan();
-
-    // Stop workflow due to decision from LLM
-    if (decision === "cancel") {
-      ctx.sendEvent(
-        new UIEvent({
-          type: "ui_event",
-          data: { event: "analyze", state: "done" },
-        }),
-      );
-      return new StopEvent(
-        toStreamGenerator(
-          cancelReason ?? "Research cancelled without any reason.",
-        ),
-      );
-    }
-
-    // Trigger research from generated questions
-    if (decision === "research") {
-      this.memory.put({
-        role: "assistant",
-        content:
-          "We need to find answers to the following questions:\n" +
-          researchQuestions.join("\n"),
-      });
-
-      researchQuestions.forEach(({ questionId: id, question }) => {
-        ctx.sendEvent(
-          new UIEvent({
-            type: "ui_event",
-            data: { event: "answer", state: "pending", id, question },
-          }),
-        );
-      });
-
-      return new ResearchEvent(researchQuestions);
-    }
-
-    // Resarch done, start writing report
-    this.memory.put({
-      role: "assistant",
-      content: "No more idea to analyze. We should report the answers.",
-    });
-
-    ctx.sendEvent(
-      new UIEvent({
-        type: "ui_event",
-        data: { event: "analyze", state: "done" },
-      }),
-    );
-
-    return new ReportEvent({});
-  };
-
-  handleResearch = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: ResearchEvent,
-  ): Promise<PlanResearchEvent> => {
-    const researchQuestions = ev.data;
-
-    // Answer questions in parallel
-    const researchResults: ResearchResult[] = await Promise.all(
-      researchQuestions.map(async ({ questionId: id, question }) => {
-        ctx.sendEvent(
-          new UIEvent({
-            type: "ui_event",
-            data: { event: "answer", state: "inprogress", id, question },
-          }),
-        );
-
-        const answer = await this.answerQuestion(question);
-
-        ctx.sendEvent(
-          new UIEvent({
-            type: "ui_event",
-            data: { event: "answer", state: "done", id, question, answer },
-          }),
-        );
-
-        return { questionId: id, question, answer };
-      }),
-    );
-
-    // Save answers to memory
-    researchResults.forEach(({ question, answer }) => {
-      this.memory.put({
-        role: "assistant",
-        content: `<Question>${question}</Question>\n<Answer>${answer}</Answer>`,
-      });
-    });
-
-    this.memory.put({
-      role: "assistant",
-      content:
-        "Researched all the questions. Now, I need to analyze if it's ready to write a report or need to research more.",
-    });
-
-    this.totalQuestions += researchResults.length;
-
-    return new PlanResearchEvent({});
-  };
-
-  handleReport = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: ReportEvent,
-  ): Promise<StopEvent> => {
-    const chatHistory = await this.memory.getAllMessages();
-
-    const messages = chatHistory.concat([
-      {
-        role: "system",
-        content: WRITE_REPORT_PROMPT,
-      },
-      {
-        role: "user",
-        content:
-          "Write a report addressing the user request based on the research provided the context",
-      },
-    ]);
-
-    const stream = await this.llm.chat({ messages, stream: true });
-
-    return new StopEvent(toStreamGenerator(stream));
-  };
-
-  get llm() {
-    if (!this.#llm.supportToolCall) throw new Error("LLM is not a ToolCallLLM");
-    return this.#llm;
-  }
-
-  get retriever() {
-    if (!this.#index) throw new Error("Index is not initialized");
-    return this.#index.asRetriever({ similarityTopK: TOP_K });
-  }
-
-  get contextStr() {
-    return this.contextNodes
-      .map((node) => {
-        const nodeId = node.node.id_;
-        const nodeContent = node.node.getContent(MetadataMode.NONE);
-        return `<Citation id='${nodeId}'>\n${nodeContent}</Citation id='${nodeId}'>`;
-      })
-      .join("\n");
-  }
-
-  get enhancedPrompt() {
-    if (this.totalQuestions === 0) {
-      return "The student has no questions to research. Let start by asking some questions.";
-    }
-
-    if (this.totalQuestions > MAX_QUESTIONS) {
-      return `The student has researched ${this.totalQuestions} questions. Should cancel the research if the context is not enough to write a report.`;
-    }
-
-    return "";
-  }
-
-  async createResearchPlan() {
-    const chatHistory = await this.memory.getMessages();
-
-    const conversationContext = chatHistory
-      .map((message) => `${message.role}: ${message.content}`)
-      .join("\n");
-
-    const prompt = createPlanResearchPrompt.format({
-      context_str: this.contextStr,
-      conversation_context: conversationContext,
-      enhanced_prompt: this.enhancedPrompt,
-      user_request: this.userRequest,
-    });
-
-    const responseFormat = z.object({
-      decision: z.enum(["research", "write", "cancel"]),
-      researchQuestions: z.array(z.string()),
-      cancelReason: z.string().optional(),
-    });
-
-    const result = await this.llm.complete({ prompt, responseFormat });
-    const plan = JSON.parse(result.text) as z.infer<typeof responseFormat>;
-
-    return {
-      ...plan,
-      researchQuestions: plan.researchQuestions.map((question) => ({
-        questionId: randomUUID(),
-        question,
-      })),
-    };
-  }
-
-  async answerQuestion(question: string) {
-    const prompt = researchPrompt.format({
-      context_str: this.contextStr,
-      question,
-    });
-    const result = await this.llm.complete({ prompt });
-    return result.text;
-  }
-}
@@ -1,396 +0,0 @@
-import { toAgentRunEvent, toSourceEvent } from "@llamaindex/server";
-import {
-  callTools,
-  chatWithTools,
-  documentGenerator,
-  interpreter,
-} from "@llamaindex/tools";
-import {
-  AgentInputData,
-  AgentWorkflowContext,
-  BaseToolWithCall,
-  ChatMemoryBuffer,
-  ChatMessage,
-  ChatResponseChunk,
-  HandlerContext,
-  Metadata,
-  NodeWithScore,
-  Settings,
-  StartEvent,
-  StopEvent,
-  ToolCall,
-  ToolCallLLM,
-  Workflow,
-  WorkflowEvent,
-} from "llamaindex";
-import { getIndex } from "./data";
-
-const TIMEOUT = 360 * 1000;
-
-export async function workflowFactory(reqBody: any) {
-  const index = await getIndex(reqBody?.data);
-
-  const queryEngineTool = index.queryTool({
-    metadata: {
-      name: "query_document",
-      description: `This tool can retrieve information about Apple and Tesla financial data`,
-    },
-    includeSourceNodes: true,
-  });
-
-  if (!process.env.E2B_API_KEY) {
-    throw new Error("E2B_API_KEY is required to use the code interpreter tool");
-  }
-
-  const codeInterpreterTool = interpreter({
-    apiKey: process.env.E2B_API_KEY!,
-  });
-  const documentGeneratorTool = documentGenerator();
-
-  return new FinancialReportWorkflow({
-    queryEngineTool,
-    codeInterpreterTool,
-    documentGeneratorTool,
-    timeout: TIMEOUT,
-  });
-}
-
-// Create a custom event type
-class InputEvent extends WorkflowEvent<{ input: ChatMessage[] }> {}
-
-class ResearchEvent extends WorkflowEvent<{
-  toolCalls: ToolCall[];
-}> {}
-
-class AnalyzeEvent extends WorkflowEvent<{
-  input: ChatMessage | ToolCall[];
-}> {}
-
-class ReportGenerationEvent extends WorkflowEvent<{
-  toolCalls: ToolCall[];
-}> {}
-
-const DEFAULT_SYSTEM_PROMPT = `
-You are a financial analyst who are given a set of tools to help you.
-It's good to using appropriate tools for the user request and always use the information from the tools, don't make up anything yourself.
-For the query engine tool, you should break down the user request into a list of queries and call the tool with the queries.
-`;
-
-class FinancialReportWorkflow extends Workflow<
-  AgentWorkflowContext,
-  AgentInputData,
-  string
-> {
-  llm: ToolCallLLM;
-  memory: ChatMemoryBuffer;
-  queryEngineTool: BaseToolWithCall;
-  codeInterpreterTool: BaseToolWithCall;
-  documentGeneratorTool: BaseToolWithCall;
-  systemPrompt?: string;
-
-  constructor(options: {
-    queryEngineTool: BaseToolWithCall;
-    codeInterpreterTool: BaseToolWithCall;
-    documentGeneratorTool: BaseToolWithCall;
-    systemPrompt?: string;
-    verbose?: boolean;
-    timeout?: number;
-  }) {
-    super({
-      verbose: options?.verbose ?? false,
-      timeout: options?.timeout ?? 360,
-    });
-
-    this.llm = Settings.llm as ToolCallLLM;
-    if (!this.llm.supportToolCall) {
-      throw new Error("LLM is not a ToolCallLLM");
-    }
-    this.systemPrompt = options.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
-    this.queryEngineTool = options.queryEngineTool;
-    this.codeInterpreterTool = options.codeInterpreterTool;
-
-    this.documentGeneratorTool = options.documentGeneratorTool;
-    this.memory = new ChatMemoryBuffer({ llm: this.llm, chatHistory: [] });
-
-    // Add steps
-    this.addStep(
-      {
-        inputs: [StartEvent<AgentInputData>],
-        outputs: [InputEvent],
-      },
-      this.prepareChatHistory,
-    );
-
-    this.addStep(
-      {
-        inputs: [InputEvent],
-        outputs: [
-          InputEvent,
-          ResearchEvent,
-          AnalyzeEvent,
-          ReportGenerationEvent,
-          StopEvent,
-        ],
-      },
-      this.handleLLMInput,
-    );
-
-    this.addStep(
-      {
-        inputs: [ResearchEvent],
-        outputs: [AnalyzeEvent],
-      },
-      this.handleResearch,
-    );
-
-    this.addStep(
-      {
-        inputs: [AnalyzeEvent],
-        outputs: [InputEvent],
-      },
-      this.handleAnalyze,
-    );
-
-    this.addStep(
-      {
-        inputs: [ReportGenerationEvent],
-        outputs: [InputEvent],
-      },
-      this.handleReportGeneration,
-    );
-  }
-
-  prepareChatHistory = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: StartEvent<AgentInputData>,
-  ): Promise<InputEvent> => {
-    const { userInput, chatHistory = [] } = ev.data;
-    if (!userInput) throw new Error("Invalid input");
-
-    this.memory.set(chatHistory);
-
-    if (this.systemPrompt) {
-      this.memory.put({ role: "system", content: this.systemPrompt });
-    }
-
-    this.memory.put({ role: "user", content: userInput });
-
-    const messages = await this.memory.getMessages();
-    return new InputEvent({ input: messages });
-  };
-
-  handleLLMInput = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: InputEvent,
-  ): Promise<
-    | InputEvent
-    | ResearchEvent
-    | AnalyzeEvent
-    | ReportGenerationEvent
-    | StopEvent<AsyncGenerator<ChatResponseChunk, any, any> | undefined>
-  > => {
-    const chatHistory = ev.data.input;
-
-    const tools = [
-      this.codeInterpreterTool,
-      this.documentGeneratorTool,
-      this.queryEngineTool,
-    ];
-
-    const toolCallResponse = await chatWithTools(this.llm, tools, chatHistory);
-
-    if (!toolCallResponse.hasToolCall()) {
-      return new StopEvent(toolCallResponse.responseGenerator);
-    }
-
-    if (toolCallResponse.hasMultipleTools()) {
-      this.memory.put({
-        role: "assistant",
-        content:
-          "Calling different tools is not allowed. Please only use multiple calls of the same tool.",
-      });
-      const chatHistory = await this.memory.getMessages();
-      return new InputEvent({ input: chatHistory });
-    }
-
-    // Put the LLM tool call message into the memory
-    // And trigger the next step according to the tool call
-    if (toolCallResponse.toolCallMessage) {
-      this.memory.put(toolCallResponse.toolCallMessage);
-    }
-    const toolName = toolCallResponse.getToolNames()[0];
-    switch (toolName) {
-      case this.codeInterpreterTool.metadata.name:
-        return new AnalyzeEvent({
-          input: toolCallResponse.toolCalls,
-        });
-      case this.documentGeneratorTool.metadata.name:
-        return new ReportGenerationEvent({
-          toolCalls: toolCallResponse.toolCalls,
-        });
-      default:
-        if (this.queryEngineTool.metadata.name === toolName) {
-          return new ResearchEvent({
-            toolCalls: toolCallResponse.toolCalls,
-          });
-        }
-        throw new Error(`Unknown tool: ${toolName}`);
-    }
-  };
-
-  handleResearch = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: ResearchEvent,
-  ): Promise<AnalyzeEvent> => {
-    ctx.sendEvent(
-      toAgentRunEvent({
-        agent: "Researcher",
-        text: "Researching data",
-        type: "text",
-      }),
-    );
-
-    const { toolCalls } = ev.data;
-
-    const toolMsgs = await callTools({
-      tools: [this.queryEngineTool],
-      toolCalls,
-      writeEvent: (text, step) => {
-        ctx.sendEvent(
-          toAgentRunEvent({
-            agent: "Researcher",
-            text,
-            type: toolCalls.length > 1 ? "progress" : "text",
-            current: step,
-            total: toolCalls.length,
-          }),
-        );
-      },
-    });
-    for (const toolMsg of toolMsgs) {
-      this.memory.put(toolMsg);
-    }
-
-    const sourcesNodes: NodeWithScore<Metadata>[] = toolMsgs
-      .map((msg) => (msg.options as any)?.toolResult?.result?.sourceNodes)
-      .flat()
-      .filter(Boolean);
-
-    if (sourcesNodes.length > 0) {
-      ctx.sendEvent(toSourceEvent(sourcesNodes));
-    }
-
-    return new AnalyzeEvent({
-      input: {
-        role: "assistant",
-        content:
-          "I have finished researching the data, please analyze the data.",
-      },
-    });
-  };
-
-  /**
-   * Analyze a research result or a tool call for code interpreter from the LLM
-   */
-  handleAnalyze = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: AnalyzeEvent,
-  ): Promise<InputEvent> => {
-    ctx.sendEvent(
-      toAgentRunEvent({
-        agent: "Analyst",
-        text: "Analyzing data",
-        type: "text",
-      }),
-    );
-    // Request by workflow LLM, input is a list of tool calls
-    let toolCalls: ToolCall[] = [];
-    if (Array.isArray(ev.data.input)) {
-      toolCalls = ev.data.input;
-    } else {
-      // Requested by Researcher, input is a ChatMessage
-      // We start new LLM chat specifically for analyzing the data
-      const analysisPrompt = `
-      You are an expert in analyzing financial data.
-      You are given a set of financial data to analyze. Your task is to analyze the financial data and return a report.
-      Your response should include a detailed analysis of the financial data, including any trends, patterns, or insights that you find.
-      Construct the analysis in textual format; including tables would be great!
-      Don't need to synthesize the data, just analyze and provide your findings.
-      `;
-
-      // Clone the current chat history
-      // Add the analysis system prompt and the message from the researcher
-      const chatHistory = await this.memory.getMessages();
-      const newChatHistory = [
-        ...chatHistory,
-        { role: "system", content: analysisPrompt },
-        ev.data.input,
-      ];
-      const toolCallResponse = await chatWithTools(
-        this.llm,
-        [this.codeInterpreterTool],
-        newChatHistory as ChatMessage[],
-      );
-
-      if (!toolCallResponse.hasToolCall()) {
-        this.memory.put(await toolCallResponse.asFullResponse());
-        const chatHistory = await this.memory.getMessages();
-        return new InputEvent({ input: chatHistory });
-      } else {
-        this.memory.put(toolCallResponse.toolCallMessage!);
-        toolCalls = toolCallResponse.toolCalls;
-      }
-    }
-
-    // Call the tools
-    const toolMsgs = await callTools({
-      tools: [this.codeInterpreterTool],
-      toolCalls,
-      writeEvent: (text, step) => {
-        ctx.sendEvent(
-          toAgentRunEvent({
-            agent: "Analyst",
-            text,
-            type: toolCalls.length > 1 ? "progress" : "text",
-            current: step,
-            total: toolCalls.length,
-          }),
-        );
-      },
-    });
-    for (const toolMsg of toolMsgs) {
-      this.memory.put(toolMsg);
-    }
-
-    const chatHistory = await this.memory.getMessages();
-    return new InputEvent({ input: chatHistory });
-  };
-
-  handleReportGeneration = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: ReportGenerationEvent,
-  ): Promise<InputEvent> => {
-    const { toolCalls } = ev.data;
-
-    const toolMsgs = await callTools({
-      tools: [this.documentGeneratorTool],
-      toolCalls,
-      writeEvent: (text, step) => {
-        ctx.sendEvent(
-          toAgentRunEvent({
-            agent: "Reporter",
-            text,
-            type: toolCalls.length > 1 ? "progress" : "text",
-            current: step,
-            total: toolCalls.length,
-          }),
-        );
-      },
-    });
-    for (const toolMsg of toolMsgs) {
-      this.memory.put(toolMsg);
-    }
-    const chatHistory = await this.memory.getMessages();
-    return new InputEvent({ input: chatHistory });
-  };
-}
@@ -1,8 +1,12 @@
+import os
+
 from llama_index.core import Settings
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI


 def init_settings():
-    Settings.llm = OpenAI(model="gpt-4o-mini")
-    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
+    if os.getenv("OPENAI_API_KEY") is None:
+        raise RuntimeError("OPENAI_API_KEY is missing in environment variables")
+    Settings.llm = OpenAI(model="gpt-4.1")
+    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
@@ -51,7 +51,7 @@ def generate_ui_for_workflow():
    # and run the generate_ui_for_workflow function with the imported model.
    # Make sure the output filename of the generated UI component matches the event type (here `ui_event`)
    try:
-        from app.workflow import UIEventData
+        from app.workflow import UIEventData  # type: ignore
    except ImportError:
        raise ImportError("Couldn't generate UI component for the current workflow.")
    from llama_index.server.gen_ui import generate_event_component
@@ -16,9 +16,11 @@ def create_app():
        workflow_factory=create_workflow,  # A factory function that creates a new workflow for each request
        ui_config=UIConfig(
            component_dir=COMPONENT_DIR,
-            app_title="Chat App",
+            dev_mode=True,  # Please disable this in production
+            layout_dir="layout",
        ),
        logger=logger,
+        env="dev",
    )
    # You can also add custom FastAPI routes to app
    app.add_api_route("/api/health", lambda: {"message": "OK"}, status_code=200)
@@ -12,7 +12,7 @@ dependencies = [
    "pydantic<2.10",
    "aiostream>=0.5.2,<0.6.0",
    "llama-index-core>=0.12.28,<0.13.0",
-    "llama-index-server>=0.1.15,<0.2.0",
+    "llama-index-server>=0.1.17,<0.2.0",
 ]

 [project.optional-dependencies]
@@ -46,6 +46,9 @@ disable_error_code = [ "return-value", "assignment" ]
 module = "app.*"
 ignore_missing_imports = false

+[tool.hatch.metadata]
+allow-direct-references = true
+
 [build-system]
 requires = [ "hatchling>=1.24" ]
-build-backend = "hatchling.build"
+build-backend = "hatchling.build"
@@ -0,0 +1,6 @@
+{
+  "watch": ["src/**/*.ts"],
+  "exec": "nodemon --exec tsx src/index.ts",
+  "ext": "js ts",
+  "ignore": ["src/app/workflow_*.ts"]
+}
@@ -5,21 +5,23 @@
    "generate": "tsx src/generate.ts datasource",
    "generate:datasource": "tsx src/generate.ts datasource",
    "generate:ui": "tsx src/generate.ts ui",
-    "dev": "tsx watch src/index.ts",
-    "start": "tsx src/index.ts"
+    "dev": "nodemon",
+    "start": "tsx src/index.ts",
+    "eject": "llamaindex-server eject"
  },
  "dependencies": {
-    "@llamaindex/openai": "0.2.0",
-    "@llamaindex/readers": "^2.0.0",
-    "@llamaindex/server": "0.1.5",
-    "@llamaindex/tools": "0.0.4",
+    "@llamaindex/openai": "~0.4.0",
+    "@llamaindex/server": "~0.2.1",
+    "@llamaindex/workflow": "~1.1.3",
+    "@llamaindex/tools": "~0.0.11",
+    "llamaindex": "~0.11.0",
    "dotenv": "^16.4.7",
-    "zod": "^3.23.8",
-    "llamaindex": "0.10.2"
+    "zod": "^3.23.8"
  },
  "devDependencies": {
    "@types/node": "^20.10.3",
-    "tsx": "^4.7.2",
-    "typescript": "^5.3.2"
+    "tsx": "4.7.2",
+    "typescript": "^5.3.2",
+    "nodemon": "^3.1.10"
  }
 }
@@ -3,7 +3,7 @@ import { Settings } from "llamaindex";

 export function initSettings() {
  Settings.llm = new OpenAI({
-    model: "gpt-4o-mini",
+    model: "gpt-4.1",
  });
  Settings.embedModel = new OpenAIEmbedding({
    model: "text-embedding-3-small",
@@ -1,14 +1,10 @@
 import "dotenv/config";
 import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
-import {
-  OpenAI,
-  storageContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
 import { initSettings } from "./app/settings";
 import fs from "fs";
 import { generateEventComponent } from "@llamaindex/server";
-import { UIEventSchema } from "./app/workflow";
+import { OpenAI } from "@llamaindex/openai";

 async function generateDatasource() {
  console.log(`Generating storage context...`);
@@ -30,6 +26,14 @@ async function generateUi() {
  // Also works well with Claude 3.5 Sonnet and Google Gemini 2.5 Pro
  const llm = new OpenAI({ model: "gpt-4.1" });

+  const workflowModule = await import("./app/workflow");
+  const UIEventSchema = (workflowModule as any).UIEventSchema;
+  if (!UIEventSchema) {
+    throw new Error(
+      "To generate the UI, you must define a UIEventSchema in your workflow.",
+    );
+  }
+
  // You can also generate for other workflow events
  const generatedCode = await generateEventComponent(UIEventSchema, llm);
  // Write the generated code to components/ui_event.ts
@@ -8,7 +8,7 @@ initSettings();
 new LlamaIndexServer({
  workflow: workflowFactory,
  uiConfig: {
-    appTitle: "LlamaIndex App",
    componentsDir: "components",
+    devMode: true,
  },
 }).start();
@@ -9,6 +9,7 @@ readme = "README.md"
 requires-python = ">=3.11,<3.14"
 dependencies = [
    "llama-index>=0.12.1",
+    "llama-parse>=0.6.21,<0.7.0",
    "fastapi[standard]>=0.109.1",
    "uvicorn>=0.23.2",
    "python-dotenv>=1.0.0",
@@ -1 +1,8 @@
-server/
+# server contains Nextjs frontend code (not compiled)
+server/
+
+# the ejected nextjs project
+project/
+
+# temp is the copy of next folder but without API folder, used to build frontend static files
+temp/
@@ -1,5 +1,55 @@
 # @llamaindex/server

+## 0.2.7
+
+### Patch Changes
+
+- af9ad3c: feat: show document artifact after generating report
+- a543a27: feat: bump chat-ui with inline artifact
+- 1ff6eaf: Add support for chat upload file
+
+## 0.2.6
+
+### Patch Changes
+
+- 3ff0a18: fix: default header padding
+- df10474: fix: missing cursor pointer for button
+- 087c961: Support zod and chat-ui hooks for custom components
+
+## 0.2.5
+
+### Patch Changes
+
+- 058b376: Fix generate script for ejected project
+
+## 0.2.4
+
+### Patch Changes
+
+- 5fe9e17: support eject to fully customize next folder
+- b8a1ff6: Bump version: chat-ui@0.4.6
+
+## 0.2.3
+
+### Patch Changes
+
+- eee3230: feat: support custom layout
+- 0bc5a0d: Add suggestNextQuestions config
+- 3acec88: chore: bump chat-ui
+
+## 0.2.2
+
+### Patch Changes
+
+- 25fba43: refactor: migrate to Nextjs Route Handler
+- 6f75d4a: fix: unsupported language in code gen workflow
+
+## 0.2.1
+
+### Patch Changes
+
+- f072308: feat: add dev mode UI
+
 ## 0.2.0

 ### Minor Changes
@@ -0,0 +1,160 @@
+# @llamaindex/server Package
+
+This package provides a Next.js-based server framework for running LlamaIndex workflows with both API endpoints and a chat UI interface.
+
+## Overview
+
+The `@llamaindex/server` package (`src/`) allows you to quickly launch LlamaIndex Workflows and Agent Workflows as an API server with an optional sophisticated chat UI. It combines a backend API server with a frontend React interface built on Next.js.
+
+## Key Components
+
+### Core Server (src/server.ts)
+
+- **LlamaIndexServer class**: Main server implementation that wraps Next.js
+- Handles workflow factory initialization and UI configuration
+- Manages custom components and layout directories
+- Creates HTTP server with custom routing for chat API
+- Automatically configures client-side config in `public/config.js`
+
+### Chat Handler (src/handlers/chat.ts)
+
+- **handleChat function**: Processes POST requests to `/api/chat`
+- Converts AI SDK messages to LlamaIndex format
+- Manages workflow execution with abort signals
+- Streams responses back to client with optional question suggestions
+- Handles errors and validation
+
+### Workflow Management (src/utils/workflow.ts)
+
+- **runWorkflow function**: Executes workflows with proper event handling
+- Transforms workflow events (tool calls, source nodes) into UI-friendly formats
+- Downloads LlamaCloud files automatically in background
+- Processes agent events and source annotations
+
+### Event System (src/events.ts)
+
+- **Source Events**: For displaying document/file sources with metadata
+- **Agent Events**: For showing agent tool usage and progress
+- **Artifact Events**: For structured data like code/documents sent to Canvas UI
+- Helper functions for converting LlamaIndex data to UI events
+
+### UI Generation (src/utils/gen-ui.ts)
+
+- **generateEventComponent function**: Uses LLM to auto-generate React components
+- Creates workflow for UI planning, aggregation, and code generation
+- Validates generated components against supported dependencies
+- Supports shadcn/ui, lucide-react, tailwind CSS, and LlamaIndex chat-ui
+
+### Types (src/types.ts)
+
+- **WorkflowFactory**: Function signature for creating workflow instances
+- **UIConfig**: Configuration options for chat interface
+- **LlamaIndexServerOptions**: Main server configuration interface
+
+## Next.js Frontend
+
+The `next/` directory contains the React frontend:
+
+### API Routes
+
+- `/api/chat/route.ts`: Main chat endpoint (delegates to handleChat)
+- `/api/components/route.ts`: Serves custom UI components
+- `/api/layout/route.ts`: Serves custom layout components
+- `/api/files/[...slug]/route.ts`: File serving for data/output folders
+
+### UI Components
+
+- Chat interface with message history, streaming responses, and canvas panel
+- Extensible component system for custom workflow events
+- Custom layout support for headers/footers
+- Built with shadcn/ui components and Tailwind CSS
+
+## Build Process
+
+### Development
+
+```bash
+pnpm dev  # Watch mode with bunchee
+```
+
+### Production Build
+
+```bash
+pnpm build  # Multi-step build process
+```
+
+The build process:
+
+1. **prebuild**: Cleans dist, server, and temp directories
+2. **build**: Compiles source with bunchee to ESM/CJS
+3. **postbuild**: Prepares TypeScript server and Python static assets
+4. **prepare:ts-server**: Copies Next.js app, builds CSS, compiles API routes
+5. **prepare:py-static**: Creates static build for Python integration
+
+## Key Features
+
+### Workflow Integration
+
+- Factory pattern for creating workflow instances per request
+- Supports Agent Workflows with startAgentEvent/stopAgentEvent contract
+- Automatic event transformation and streaming
+- Built-in tool call and source node handling
+
+### UI Extensibility
+
+- AI-generated components based on Zod schemas
+- Custom layout sections (header/footer)
+- Canvas panel for artifacts (documents, code)
+- Event aggregation and real-time updates
+
+### File Handling
+
+- Automatic mounting of `data/` and `output/` folders
+- LlamaCloud file downloads in background
+- Static asset serving through Next.js
+
+### Development Features
+
+- Hot reload support for workflow code (beta)
+- Dev mode panel for live code editing
+- TypeScript support throughout
+- Comprehensive error handling
+
+## Configuration
+
+Server configuration through `LlamaIndexServerOptions`:
+
+- `workflow`: Factory function for creating workflow instances
+- `uiConfig.starterQuestions`: Predefined questions for chat interface
+- `uiConfig.componentsDir`: Directory for custom event components
+- `uiConfig.layoutDir`: Directory for custom layout components
+- `uiConfig.llamaCloudIndexSelector`: Enable LlamaCloud integration
+- `uiConfig.devMode`: Enable live code editing
+- `suggestNextQuestions`: Auto-suggest follow-up questions
+
+## Dependencies
+
+### Runtime Dependencies
+
+- Next.js 15+ for server framework
+- React 19+ for UI components
+- LlamaIndex workflow engine
+- Radix UI components (shadcn/ui)
+- AI SDK for streaming responses
+
+### Development Dependencies
+
+- Bunchee for bundling
+- TypeScript for type safety
+- Tailwind CSS for styling
+- PostCSS for CSS processing
+
+## Usage Patterns
+
+1. **Basic Setup**: Create workflow factory, configure UI, start server
+2. **Custom Events**: Define Zod schemas, generate UI components with LLM
+3. **File Integration**: Use data/output folders for document processing
+4. **Development**: Use dev mode for iterative workflow development
+5. **Production**: Build static assets for deployment with Python backend
+
+The package serves as a complete solution for deploying LlamaIndex workflows with professional chat interfaces and extensible UI components.
@@ -4,10 +4,10 @@ LlamaIndexServer is a Next.js-based application that allows you to quickly launc

 ## Features

- Serving a workflow as a chatbot
+- Add a sophisticated chatbot UI to your LlamaIndex workflow
+- Edit code and document artifacts in an OpenAI Canvas-style UI
+- Extendable UI components for events and headers
 - Built on Next.js for high performance and easy API development
- Optional built-in chat UI with extendable UI components
- Prebuilt development code

 ## Installation

@@ -21,19 +21,22 @@ Create an `index.ts` file and add the following code:

 ```ts
 import { LlamaIndexServer } from "@llamaindex/server";
+import { openai } from "@llamaindex/openai";
+import { agent } from "@llamaindex/workflow";
 import { wiki } from "@llamaindex/tools"; // or any other tool

-const createWorkflow = () => agent({ tools: [wiki()] });
+const createWorkflow = () => agent({ tools: [wiki()], llm: openai("gpt-4o") });

 new LlamaIndexServer({
  workflow: createWorkflow,
  uiConfig: {
-    appTitle: "LlamaIndex App",
    starterQuestions: ["Who is the first president of the United States?"],
  },
 }).start();
 ```

+The `createWorkflow` function is a factory function that creates an [Agent Workflow](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow) with a tool that retrieves information from Wikipedia in this case. For more details, read about the [Workflow factory contract](#workflow-factory-contract).
+
 ## Running the Server

 In the same directory as `index.ts`, run the following command to start the server:
@@ -54,16 +57,86 @@ curl -X POST "http://localhost:3000/api/chat" -H "Content-Type: application/json

 The `LlamaIndexServer` accepts the following configuration options:

- `workflow`: A callable function that creates a workflow instance for each request
+- `workflow`: A callable function that creates a workflow instance for each request. See [Workflow factory contract](#workflow-factory-contract) for more details.
 - `uiConfig`: An object to configure the chat UI containing the following properties:
-  - `appTitle`: The title of the application (default: `"LlamaIndex App"`)
  - `starterQuestions`: List of starter questions for the chat UI (default: `[]`)
+  - `enableFileUpload`: Whether to enable file upload in the chat UI (default: `false`). See [Upload file example](./examples/private-file/README.md) for more details.
  - `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
+  - `layoutDir`: The directory for custom layout sections. The default value is `layout`. See [Custom Layout](#custom-layout) for more details.
  - `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
+  - `devMode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/src/workflow.ts`. Please start the server in dev mode (`npm run dev`) to enable this reload feature.
+- `suggestNextQuestions`: Whether to suggest next questions after the assistant's response (default: `true`). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable.

 LlamaIndexServer accepts all the configuration options from Nextjs Custom Server such as `port`, `hostname`, `dev`, etc.
 See all Nextjs Custom Server options [here](https://nextjs.org/docs/app/building-your-application/configuring/custom-server).

+## Workflow factory contract
+
+The `workflow` provided will be called for each chat request to initialize a new workflow instance. For advanced use cases, you can define the workflow factory with a `chatBody` parameter, which includes the list of UI messages from the request body.
+
+```typescript
+import { type Message } from "ai";
+import { agent } from "@llamaindex/workflow";
+
+const workflowFactory = (chatBody: { messages: Message[] }) => {
+  ...
+};
+```
+
+The contract of the generated workflow must be the same as for the [Agent Workflow](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow). This means that the workflow must handle a `startAgentEvent` event, which is the entry point of the workflow and contains the following information in its `data` property:
+
+```typescript
+{
+  userInput: MessageContent;
+  chatHistory?: ChatMessage[] | undefined;
+};
+```
+
+The `userInput` is the latest user message and the `chatHistory` is the list of messages exchanged between the user and the workflow so far.
+
+Furthermore, the workflow must stop with a `stopAgentEvent` event to mark the end of the workflow. In between, the workflow can emit [UI events](#ai-generated-ui-components) to render custom UI components and [Artifact events](#sending-artifacts-to-the-ui) to send structured data like generated documents or code snippets to the UI.
+
+```ts
+import {
+  createStatefulMiddleware,
+  createWorkflow,
+  getContext,
+  startAgentEvent,
+} from "@llamaindex/workflow";
+import { ChatMemoryBuffer, type ChatMessage, Settings } from "llamaindex";
+import { openai } from "@llamaindex/openai";
+import { chatWithTools, wiki } from "@llamaindex/tools";
+
+// The LLM used by the workflow for tool calling.
+Settings.llm = openai("gpt-4o");
+
+// Factory called for each chat request; returns a new workflow instance.
+export const workflowFactory = async () => {
+  const workflow = createWorkflow();
+
+  // Entry point: `data` carries the `userInput` and `chatHistory` of the request.
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { state, sendEvent } = getContext();
+    const messages = data.chatHistory;
+
+    // Ask the LLM to answer, letting it call the Wikipedia tool if needed.
+    const toolCallResponse = await chatWithTools(
+      Settings.llm,
+      [wiki()],
+      messages,
+    );
+
+    // using result from tool call and use `sendEvent` to emit the next event...
+  });
+
+  // define more workflow handling logic here...
+
+  // Finally stop with a `stopAgentEvent` event to mark the end of the workflow.
+  // return stopAgentEvent.with({
+  //   result: "This is the end!",
+  // });
+
+  return workflow;
+};
+```
+
+To generate more sophisticated workflow examples, we recommend using the [create-llama](https://github.com/run-llama/create-llama) project.
+
 ## AI-generated UI Components

 The LlamaIndex server provides support for rendering workflow events using custom UI components, allowing you to extend and customize the chat interface.
@@ -123,6 +196,28 @@ Feel free to modify the generated code to match your needs. If you're not satisf

 > Note that `generateEventComponent` is generating JSX code, but you can also provide a TSX file.

+## Custom Layout
+
+LlamaIndex Server supports custom layouts for the header and footer. To use a custom layout, you need to initialize the LlamaIndex server with the `layoutDir` that contains your custom layout files.
+
+```ts
+new LlamaIndexServer({
+  workflow: createWorkflow,
+  uiConfig: {
+    layoutDir: "layout",
+  },
+}).start();
+```
+
+```
+layout/
+  header.tsx
+  footer.tsx
+```
+
+We currently support a custom header and footer for the chat interface. The syntax for these files is the same as for the event components in the components directory.
+Note that by default, the default LlamaIndex header is still rendered. It is also used as the fallback when an error occurs while rendering the custom header. Example layout files will be generated in the `layout` directory of your project when creating a new project with `create-llama`.
+
 ### Server Setup

 To use the generated UI components, you need to initialize the LlamaIndex server with the `componentsDir` that contains your custom UI components:
@@ -131,12 +226,71 @@ To use the generated UI components, you need to initialize the LlamaIndex server
 new LlamaIndexServer({
  workflow: createWorkflow,
  uiConfig: {
-    appTitle: "LlamaIndex App",
    componentsDir: "components",
  },
 }).start();
 ```

+## Sending Artifacts to the UI
+
+In addition to UI events for custom components, LlamaIndex Server supports a special `ArtifactEvent` to send structured data like generated documents or code snippets to the UI. These artifacts are displayed in a dedicated "Canvas" panel in the chat interface.
+
+### Artifact Event Structure
+
+To send an artifact, your workflow needs to emit an event with `type: "artifact"`. The `data` payload of this event should include:
+
+- `type`: A string indicating the type of artifact (e.g., `"document"`, `"code"`).
+- `created_at`: A timestamp (e.g., `Date.now()`) indicating when the artifact was created.
+- `data`: An object containing the specific details of the artifact. The structure of this object depends on the artifact `type`.
+
+### Defining and Sending an ArtifactEvent
+
+First, define your artifact event using `workflowEvent` from `@llamaindex/workflow`:
+
+```typescript
+import { workflowEvent } from "@llamaindex/workflow";
+
+// Example for a document artifact
+const artifactEvent = workflowEvent<{
+  type: "artifact"; // Must be "artifact"
+  data: {
+    type: "document"; // Custom type for your artifact (e.g., "document", "code")
+    created_at: number;
+    data: {
+      // Specific data for the document artifact type
+      title: string;
+      content: string;
+      type: "markdown" | "html"; // document format
+    };
+  };
+}>();
+```
+
+Then, within your workflow logic, use `sendEvent` (obtained from `getContext()`) to emit the event:
+
+```typescript
+// Assuming 'sendEvent' is available in your workflow handler
+// and 'documentDetails' contains the content for the artifact.
+
+sendEvent(
+  artifactEvent.with({
+    type: "artifact", // This top-level type must be "artifact"
+    data: {
+      type: "document", // This is your specific artifact type
+      created_at: Date.now(),
+      data: {
+        title: "My Generated Document",
+        content: "# Hello World\nThis is a markdown document.",
+        type: "markdown",
+      },
+    },
+  }),
+);
+```
+
+This will send the artifact to the LlamaIndex Server UI, where it will be rendered in the [ChatCanvasPanel](/packages/server/next/app/components/ui/chat/canvas/panel.tsx) by a renderer depending on the artifact type. For type `document` this is using the [DocumentArtifactViewer](https://github.com/run-llama/chat-ui/blob/bacb75fc6edceacf742fba18632404a2483b5a81/packages/chat-ui/src/chat/canvas/artifacts/document.tsx#L17).
+
 ## Default Endpoints and Features

 ### Chat Endpoint
@@ -156,6 +310,23 @@ The server always provides a chat interface at the root path (`/`) with:
 - The server automatically mounts the `data` and `output` folders at `{server_url}{api_prefix}/files/data` (default: `/api/files/data`) and `{server_url}{api_prefix}/files/output` (default: `/api/files/output`) respectively.
 - Your workflows can use both folders to store and access files. By convention, the `data` folder is used for documents that are ingested, and the `output` folder is used for documents generated by the workflow.

+### Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm eject`. It creates a regular Next.js project with the same functionality as `@llamaindex/server`.
+By default, the ejected project is placed in the `next` directory inside the current working directory. You can change the output directory by providing a custom path after the `eject` command:
+
+```bash
+npm eject <path-to-output-directory>
+```
+
+How eject works:
+
+1. Init nextjs project with eslint, prettier, postcss, tailwindcss, shadcn components, etc.
+2. Copy your workflow definition and setting files in src/app/\* to the ejected project in app/api/chat
+3. Copy your components, data, output, storage folders to the ejected project
+4. Copy your current .env file to the ejected project
+5. Clean up files that are no longer needed and update imports
+
 ## API Reference

 - [LlamaIndexServer](https://ts.llamaindex.ai/docs/api/classes/LlamaIndexServer)
@@ -0,0 +1,172 @@
+#!/usr/bin/env node
+
+const fs = require("fs").promises;
+const path = require("path");
+
+// Resolve the project directory in node_modules/@llamaindex/server/project
+// This is the template that used to construct the nextjs project
+const projectDir = path.resolve(__dirname, "../project");
+
+// Resolve the src directory that contains workflow & setting files
+const srcDir = path.join(process.cwd(), "src");
+const srcAppDir = path.join(srcDir, "app");
+const generateFile = path.join(srcDir, "generate.ts");
+const envFile = path.join(process.cwd(), ".env");
+
+// The environment variables that are used as LlamaIndexServer configs.
+// Each entry provides:
+//   - key: the environment variable name
+//   - defaultValue: the value written to the ejected .env when the key is missing
+//   - description: text written as a `#` comment line above the variable
+const SERVER_CONFIG_VARS = [
+  {
+    key: "OPENAI_API_KEY",
+    defaultValue: "<your-openai-api-key>",
+    description: "OpenAI API key",
+  },
+  {
+    key: "SUGGEST_NEXT_QUESTIONS",
+    defaultValue: "true",
+    description: "Whether to suggest next questions (`suggestNextQuestions`)",
+  },
+  {
+    key: "COMPONENTS_DIR",
+    defaultValue: "components",
+    description: "Directory for custom components (`componentsDir`)",
+  },
+  {
+    key: "WORKFLOW_FILE_PATH",
+    defaultValue: "app/api/chat/app/workflow.ts",
+    description: "The path to the workflow file (will be updated in dev mode)",
+  },
+  {
+    key: "NEXT_PUBLIC_USE_COMPONENTS_DIR",
+    defaultValue: "true",
+    description: "Whether to enable components directory feature on frontend",
+  },
+  {
+    key: "NEXT_PUBLIC_DEV_MODE",
+    defaultValue: "true",
+    description: "Whether to enable dev mode (`devMode`)",
+  },
+  {
+    key: "NEXT_PUBLIC_STARTER_QUESTIONS",
+    defaultValue: '["Summarize the document", "What are the key points?"]',
+    description:
+      "Initial questions to display in the chat (`starterQuestions`)",
+  },
+  {
+    key: "NEXT_PUBLIC_SHOW_LLAMACLOUD_SELECTOR",
+    defaultValue: "false",
+    description:
+      "Whether to show LlamaCloud selector for frontend (`llamaCloudIndexSelector`)",
+  },
+];
+
+async function eject() {
+  try {
+    // validate required directories (nextjs project template, src directory, src/app directory)
+    const requiredDirs = [projectDir, srcDir, srcAppDir];
+    for (const dir of requiredDirs) {
+      const exists = await fs
+        .access(dir)
+        .then(() => true)
+        .catch(() => false);
+      if (!exists) {
+        console.error("Error: directory does not exist at", dir);
+        process.exit(1);
+      }
+    }
+
+    // Get destination directory from command line arguments (pnpm eject <path>)
+    const args = process.argv;
+    const outputIndex = args.indexOf("eject");
+    const destDir =
+      outputIndex !== -1 && args[outputIndex + 1]
+        ? path.resolve(args[outputIndex + 1]) // Use provided path after eject
+        : path.join(process.cwd(), "next"); // Default to "next" folder in the current working directory
+
+    // remove destination directory if it exists
+    await fs.rm(destDir, { recursive: true, force: true });
+
+    // create destination directory
+    await fs.mkdir(destDir, { recursive: true });
+
+    // Copy the nextjs project template to the destination directory
+    await fs.cp(projectDir, destDir, { recursive: true });
+
+    // copy src/app/* to destDir/app/api/chat
+    const chatRouteDir = path.join(destDir, "app", "api", "chat");
+    await fs.cp(srcAppDir, path.join(chatRouteDir, "app"), { recursive: true });
+
+    // nextjs project doesn't depend on @llamaindex/server anymore, we need to update the imports in workflow file
+    const workflowFile = path.join(chatRouteDir, "app", "workflow.ts");
+    let workflowContent = await fs.readFile(workflowFile, "utf-8");
+    workflowContent = workflowContent.replace("@llamaindex/server", "../utils");
+    await fs.writeFile(workflowFile, workflowContent);
+
+    // copy generate.ts if it exists
+    const genFilePath = path.join(chatRouteDir, "generate.ts");
+    const genFileExists = await copy(generateFile, genFilePath);
+    if (genFileExists) {
+      // update the import @llamaindex/server in generate.ts
+      let genContent = await fs.readFile(genFilePath, "utf-8");
+      genContent = genContent.replace("@llamaindex/server", "./utils");
+      await fs.writeFile(genFilePath, genContent);
+    }
+
+    // copy folders in root directory if exists
+    const rootFolders = ["components", "data", "output", "storage"];
+    for (const folder of rootFolders) {
+      await copy(path.join(process.cwd(), folder), path.join(destDir, folder));
+    }
+
+    // copy .env if it exists or create a new one
+    const envFileExists = await copy(envFile, path.join(destDir, ".env"));
+    if (!envFileExists) {
+      await fs.writeFile(path.join(destDir, ".env"), "");
+    }
+
+    // update .env file with more server configs
+    let envFileContent = await fs.readFile(path.join(destDir, ".env"), "utf-8");
+    for (const envVar of SERVER_CONFIG_VARS) {
+      const { key, defaultValue, description } = envVar;
+      if (!envFileContent.includes(key)) {
+        // if the key is not exists in the env file, add it
+        envFileContent += `\n# ${description}\n${key}=${defaultValue}\n`;
+      }
+    }
+    await fs.writeFile(path.join(destDir, ".env"), envFileContent);
+
+    // rename gitignore -> .gitignore
+    await fs.rename(
+      path.join(destDir, "gitignore"),
+      path.join(destDir, ".gitignore"),
+    );
+
+    // user can customize layout directory in nextjs project, remove layout api
+    await fs.rm(path.join(destDir, "app", "api", "layout"), {
+      recursive: true,
+      force: true,
+    });
+
+    // remove no-needed files
+    await fs.unlink(path.join(destDir, "public", "config.js"));
+    await fs.unlink(path.join(destDir, "next-build.config.ts"));
+
+    console.log("Successfully ejected @llamaindex/server to", destDir);
+  } catch (error) {
+    console.error("Error during eject:", error.message);
+    process.exit(1);
+  }
+}
+
+// copy src to dest if src exists, return true if src exists
+async function copy(src, dest) {
+  const srcExists = await fs
+    .access(src)
+    .then(() => true)
+    .catch(() => false);
+  if (srcExists) {
+    await fs.cp(src, dest, { recursive: true });
+  }
+  return srcExists;
+}
+
+eject();
@@ -0,0 +1,186 @@
+# LlamaIndex Server Examples
+
+This package contains practical examples demonstrating how to use the `@llamaindex/server` package to build chat applications with LlamaIndex workflows.
+
+## Package Overview
+
+The examples package is a collection of standalone TypeScript applications that showcase different features and capabilities of the LlamaIndex Server framework. Each example can be run independently to demonstrate specific functionality.
+
+## Key Features Demonstrated
+
+### 1. Simple Workflow (`simple-workflow/calculator.ts`)
+
+- **Purpose**: Basic agent workflow with tool integration
+- **Features**: Calculator agent with add tool, starter questions
+- **Key Concepts**: Tool definition with Zod schemas, basic server setup
+
+### 2. Agentic RAG (`agentic-rag/index.ts`)
+
+- **Purpose**: Retrieval-Augmented Generation with document querying
+- **Features**: Vector store index, document ingestion, query engine tool, automatic question suggestions
+- **Key Concepts**: RAG implementation, source node inclusion, embedding models
+
+### 3. Custom Layout (`custom-layout/index.ts` + `layout/header.tsx`)
+
+- **Purpose**: Custom UI components and layout customization
+- **Features**: Weather agent with custom header layout, branded interface
+- **Key Concepts**: Layout directory configuration, React component integration
+
+### 4. Development Mode (`devmode/index.ts` + `src/app/workflow.ts`)
+
+- **Purpose**: Live development and hot reloading capabilities
+- **Features**: Dev mode panel, workflow file hot reloading, separate workflow file structure
+- **Key Concepts**: Development workflow, file watching, modular architecture
+
+## Development Scripts
+
+```bash
+# Type checking
+pnpm typecheck
+
+# Run development server (defaults to simple-workflow/calculator.ts)
+pnpm dev
+
+# Run specific examples
+npx nodemon --exec tsx agentic-rag/index.ts
+npx nodemon --exec tsx custom-layout/index.ts
+npx nodemon --exec tsx devmode/index.ts --ignore src/app/workflow_*.ts  # Dev mode with file watching
+```
+
+## Environment Setup
+
+All examples require OpenAI API access:
+
+```bash
+export OPENAI_API_KEY=your_openai_api_key
+```
+
+## Dependencies
+
+### Core Dependencies
+
+- `@llamaindex/server`: Main server framework (workspace dependency)
+- `@llamaindex/workflow`: Workflow engine for agent creation
+- `@llamaindex/openai`: OpenAI LLM and embedding integrations
+- `@llamaindex/tools`: Tool utilities
+- `@llamaindex/readers`: Document readers
+- `llamaindex`: Core LlamaIndex library
+- `zod`: Schema validation for tools
+
+### Development Dependencies
+
+- `tsx`: TypeScript execution for development
+- `nodemon`: File watching and auto-restart
+- `typescript`: TypeScript compiler
+
+## Architecture Patterns
+
+### Workflow Factory Pattern
+
+All examples use the workflow factory pattern:
+
+```typescript
+const workflowFactory = () => agent({ tools: [...] });
+// or
+const workflowFactory = async () => { /* setup logic */ return agent({ tools: [...] }); };
+```
+
+### Server Configuration
+
+Standard server setup pattern:
+
+```typescript
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: {
+    /* UI configuration */
+  },
+  port: 3000,
+}).start();
+```
+
+### Tool Definition Pattern
+
+Consistent tool creation with Zod schemas:
+
+```typescript
+tool({
+  name: "tool_name",
+  description: "Tool description",
+  parameters: z.object({
+    /* parameters */
+  }),
+  execute: (params) => {
+    /* implementation */
+  },
+});
+```
+
+## Example-Specific Features
+
+### Simple Workflow
+
+- Basic arithmetic operations
+- Minimal setup for learning
+- Demonstrates core workflow concepts
+
+### Agentic RAG
+
+- Document indexing with embeddings
+- Vector similarity search
+- Source node tracking for citations
+- Auto-generated follow-up questions
+
+### Custom Layout
+
+- Custom React components in `layout/` directory
+- Branded header with navigation
+- Layout directory configuration (`layoutDir: "layout"`)
+
+### Dev Mode
+
+- Live code editing in browser
+- Hot reloading of workflow files
+- Separate workflow file organization
+- Development panel UI
+
+## TypeScript Configuration
+
+- Target: ES2022 with bundler module resolution
+- Strict type checking enabled
+- Excludes: `node_modules`, `dist`, `custom-layout/layout` (runtime components)
+- Output: `dist/` directory
+
+## Development Workflow
+
+1. **Choose Example**: Select appropriate example for your use case
+2. **Environment Setup**: Configure OpenAI API key
+3. **Run Development Server**: Use `pnpm dev` or specific nodemon commands
+4. **Access UI**: Open browser at `http://localhost:3000`
+5. **Iterate**: Modify code and see changes in real-time
+
+## Common Patterns
+
+### Agent Creation
+
+All examples use the `agent()` function from `@llamaindex/workflow` with tool arrays.
+
+### UI Configuration
+
+- `starterQuestions`: Predefined questions for user guidance
+- `layoutDir`: Custom layout components directory
+- `devMode`: Enable development features
+- `suggestNextQuestions`: Auto-generate follow-up questions
+
+### Error Handling
+
+Examples demonstrate proper async/await patterns and error handling for LLM operations.
+
+## Integration Points
+
+- **LlamaIndex Core**: Document processing, indexing, querying
+- **OpenAI**: LLM and embedding model integration
+- **React/Next.js**: Frontend UI components and server-side rendering
+- **TypeScript**: Type safety throughout the application stack
+
+This examples package serves as a comprehensive reference for building production-ready chat applications with LlamaIndex workflows.
@@ -0,0 +1,38 @@
+# LlamaIndex Server Examples
+
+This directory provides example projects demonstrating how to use the LlamaIndex Server.
+
+## How to Run the Examples
+
+1. **Install dependencies**
+
+   In the root of this directory, run:
+
+   ```bash
+   pnpm install
+   ```
+
+2. **Set your OpenAI API key**
+
+   Export your OpenAI API key as an environment variable:
+
+   ```bash
+   export OPENAI_API_KEY=your_openai_api_key
+   ```
+
+3. **Start an example**
+
+   Replace `<example>` with the name of the example you want to run (e.g., `private-file`):
+
+   ```bash
+   pnpm nodemon --exec tsx <example>/index.ts
+   ```
+
+4. **Open the application in your browser**
+
+   Visit [http://localhost:3000](http://localhost:3000) to interact with the running example.
+
+## Notes
+
+- Make sure you have [pnpm](https://pnpm.io/) installed.
+- Each example may have its own specific instructions or requirements; check the individual example's index.ts for details.
@@ -0,0 +1,38 @@
+import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { agent } from "@llamaindex/workflow";
+import { Document, Settings, VectorStoreIndex } from "llamaindex";
+
+// Global LLM used by the example (OpenAI gpt-4o-mini)
+Settings.llm = new OpenAI({
+  model: "gpt-4o-mini",
+});
+
+// Global embedding model used when building the vector index below
+Settings.embedModel = new OpenAIEmbedding({
+  model: "text-embedding-3-small",
+});
+
+/**
+ * Workflow factory handed to LlamaIndexServer.
+ * Indexes two small in-memory documents and returns an agent whose only tool
+ * (`query_document`) queries that index and includes the source nodes.
+ */
+export const workflowFactory = async () => {
+  const documents = [
+    new Document({ text: "The dog is brown" }),
+    new Document({ text: "The dog is yellow" }),
+  ];
+  const vectorIndex = await VectorStoreIndex.fromDocuments(documents);
+
+  const tools = [
+    vectorIndex.queryTool({
+      metadata: {
+        name: "query_document",
+        description: `This tool can retrieve information in documents`,
+      },
+      includeSourceNodes: true,
+    }),
+  ];
+
+  return agent({ tools });
+};
+
+// Start the server on port 3000 with follow-up question suggestions enabled
+// and one starter question shown in the chat UI
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  suggestNextQuestions: true,
+  uiConfig: {
+    starterQuestions: ["What is the color of the dog?"],
+  },
+  port: 3000,
+}).start();
@@ -0,0 +1,22 @@
+This example demonstrates how to use the code generation workflow.
+
+```ts
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: {
+    starterQuestions: [
+      "Generate a calculator app",
+      "Create a simple todo list app",
+    ],
+    componentsDir: "components",
+  },
+  port: 3000,
+}).start();
+```
+
+Export OpenAI API key and start the server in dev mode.
+
+```bash
+export OPENAI_API_KEY=<your-openai-api-key>
+npx nodemon --exec tsx index.ts
+```
@@ -0,0 +1,132 @@
+import { Badge } from "@/components/ui/badge";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Progress } from "@/components/ui/progress";
+import { Skeleton } from "@/components/ui/skeleton";
+import { cn } from "@/lib/utils";
+import { Markdown } from "@llamaindex/chat-ui/widgets";
+import { ListChecks, Loader2, Wand2 } from "lucide-react";
+import { useEffect, useState } from "react";
+
+// Presentation metadata per workflow state, keyed by `event.state`.
+// Only "plan" and "generate" have an entry; any other state (e.g. "completed")
+// resolves to undefined and the card renders nothing.
+const STAGE_META = {
+  plan: {
+    icon: ListChecks,
+    badgeText: "Step 1/2: Planning",
+    gradient: "from-blue-100 via-blue-50 to-white",
+    progress: 33,
+    iconBg: "bg-blue-100 text-blue-600",
+    badge: "bg-blue-100 text-blue-700",
+  },
+  generate: {
+    icon: Wand2,
+    badgeText: "Step 2/2: Generating",
+    gradient: "from-violet-100 via-violet-50 to-white",
+    progress: 66,
+    iconBg: "bg-violet-100 text-violet-600",
+    badge: "bg-violet-100 text-violet-700",
+  },
+};
+
+/**
+ * Progress card for the artifact workflow.
+ *
+ * Renders a card for the "plan" and "generate" states and nothing otherwise
+ * (no event, "completed", or any state without an entry in STAGE_META).
+ *
+ * Visibility is derived directly from `event.state`: the previous `visible`
+ * state/effect only mirrored that value (costing an extra render) and the
+ * `fade` flag was never set to true, so both were removed.
+ *
+ * @param event - latest UI event, shaped like `{ state, requirement? }`
+ */
+function ArtifactWorkflowCard({ event }) {
+  if (!event) return null;
+
+  const { state, requirement } = event;
+  // No metadata means there is nothing to display for this state
+  const meta = STAGE_META[state];
+
+  if (!meta) return null;
+
+  return (
+    <div className="flex min-h-[180px] w-full items-center justify-center py-2">
+      <Card
+        className={cn(
+          "w-full rounded-xl shadow-md transition-all duration-500",
+          "border-0",
+          `bg-gradient-to-br ${meta.gradient}`,
+        )}
+        style={{
+          boxShadow:
+            "0 2px 12px 0 rgba(80, 80, 120, 0.08), 0 1px 3px 0 rgba(80, 80, 120, 0.04)",
+        }}
+      >
+        <CardHeader className="flex flex-row items-center gap-2 px-3 pb-1 pt-2">
+          <div
+            className={cn(
+              "flex items-center justify-center rounded-full p-1",
+              meta.iconBg,
+            )}
+          >
+            <meta.icon className="h-5 w-5" />
+          </div>
+          <CardTitle className="flex items-center gap-2 text-base font-semibold">
+            <Badge className={cn("ml-1", meta.badge, "px-2 py-0.5 text-xs")}>
+              {meta.badgeText}
+            </Badge>
+          </CardTitle>
+        </CardHeader>
+        <CardContent className="px-3 py-1">
+          {/* Planning: spinner with a placeholder skeleton */}
+          {state === "plan" && (
+            <div className="flex flex-col items-center gap-2 py-2">
+              <Loader2 className="mb-1 h-6 w-6 animate-spin text-blue-400" />
+              <div className="text-center text-sm font-medium text-blue-900">
+                Analyzing your request...
+              </div>
+              <Skeleton className="mt-1 h-3 w-1/2 rounded-full" />
+            </div>
+          )}
+          {/* Generating: show the requirement currently being worked on */}
+          {state === "generate" && (
+            <div className="flex flex-col gap-2 py-2">
+              <div className="flex items-center gap-1">
+                <Loader2 className="h-4 w-4 animate-spin text-violet-400" />
+                <span className="text-sm font-medium text-violet-900">
+                  Working on the requirement:
+                </span>
+              </div>
+              <div className="max-h-24 overflow-auto rounded-lg border border-violet-200 bg-violet-50 px-2 py-1 text-xs">
+                {requirement ? (
+                  <Markdown content={requirement} />
+                ) : (
+                  <span className="italic text-violet-400">
+                    No requirements available yet.
+                  </span>
+                )}
+              </div>
+            </div>
+          )}
+        </CardContent>
+        <div className="px-3 pb-2 pt-1">
+          <Progress
+            value={meta.progress}
+            className={cn(
+              "h-1 rounded-full bg-gray-200",
+              state === "plan" && "bg-blue-200",
+              state === "generate" && "bg-violet-200",
+            )}
+          />
+        </div>
+      </Card>
+    </div>
+  );
+}
+
+/**
+ * Entry component for the `ui_event` renderer.
+ * Picks the last event of `events` (or null when there is none) and renders
+ * the workflow card for it.
+ */
+export default function Component({ events }) {
+  const latestEvent =
+    !events || events.length === 0 ? null : events[events.length - 1];
+
+  return <ArtifactWorkflowCard event={latestEvent} />;
+}
@@ -0,0 +1,20 @@
+import { OpenAI } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { Settings } from "llamaindex";
+import { workflowFactory } from "./src/app/workflow";
+
+// Global LLM used by the code generation workflow (read via Settings.llm)
+Settings.llm = new OpenAI({
+  model: "gpt-4o-mini",
+});
+
+// Start the server on port 3000; custom UI event components are loaded from
+// the `components` directory
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: {
+    starterQuestions: [
+      "Generate a calculator app",
+      "Create a simple todo list app",
+    ],
+    componentsDir: "components",
+  },
+  port: 3000,
+}).start();
@@ -0,0 +1,337 @@
+import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
+import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
+
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+
+import { z } from "zod";
+
+// Shape of the plan produced by the planning step: which step comes next and
+// the requirement text for it (language and file name are optional/nullable)
+export const RequirementSchema = z.object({
+  next_step: z.enum(["answering", "coding"]),
+  language: z.string().nullable().optional(),
+  file_name: z.string().nullable().optional(),
+  requirement: z.string(),
+});
+
+export type Requirement = z.infer<typeof RequirementSchema>;
+
+// Payload of the UI progress event emitted by the workflow handlers
+export const UIEventSchema = z.object({
+  type: z.literal("ui_event"),
+  data: z.object({
+    state: z
+      .enum(["plan", "generate", "completed"])
+      .describe(
+        "The current state of the workflow: 'plan', 'generate', or 'completed'.",
+      ),
+    requirement: z
+      .string()
+      .optional()
+      .describe(
+        "An optional requirement creating or updating a code, if applicable.",
+      ),
+  }),
+});
+
+export type UIEvent = z.infer<typeof UIEventSchema>;
+// Internal event: start planning for the user input (plus the serialized last artifact, if any)
+const planEvent = workflowEvent<{
+  userInput: MessageContent;
+  context?: string | undefined;
+}>();
+
+// Internal event: the plan asked for a coding step
+const generateArtifactEvent = workflowEvent<{
+  requirement: Requirement;
+}>();
+
+// Internal event: produce the final answer for the user (carries no data)
+const synthesizeAnswerEvent = workflowEvent<object>();
+
+// Event carrying a UIEvent payload (progress updates)
+const uiEvent = workflowEvent<UIEvent>();
+
+export function workflowFactory(reqBody: unknown) {
+  const llm = Settings.llm;
+
+  const { withState, getContext } = createStatefulMiddleware(() => {
+    return {
+      memory: new ChatMemoryBuffer({ llm }),
+      lastArtifact: extractLastArtifact(reqBody),
+    };
+  });
+  const workflow = withState(createWorkflow());
+
+  // Entry point: validates the user input, seeds the memory with the chat
+  // history and the new user message, then triggers the planning step.
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    const { state } = getContext();
+    // The workflow cannot start without a user message
+    if (!userInput) {
+      throw new Error("Missing user input to start the workflow");
+    }
+    // Prepare chat history, then put the user input to the memory
+    state.memory.set(chatHistory);
+    state.memory.put({ role: "user", content: userInput });
+
+    // Pass the last artifact (serialized as JSON) as context when there is one
+    return planEvent.with({
+      userInput: userInput,
+      context: state.lastArtifact
+        ? JSON.stringify(state.lastArtifact)
+        : undefined,
+    });
+  });
+
+  // Planning step: asks the LLM to decide between "coding" and "answering",
+  // parses the JSON plan from its response and routes to the matching event.
+  // Throws when the response has no ```json block or the plan fails validation.
+  workflow.handle([planEvent], async ({ data: planData }) => {
+    const { sendEvent } = getContext();
+    const { state } = getContext();
+    // Tell the UI that planning has started
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "plan",
+        },
+      }),
+    );
+    const user_msg = planData.userInput;
+    // Optional context section (the serialized previous artifact)
+    const context = planData.context
+      ? `## The context is: \n${planData.context}\n`
+      : "";
+    const prompt = `
+You are a product analyst responsible for analyzing the user's request and providing the next step for code or document generation.
+You are helping user with their code artifact. To update the code, you need to plan a coding step.
+
+Follow these instructions:
+1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
+2. The next step must be one of the following two options:
+    - "coding": To make the changes to the current code.
+    - "answering": If you don't need to update the current code or need clarification from the user.
+Important: Avoid telling the user to update the code themselves, you are the one who will update the code (by planning a coding step).
+3. If the next step is "coding", you may specify the language ("typescript" or "python") and file_name if known, otherwise set them to null. 
+4. The requirement must be provided clearly what is the user request and what need to be done for the next step in details
+    as precise and specific as possible, don't be stingy with in the requirement.
+5. If the next step is "answering", set language and file_name to null, and the requirement should describe what to answer or explain to the user.
+6. Be concise; only return the requirements for the next step.
+7. The requirements must be in the following format:
+    \`\`\`json
+    {
+        "next_step": "answering" | "coding",
+        "language": "typescript" | "python" | null,
+        "file_name": string | null,
+        "requirement": string
+    }
+    \`\`\`
+
+## Example 1:
+User request: Create a calculator app.
+You should return:
+\`\`\`json
+{
+    "next_step": "coding",
+    "language": "typescript",
+    "file_name": "calculator.tsx",
+    "requirement": "Generate code for a calculator app that has a simple UI with a display and button layout. The display should show the current input and the result. The buttons should include basic operators, numbers, clear, and equals. The calculation should work correctly."
+}
+\`\`\`
+
+## Example 2:
+User request: Explain how the game loop works.
+Context: You have already generated the code for a snake game.
+You should return:
+\`\`\`json
+{
+    "next_step": "answering",
+    "language": null,
+    "file_name": null,
+    "requirement": "The user is asking about the game loop. Explain how the game loop works."
+}
+\`\`\`
+
+${context}
+
+Now, plan the user's next step for this request:
+${user_msg}
+`;
+
+    const response = await llm.complete({
+      prompt,
+    });
+    // parse the response to Requirement
+    // 1. use regex to find the json block
+    const jsonBlock = response.text.match(/```json\s*([\s\S]*?)\s*```/);
+    if (!jsonBlock) {
+      throw new Error("No JSON block found in the response.");
+    }
+    // 2. parse the block and validate it against RequirementSchema (both may throw)
+    const requirement = RequirementSchema.parse(JSON.parse(jsonBlock[1]));
+    // Keep the raw plan in memory so later steps can see it
+    state.memory.put({
+      role: "assistant",
+      content: `The plan for next step: \n${response.text}`,
+    });
+
+    // Route to code generation or straight to answering
+    if (requirement.next_step === "coding") {
+      return generateArtifactEvent.with({
+        requirement,
+      });
+    } else {
+      return synthesizeAnswerEvent.with({});
+    }
+  });
+
+  // Coding step: asks the LLM for code that satisfies the planned requirement,
+  // stores the response in memory, publishes the code as a "code" artifact and
+  // then hands over to the answer synthesis step. When the response contains no
+  // fenced code block, no artifact is sent and the workflow goes straight to
+  // answer synthesis.
+  workflow.handle([generateArtifactEvent], async ({ data: planData }) => {
+    const { sendEvent, state } = getContext();
+
+    // Tell the UI that generation has started and which requirement is handled
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "generate",
+          requirement: planData.requirement.requirement,
+        },
+      }),
+    );
+
+    const previousArtifact = state.lastArtifact
+      ? JSON.stringify(state.lastArtifact)
+      : "There is no previous artifact";
+    const requirementText = planData.requirement.requirement;
+
+    const promptTemplate = `
+        You are a skilled developer who can help user with coding.
+        You are given a task to generate or update a code for a given requirement.
+
+        ## Follow these instructions:
+        **1. Carefully read the user's requirements.** 
+           If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
+           If the previous code is provided:
+           + Carefully analyze the code with the request to make the right changes.
+           + Avoid making a lot of changes from the previous code if the request is not to write the code from scratch again.
+        **2. For code requests:**
+           - If the user does not specify a framework or language, default to a React component using the Next.js framework.
+           - For Next.js, use Shadcn UI components, Typescript, @types/node, @types/react, @types/react-dom, PostCSS, and TailwindCSS.
+           The import pattern should be:
+           \`\`\`typescript
+           import { ComponentName } from "@/components/ui/component-name"
+           import { Markdown } from "@llamaindex/chat-ui"
+           import { cn } from "@/lib/utils"
+           \`\`\`
+           - Ensure the code is idiomatic, production-ready, and includes necessary imports.
+           - Only generate code relevant to the user's request—do not add extra boilerplate.
+        **3. Don't be verbose on response**
+           - No other text or comments only return the code which wrapped by \`\`\`language\`\`\` block.
+           - If the user's request is to update the code, only return the updated code.
+        **4. Only the following languages are allowed: "typescript", "python".**
+        **5. If there is no code to update, return the reason without any code block.**
+           
+        ## Example:
+        \`\`\`typescript
+        import React from "react";
+        import { Button } from "@/components/ui/button";
+        import { cn } from "@/lib/utils";
+
+        export default function MyComponent() {
+        return (
+           <div className="flex flex-col items-center justify-center h-screen">
+              <Button>Click me</Button>
+           </div>
+        );
+        }
+        \`\`\`
+
+        The previous code is:
+        {previousArtifact}
+
+        Now, i have to generate the code for the following requirement:
+        {requirement}
+      `;
+
+    // Fill both placeholders in a single pass using a replacer function:
+    //  - a replacer's return value is inserted verbatim, whereas a replacement
+    //    *string* would expand `$&`, `$'`, `$$`, ... found in the generated code;
+    //  - a single pass never rescans inserted text, so a literal "{requirement}"
+    //    inside the previous artifact cannot swallow the real requirement.
+    const placeholderValues: Record<string, string> = {
+      previousArtifact,
+      requirement: requirementText,
+    };
+    const prompt = promptTemplate.replace(
+      /\{(previousArtifact|requirement)\}/g,
+      (_match, name) => placeholderValues[name],
+    );
+
+    const response = await llm.complete({
+      prompt,
+    });
+
+    // Extract the code from the response (first fence opener to the last fence)
+    const codeMatch = response.text.match(/```(\w+)([\s\S]*)```/);
+    if (!codeMatch) {
+      // No code block: the LLM returned a reason instead of code
+      return synthesizeAnswerEvent.with({});
+    }
+
+    const code = codeMatch[2].trim();
+
+    // Put the generated code to the memory
+    state.memory.put({
+      role: "assistant",
+      content: `Updated the code: \n${response.text}`,
+    });
+
+    // To show the Canvas panel for the artifact
+    sendEvent(
+      artifactEvent.with({
+        type: "artifact",
+        data: {
+          type: "code",
+          created_at: Date.now(),
+          data: {
+            language: planData.requirement.language || "",
+            file_name: planData.requirement.file_name || "",
+            code,
+          },
+        },
+      }),
+    );
+
+    return synthesizeAnswerEvent.with({});
+  });
+
+  // Final step: streams an explanation to the user based on the conversation
+  // stored in memory, then stops the workflow with the full response text.
+  workflow.handle([synthesizeAnswerEvent], async () => {
+    const { sendEvent } = getContext();
+    const { state } = getContext();
+
+    // Conversation so far, followed by the instruction for the final answer
+    const chatHistory = await state.memory.getMessages();
+    const messages = [
+      ...chatHistory,
+      {
+        role: "system" as const,
+        content: `
+        You are a helpful assistant who is responsible for explaining the work to the user.
+        Based on the conversation history, provide an answer to the user's question. 
+        The user has access to the code so avoid mentioning the whole code again in your response.
+      `,
+      },
+    ];
+
+    const responseStream = await llm.chat({
+      messages,
+      stream: true,
+    });
+
+    // Mark the workflow as completed in the UI before streaming the answer
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "completed",
+        },
+      }),
+    );
+
+    // Forward every chunk to the client while accumulating the full text
+    let response = "";
+    for await (const chunk of responseStream) {
+      response += chunk.delta;
+      sendEvent(
+        agentStreamEvent.with({
+          delta: chunk.delta,
+          response: "",
+          currentAgentName: "assistant",
+          raw: chunk,
+        }),
+      );
+    }
+
+    return stopAgentEvent.with({
+      result: response,
+    });
+  });
+
+  return workflow;
+}
@@ -0,0 +1,32 @@
+import { OpenAI } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { agent } from "@llamaindex/workflow";
+import { Settings, tool } from "llamaindex";
+import { z } from "zod";
+
+// Default LLM used by the agent in this example.
+Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
+
+// Stub tool: always answers "sunny" for whichever city is requested.
+const weatherTool = tool({
+  name: "weather",
+  description: "Get the weather in a given city",
+  parameters: z.object({ city: z.string() }),
+  execute: ({ city }) => `The weather in ${city} is sunny`,
+});
+
+const weatherAgent = agent({ tools: [weatherTool] });
+
+const starterQuestions = [
+  "What is the weather in Tokyo?",
+  "What is the weather in Ho Chi Minh City?",
+];
+
+// Start the server on port 3000 with `layoutDir` set to "layout".
+new LlamaIndexServer({
+  workflow: () => weatherAgent,
+  uiConfig: { starterQuestions, layoutDir: "layout" },
+  port: 3000,
+}).start();
@@ -0,0 +1,40 @@
+"use client";
+
+import { Sparkles, Star } from "lucide-react";
+
+/**
+ * Page header: app title on the left; LlamaIndex attribution and a
+ * "Star on GitHub" link on the right.
+ */
+export default function Header() {
+  // Attributes shared by both external links (open in a new tab).
+  const newTab = { target: "_blank", rel: "noopener noreferrer" };
+
+  return (
+    <div className="flex items-center justify-between p-2 px-4">
+      <div className="flex items-center gap-2">
+        <Sparkles className="size-4" />
+        <h1 className="font-semibold">LlamaIndex App</h1>
+      </div>
+      <div className="flex items-center justify-end gap-4">
+        <div className="flex items-center gap-2">
+          <a
+            href="https://www.llamaindex.ai/"
+            {...newTab}
+            className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
+          >
+            Built by LlamaIndex
+          </a>
+          <img
+            className="h-[24px] w-[24px] rounded-sm"
+            src="/llama.png"
+            alt="Llama Logo"
+          />
+        </div>
+        <a
+          href="https://github.com/run-llama/LlamaIndexTS"
+          {...newTab}
+          className="hover:bg-accent flex items-center gap-2 rounded-md border border-gray-300 px-2 py-1 text-sm"
+        >
+          <Star className="size-4" />
+          Star on GitHub
+        </a>
+      </div>
+    </div>
+  );
+}
@@ -0,0 +1,20 @@
+This example shows how to use the dev mode of the server.
+
+First, we need to set `devMode` to `true` in the `uiConfig` of the server.
+
+```ts
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: {
+    devMode: true,
+  },
+  port: 3000,
+}).start();
+```
+
+Export your OpenAI API key and start the server in dev mode:
+
+```bash
+export OPENAI_API_KEY=<your-openai-api-key>
+npx nodemon --exec tsx index.ts --ignore src/app/workflow_*.ts
+```
@@ -0,0 +1,20 @@
+import { OpenAI } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { Settings } from "llamaindex";
+import { workflowFactory } from "./src/app/workflow";
+
+// Default LLM for this example.
+Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
+
+const starterQuestions = [
+  "What is the weather in Tokyo?",
+  "What is the weather in New York?",
+];
+
+// Start the server on port 3000 with `devMode` turned on in the UI config.
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  uiConfig: { devMode: true, starterQuestions },
+  port: 3000,
+}).start();
@@ -0,0 +1,16 @@
+import { agent } from "@llamaindex/workflow";
+import { tool } from "llamaindex";
+import { z } from "zod";
+
+/** Builds an agent whose only tool is a stub `weather` lookup. */
+export const workflowFactory = async () => {
+  // Stub tool: always reports sunny weather for the requested city.
+  const weatherTool = tool({
+    name: "weather",
+    description: "Get the weather in a specific city",
+    parameters: z.object({ city: z.string() }),
+    execute: ({ city }) => `The weather in ${city} is sunny`,
+  });
+
+  return agent({ tools: [weatherTool] });
+};
@@ -0,0 +1,24 @@
+{
+  "name": "llamaindex-server-examples",
+  "version": "0.0.1",
+  "private": true,
+  "scripts": {
+    "typecheck": "tsc --noEmit",
+    "dev": "nodemon --exec tsx simple-workflow/calculator.ts"
+  },
+  "dependencies": {
+    "@llamaindex/openai": "~0.4.0",
+    "@llamaindex/readers": "~3.1.4",
+    "@llamaindex/server": "workspace:*",
+    "@llamaindex/tools": "~0.0.11",
+    "dotenv": "^16.4.7",
+    "llamaindex": "~0.11.0",
+    "zod": "^3.24.2"
+  },
+  "devDependencies": {
+    "@types/node": "^20.10.3",
+    "nodemon": "^3.1.10",
+    "tsx": "^4.7.2",
+    "typescript": "^5.3.2"
+  }
+}
@@ -0,0 +1,68 @@
+# Upload File Example
+
+This example shows how to use the uploaded file (private file) from the user in the workflow.
+
+## Prerequisites
+
+Please follow the setup instructions in the [examples README](../README.md).
+
+You will also need:
+
+- An OpenAI API key
+- The `enableFileUpload` option in `uiConfig` set to `true`:
+
+```typescript
+new LlamaIndexServer({
+  // ... other options
+  uiConfig: { enableFileUpload: true },
+}).start();
+```
+
+## How to get the uploaded files in your workflow:
+
+In LlamaIndexServer, the uploaded file is included in chat message annotations. You can easily get the uploaded files from chat messages using the [extractFileAttachments](https://github.com/llamaindex/llamaindex/blob/main/packages/server/src/utils/events.ts) function.
+
+```typescript
+import { type Message } from "ai";
+import { extractFileAttachments } from "@llamaindex/server";
+
+async function workflowFactory(reqBody: { messages: Message[] }) {
+  const attachments = extractFileAttachments(reqBody.messages);
+  // ...
+}
+```
+
+### AgentWorkflow
+
+If you are using AgentWorkflow, you can give the agent access to the files by creating a tool that reads the file content. We recommend using the `fileId` as the tool parameter instead of the `filePath`, to avoid exposing internal file paths to the user. You can use the `getStoredFilePath` helper function to get the file path from the file id.
+
+```typescript
+import { getStoredFilePath, extractFileAttachments } from "@llamaindex/server";
+
+const readFileTool = tool(
+  ({ fileId }) => {
+    // Get the file path from the file id
+    const filePath = getStoredFilePath({ id: fileId });
+    return fsPromises.readFile(filePath, "utf8");
+  },
+  {
+    name: "read_file",
+    description: `Use this tool with the file id to read the file content. The available file are: [${attachments.map((file) => file.id).join(", ")}]`,
+    parameters: z.object({
+      fileId: z.string(),
+    }),
+  },
+);
+```
+
+**Tip:** You can put the attachment file information in either the tool description or the agent's system prompt.
+
+Check: [agent-workflow.ts](./agent-workflow.ts) for the full example.
+
+### Custom Workflow
+
+In a custom workflow, instead of defining a tool, you can use the helper functions (`extractFileAttachments` and `getStoredFilePath`) to work with file attachments directly.
+
+Check: [custom-workflow.ts](./custom-workflow.ts) for the full example.
+
+> To run the custom workflow example, update the `index.ts` file to use the `workflowFactory` from `custom-workflow.ts` instead of `agent-workflow.ts`.
@@ -0,0 +1,39 @@
+import { extractFileAttachments, getStoredFilePath } from "@llamaindex/server";
+import { agent } from "@llamaindex/workflow";
+import { type Message } from "ai";
+import { tool } from "llamaindex";
+import { promises as fsPromises } from "node:fs";
+import { z } from "zod";
+
+/**
+ * Creates an agent that can read the files attached to the chat messages.
+ *
+ * @param reqBody - Request body carrying the chat `messages`.
+ * @returns An agent equipped with a single `read_file` tool.
+ */
+export const workflowFactory = async (reqBody: { messages: Message[] }) => {
+  // Ids of every file attachment found in the messages.
+  const knownFileIds = extractFileAttachments(reqBody.messages).map(
+    (attachment) => attachment.id,
+  );
+
+  // Tool that reads a file's content given its id.
+  const readFileTool = tool(
+    ({ fileId }) => {
+      // Only ids extracted from the messages may be read.
+      if (knownFileIds.includes(fileId)) {
+        return fsPromises.readFile(getStoredFilePath({ id: fileId }), "utf8");
+      }
+      throw new Error(`File with id ${fileId} not found`);
+    },
+    {
+      name: "read_file",
+      description: `Use this tool with the id of the file to read the file content. Here are the available file ids: [${knownFileIds.join(", ")}]`,
+      parameters: z.object({ fileId: z.string() }),
+    },
+  );
+
+  return agent({
+    tools: [readFileTool],
+    systemPrompt: `
+      You are a helpful assistant that can help the user with their file.
+      You can use the read_file tool to read the file content.
+    `,
+  });
+};
@@ -0,0 +1,98 @@
+import { extractFileAttachments } from "@llamaindex/server";
+import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
+
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import { Message } from "ai";
+import { promises as fsPromises } from "node:fs";
+
+// Event emitted once the uploaded file has been read; it carries the user's
+// input together with the file content for the LLM step.
+const fileHelperEvent = workflowEvent<{
+  userInput: MessageContent;
+  fileContent: string;
+}>();
+
+/**
+ * A simple workflow that demonstrates how to use uploaded files in a workflow.
+ *
+ * Throws when the request messages carry no file attachment.
+ */
+export function workflowFactory(reqBody: { messages: Message[] }) {
+  const llm = Settings.llm;
+
+  // Collect the file attachments from the incoming messages.
+  const attachments = extractFileAttachments(reqBody.messages);
+  if (attachments.length === 0) {
+    throw new Error("Please upload a file to start");
+  }
+
+  // Workflow state: chat memory plus the last attachment in the list.
+  const { withState, getContext } = createStatefulMiddleware(() => ({
+    memory: new ChatMemoryBuffer({ llm }),
+    uploadedFile: attachments[attachments.length - 1],
+  }));
+  const workflow = withState(createWorkflow());
+
+  // Step 1: record the user input and load the uploaded file from disk.
+  workflow.handle([startAgentEvent], async ({ data: { userInput } }) => {
+    const { state } = getContext();
+    if (!userInput) {
+      throw new Error("Missing user input to start the workflow");
+    }
+    state.memory.put({ role: "user", content: userInput });
+
+    const fileContent = await fsPromises.readFile(
+      state.uploadedFile.path,
+      "utf8",
+    );
+    return fileHelperEvent.with({ userInput, fileContent });
+  });
+
+  // Step 2: ask the LLM about the file and stream its answer to the client.
+  workflow.handle([fileHelperEvent], async ({ data }) => {
+    const { sendEvent } = getContext();
+    const { fileContent, userInput } = data;
+
+    const prompt = `
+You are a helpful assistant that can help the user with their file.
+
+Here is the provided file content:
+${fileContent}
+
+Now, let help the user with this request:
+${userInput}
+`;
+
+    const completion = await llm.complete({ prompt, stream: true });
+
+    // Forward every streamed chunk as an agent stream event.
+    for await (const part of completion) {
+      const streamEvent = agentStreamEvent.with({
+        delta: part.text,
+        response: part.text,
+        currentAgentName: "agent",
+        raw: part.raw,
+      });
+      sendEvent(streamEvent);
+    }
+
+    // Signal the end of the run; the result payload is left empty.
+    sendEvent(stopAgentEvent.with({ result: "" }));
+  });
+
+  return workflow;
+}
@@ -0,0 +1,23 @@
+import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { Settings } from "llamaindex";
+import { workflowFactory } from "./agent-workflow";
+// Uncomment this to use a custom workflow
+// import { workflowFactory } from "./custom-workflow";
+
+// Default chat and embedding models for this example.
+Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
+Settings.embedModel = new OpenAIEmbedding({ model: "text-embedding-3-small" });
+
+// File upload is enabled in the UI; next-question suggestions are turned off.
+const uiConfig = { enableFileUpload: true };
+
+new LlamaIndexServer({
+  workflow: workflowFactory,
+  suggestNextQuestions: false,
+  uiConfig,
+  port: 3000,
+}).start();
@@ -0,0 +1,28 @@
+import { OpenAI } from "@llamaindex/openai";
+import { LlamaIndexServer } from "@llamaindex/server";
+import { agent } from "@llamaindex/workflow";
+import { Settings, tool } from "llamaindex";
+import { z } from "zod";
+
+// Default LLM for this example.
+Settings.llm = new OpenAI({ model: "gpt-4o-mini" });
+
+// Tool that returns the sum of two numbers.
+const addTool = tool({
+  name: "add",
+  description: "Adds two numbers",
+  parameters: z.object({ x: z.number(), y: z.number() }),
+  execute: ({ x, y }) => x + y,
+});
+
+const calculatorAgent = agent({ tools: [addTool] });
+
+// Start the server on port 3000 with two starter questions.
+new LlamaIndexServer({
+  workflow: () => calculatorAgent,
+  uiConfig: { starterQuestions: ["1 + 1", "2 + 2"] },
+  port: 3000,
+}).start();
@@ -0,0 +1,14 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ES2022",
+    "moduleResolution": "bundler",
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "strict": true,
+    "skipLibCheck": true,
+    "outDir": "dist"
+  },
+  "include": ["**/*"],
+  "exclude": ["node_modules", "dist", "custom-layout/layout"]
+}
@@ -0,0 +1,45 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Next.js](https://nextjs.org/) that was ejected from [`llamaindex-server`](https://github.com/run-llama/create-llama/tree/main/packages/server) via the `npm eject` command.
+
+## Quick Start
+
+As this is a Next.js project, you can use the following commands to start the development server:
+
+```bash
+npm install
+npm run dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
+
+## Useful Commands
+
+- Generate Datasource (if you have a `./data` folder): `npm run generate`
+- Typecheck: `npm run typecheck`
+- Lint: `npm run lint`
+- Format: `npm run format`
+- Build & Start: `npm run build && npm run start`
+
+## Deployment
+
+The project can be deployed to any platform that supports Next.js like Vercel.
+
+## Configuration
+
+Your original [`llamaindex-server`](https://github.com/run-llama/create-llama/tree/main/packages/server#configuration-options) configurations have been migrated to a [`.env`](.env) file.
+
+Changing the `.env` file changes the behavior of the application. For example, to change the initial questions displayed in the chat, set:
+
+```
+NEXT_PUBLIC_STARTER_QUESTIONS=['What is the capital of France?']
+```
+
+Alternatively, you can also change the file referencing `process.env.NEXT_PUBLIC_STARTER_QUESTIONS` directly in the source code.
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
+- [LlamaIndexTS Documentation](https://ts.llamaindex.ai) - learn about LlamaIndex (Typescript features).
+
+You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
@@ -0,0 +1,32 @@
+import { getEnv } from "@llamaindex/env";
+import { LLamaCloudFileService } from "llamaindex";
+import { NextRequest, NextResponse } from "next/server";
+
+/**
+ * Returns the LlamaCloud configuration: all projects with their pipelines,
+ * plus the pipeline/project names read from the environment.
+ *
+ * Responds with 500 when `LLAMA_CLOUD_API_KEY` is not set or when fetching
+ * the projects fails.
+ */
+export async function GET(request: NextRequest): Promise<NextResponse> {
+  if (!getEnv("LLAMA_CLOUD_API_KEY")) {
+    return NextResponse.json(
+      {
+        error: "env variable LLAMA_CLOUD_API_KEY is required to use LlamaCloud",
+      },
+      { status: 500 },
+    );
+  }
+
+  try {
+    const config = {
+      projects: await LLamaCloudFileService.getAllProjectsWithPipelines(),
+      pipeline: {
+        pipeline: getEnv("LLAMA_CLOUD_INDEX_NAME"),
+        project: getEnv("LLAMA_CLOUD_PROJECT_NAME"),
+      },
+    };
+    return NextResponse.json(config, { status: 200 });
+  } catch (error) {
+    // Log the underlying cause; the client only receives a generic message.
+    console.error("Failed to fetch LlamaCloud configuration:", error);
+    return NextResponse.json(
+      {
+        error: "Failed to fetch LlamaCloud configuration",
+      },
+      { status: 500 },
+    );
+  }
+}
@@ -0,0 +1,86 @@
+import { type AgentInputData } from "@llamaindex/workflow";
+import { type Message } from "ai";
+import { type MessageType } from "llamaindex";
+import { NextRequest, NextResponse } from "next/server";
+
+// import chat utils
+import {
+  runWorkflow,
+  sendSuggestedQuestionsEvent,
+  toDataStream,
+} from "./utils";
+
+// import workflow factory and settings from local file
+import { initSettings } from "./app/settings";
+import { workflowFactory } from "./app/workflow";
+
+// Top-level call: runs when this route module is evaluated, before POST is invoked.
+// NOTE(review): presumably configures the LLM settings used by the workflow - confirm in ./app/settings.
+initSettings();
+
+/**
+ * Chat endpoint: runs the workflow for the latest user message and returns
+ * the workflow events as a data stream response.
+ *
+ * Responds with 400 when `messages` is missing, not an array, empty, or does
+ * not end with a user message, and with 500 when anything else throws.
+ */
+export async function POST(req: NextRequest) {
+  try {
+    const reqBody = await req.json();
+    const suggestNextQuestions = process.env.SUGGEST_NEXT_QUESTIONS === "true";
+
+    // Validate before touching `messages`: a body without a `messages` array
+    // is a client error and must not surface as a TypeError (500).
+    const messages = (reqBody as { messages?: Message[] } | null)?.messages;
+    const lastMessage = Array.isArray(messages)
+      ? messages[messages.length - 1]
+      : undefined;
+    if (!Array.isArray(messages) || lastMessage?.role !== "user") {
+      return NextResponse.json(
+        {
+          detail: "Messages cannot be empty and last message must be from user",
+        },
+        { status: 400 },
+      );
+    }
+
+    const chatHistory = messages.map((message) => ({
+      role: message.role as MessageType,
+      content: message.content,
+    }));
+    const workflowInput: AgentInputData = {
+      userInput: lastMessage.content,
+      chatHistory,
+    };
+
+    // Forward a client disconnect to the workflow run through an abort signal.
+    const abortController = new AbortController();
+    req.signal.addEventListener("abort", () =>
+      abortController.abort("Connection closed"),
+    );
+
+    const workflow = await workflowFactory(reqBody);
+    const workflowEventStream = await runWorkflow(
+      workflow,
+      workflowInput,
+      abortController.signal,
+    );
+
+    const dataStream = toDataStream(workflowEventStream, {
+      callbacks: {
+        // Once the final completion is known, append it to the history and,
+        // if enabled, send suggested follow-up questions.
+        onFinal: async (completion, dataStreamWriter) => {
+          chatHistory.push({
+            role: "assistant" as MessageType,
+            content: completion,
+          });
+          if (suggestNextQuestions) {
+            await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
+          }
+        },
+      },
+    });
+
+    return new Response(dataStream, {
+      status: 200,
+      headers: {
+        "Content-Type": "text/plain; charset=utf-8",
+        "X-Vercel-AI-Data-Stream": "v1",
+      },
+    });
+  } catch (error) {
+    console.error("Chat handler error:", error);
+    return NextResponse.json(
+      {
+        detail: (error as Error).message || "Internal server error",
+      },
+      { status: 500 },
+    );
+  }
+}
@@ -0,0 +1,9 @@
+import { NextRequest } from "next/server";
+import { handleComponentRoute } from "../shared/component-handler";
+
+/**
+ * Serves the component route for a directory chosen, in order of precedence,
+ * from the `componentsDir` query parameter, the `COMPONENTS_DIR` environment
+ * variable, or the default "components".
+ */
+export async function GET(request: NextRequest) {
+  // NOTE(review): the directory can come straight from the query string -
+  // confirm that handleComponentRoute restricts which paths may be read.
+  const requestedDir = request.nextUrl.searchParams.get("componentsDir");
+  const fallbackDir = process.env.COMPONENTS_DIR || "components";
+  return handleComponentRoute(requestedDir || fallbackDir);
+}
@@ -0,0 +1,97 @@
+import { exec } from "child_process";
+import fs from "fs";
+import { NextRequest, NextResponse } from "next/server";
+import path from "path";
+import { promisify } from "util";
+
+// Path of the workflow file that the GET and PUT handlers in this module read
+// and write. Taken from the WORKFLOW_FILE_PATH environment variable when it is
+// set to a non-empty value; otherwise falls back to "src/app/workflow.ts".
+// Evaluated once, when the module is loaded.
+const DEFAULT_WORKFLOW_FILE_PATH =
+  process.env.WORKFLOW_FILE_PATH || "src/app/workflow.ts";
+
+// Returns the workflow file as JSON: `{ content, file_path, last_modified }`.
+//
+// - `content` is the file decoded as UTF-8.
+// - `file_path` is DEFAULT_WORKFLOW_FILE_PATH as configured.
+// - `last_modified` is the file's mtime in milliseconds since the epoch.
+//
+// Responds with 404 and a `detail` message when the file does not exist.
+// Any other filesystem error is rethrown to the framework.
+export async function GET(request: NextRequest) {
+  const filePath = DEFAULT_WORKFLOW_FILE_PATH;
+
+  try {
+    // Read and stat through the promise API so the event loop is not blocked.
+    // No separate "exists" check is done up front: the file could disappear
+    // between such a check and the read, so a missing file is detected from
+    // the error of the read itself.
+    const [content, stats] = await Promise.all([
+      fs.promises.readFile(filePath, "utf-8"),
+      fs.promises.stat(filePath),
+    ]);
+
+    return NextResponse.json(
+      { content, file_path: filePath, last_modified: stats.mtime.getTime() },
+      { status: 200 },
+    );
+  } catch (error) {
+    const code = (error as { code?: string })?.code;
+    // ENOENT: the file is missing. ENOTDIR: a parent path segment is a file.
+    // Both mean "no workflow file at this path".
+    if (code === "ENOENT" || code === "ENOTDIR") {
+      return NextResponse.json(
+        {
+          detail: `Dev mode is currently in beta. It only supports updating workflow file at ${filePath}`,
+        },
+        { status: 404 },
+      );
+    }
+    throw error;
+  }
+}
+
+// Replaces the workflow file with the `content` string from the JSON body.
+//
+// Responses:
+// - 400 when the body is not valid JSON, `content` is not a string, or the
+//   content fails TypeScript validation (`detail` carries the messages).
+// - 404 when the workflow file does not exist; this handler only updates an
+//   existing file and never creates one.
+// - 200 with `{ content }` once the file has been written.
+// - 500 when validation or the write fails unexpectedly.
+export async function PUT(request: NextRequest) {
+  const filePath = DEFAULT_WORKFLOW_FILE_PATH;
+
+  // Parse defensively: a malformed body is a client error, not a server crash.
+  let content: unknown;
+  try {
+    const body = await request.json();
+    content = body?.content;
+  } catch {
+    return NextResponse.json(
+      { detail: "Request body must be valid JSON" },
+      { status: 400 },
+    );
+  }
+
+  // Same check the deprecated fs.exists performed, via the promise API.
+  const fileExists = await fs.promises.access(filePath).then(
+    () => true,
+    () => false,
+  );
+  if (!fileExists) {
+    return NextResponse.json(
+      {
+        detail: `Dev mode is currently in beta. It only supports updating workflow file at ${filePath}`,
+      },
+      { status: 404 },
+    );
+  }
+
+  // Reject non-string content here; otherwise it would only fail later while
+  // being written to disk and be reported as a generic 500.
+  if (typeof content !== "string") {
+    return NextResponse.json(
+      { detail: 'Request body must include a string "content" field' },
+      { status: 400 },
+    );
+  }
+
+  try {
+    const resolvedFilePath = path.resolve(filePath);
+    const result = await validateTypeScriptFile(resolvedFilePath, content);
+
+    if (!result.isValid) {
+      return NextResponse.json(
+        {
+          detail: result.errors.join("\n"),
+        },
+        { status: 400 },
+      );
+    }
+
+    await fs.promises.writeFile(filePath, content);
+    return NextResponse.json({ content }, { status: 200 });
+  } catch (error) {
+    console.error("Error updating workflow file:", error);
+    // `detail` matches the key used by every other response in this handler;
+    // `error` is kept so existing clients that read it keep working.
+    return NextResponse.json(
+      {
+        detail: "Failed to update workflow file",
+        error: "Failed to update workflow file",
+      },
+      { status: 500 },
+    );
+  }
+}
+
+// Checks the syntax and imports of `content` by running `npx tsc --noEmit`
+// on it.
+//
+// Writing to the workflow file directly would restart the server immediately,
+// so the content is written to a temporary file in the same directory as
+// `filePath`, and that file is checked instead. The temporary file is always
+// removed afterwards, whether or not validation succeeds.
+//
+// Parameters:
+//   filePath - path of the real workflow file; only its directory is used.
+//   content  - TypeScript source to validate.
+// Returns `{ isValid, errors }`; `isValid` is true when `errors` is empty.
+// Throws if the temporary file cannot be written or removed.
+async function validateTypeScriptFile(filePath: string, content: string) {
+  const tempFilePath = path.join(
+    path.dirname(filePath),
+    `workflow_${Date.now()}.ts`,
+  );
+
+  const errors: string[] = [];
+  try {
+    await fs.promises.writeFile(tempFilePath, content);
+
+    try {
+      // The command runs through a shell, so the path is quoted to keep
+      // directories containing spaces as a single argument.
+      const tscCommand = `npx tsc "${tempFilePath}" --noEmit --skipLibCheck true`;
+      await promisify(exec)(tscCommand);
+    } catch (error) {
+      // stdout is preferred, as before. If it is empty (for example when the
+      // command itself could not run), fall back to stderr and then the
+      // error message so the caller never receives a blank failure.
+      const failure = (error ?? {}) as {
+        stdout?: string;
+        stderr?: string;
+        message?: string;
+      };
+      const errorMessage = [
+        failure.stdout,
+        failure.stderr,
+        failure.message,
+      ].find((text) => typeof text === "string" && text.trim() !== "");
+      errors.push(errorMessage ?? "TypeScript validation failed");
+    }
+  } finally {
+    // `force: true` makes removal a no-op when the file was never created.
+    await fs.promises.rm(tempFilePath, { force: true });
+  }
+
+  return {
+    isValid: errors.length === 0,
+    errors: errors,
+  };
+}
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
thucpn	5d4a374c77	try fixed version	2025-06-12 13:59:20 +07:00
thucpn	e13d5442f9	try 1 use case	2025-06-12 13:53:17 +07:00
thucpn	33205abad8	test with 1 option	2025-06-12 13:46:11 +07:00
thucpn	ae79064f33	test: e2e	2025-06-12 12:27:58 +07:00
Huu Le	66b81e5323	fix cannot catch the error raised from the workflow (#684 )	2025-06-09 16:53:49 +07:00
github-actions[bot]	924649c025	Release 0.1.21 (#680 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-06-06 17:19:25 +07:00
Thuc Pham	1b04db917b	fix lint for release (#682 )	2025-06-06 16:43:45 +07:00
Thuc Pham	af9ad3c42d	feat: show document artifact after generating report (#658 ) * feat: show document artifact after generating report * keep chat message content as it is * use artifactEvent from server * add deep research example * bump chat-ui for new editor * import editor css * hide warning for workflowEvent<{}>() in eject mode * fix format * use CL for better testing * generate artifact after streaming report in Python * bump chat-ui to support citations * use isinstance to check stream * fix document editor spacing * Create tame-wolves-obey.md * add sources to document artifact * add sources to document artifact in python * type cast * no need score * fix lint * move handle stream logic to server * refactor: use chunk.text and chunk.raw * bump chat-ui 0.5.6 to fix citations * update changset * fix lock	2025-06-06 16:34:52 +07:00
Huu Le	1ff6eaf3e1	feat: Support upload private file (#674 ) * init private support for python BE * feat: Add private file handling and upload support in FastAPI - Introduced `main.py` to set up the FastAPI application with file upload capabilities. - Created `workflow.py` to manage file reading and tool creation for uploaded files. - Updated `server.py` to include upload API configuration. - Modified chat router to handle file uploads and return server file metadata. - Refactored chat models to support new file handling structure. - Enhanced file service to manage private file storage and retrieval. * add process base64 and update examples * add readme example * fix test * feat: Add file upload support to LlamaIndexServer TS * add get_file to fileservice * refactor: Simplify file storage logic in helpers.ts * update example * attach file to user message * fix example, improve model * feat: Add file upload support and enhance chat workflow in LlamaIndexServer * remove redundant change * support agent workflow for ts * Enhance README and add file upload examples for LlamaIndex Server. Updated instructions for running examples and added new workflows for handling uploaded files. Included detailed notes on using file attachments in workflows. * update doc * update example * Enhance README with detailed instructions for file upload in chat UI. Update custom workflow to handle file attachments and modify chat router to remove unused attachment handling. Refactor create_workflow to pass attachments from chat request. * Refactor file handling in workflows by updating the create_file_tool function to accept file attachments directly. Introduce a new ServerFileResponse model for better file response handling. Update chat router to utilize the new FileUpload model for file uploads. Clean up imports and ensure consistent file attachment processing across workflows. * Enhance file handling in workflows by updating README and example files. 
Introduce a new `workflowFactory` structure to support file attachments, and improve the `extractFileAttachments` function for better clarity and usability. Update descriptions in tools to reflect changes in file ID handling. * fix unstoppable * chore: fix issues * add changeset * bump chat-ui * bump chat-ui for eject project --------- Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-06-06 15:58:56 +07:00
Thuc Pham	a543a27faf	feat: bump chat-ui with inline artifact (#675 ) * feat: bump chat-ui with inline artifact * bump chat-ui 0.5.0 * update extractLastArtifact * fix: imports * fix: circle import * missing export * update document gen workflow * remove artifactEvent for annotations * update document * bump chat-ui 0.5.1 to fix parsing $ * bump chat-ui 0.5.2 * toArtifactEvent internal * update doc to use toArtifactEvent * do workflow transformmation internal * revert doc * keep contract * fix format * update get_last_artifact to extract inline annotations in Python * fix imports * Transforms ArtifactEvent to AgentStream with inline annotation format * Create thick-turtles-deny.md * donot use relative imports * toInlineAnnotationEvent * to_inline_annotation_event in python * refactor: move toInlineAnnotationEvent to inline.ts * update comment * rename ArtifactTransform to InlineAnnotationTransformer * add codegen example --------- Co-authored-by: leehuwuj <leehuwuj@gmail.com>	2025-06-05 10:20:21 +07:00
Thuc Pham	63edd74ba1	fix: conflict package versions in ts examples (#678 )	2025-06-05 09:25:54 +07:00
Marcus Schiesser	13a967b2a2	docs: improved python readmes	2025-06-03 14:57:57 +07:00
Huu Le	2ac4d92493	chore: update examples (#677 )	2025-06-03 14:33:27 +07:00
Marcus Schiesser	7e47cba4ba	docs: clarify HITL example	2025-06-03 08:52:45 +07:00
github-actions[bot]	bc56fa3c5f	Release 0.5.20 (#671 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-06-02 18:02:05 +07:00
Huu Le	087c96164d	feat: [server] Add Human in the Loop example with FastAPI integration (#630 )	2025-06-02 17:47:04 +07:00
Thuc Pham	3ff0a18876	fix: default header padding (#672 )	2025-05-31 14:08:29 +07:00
Thuc Pham	df1047480a	fix: missing cursor pointer for button (#670 )	2025-05-30 09:52:17 +07:00
Marcus Schiesser	8d89223a08	chore: fill empty chat message default	2025-05-29 21:05:53 +07:00
github-actions[bot]	49a944182f	Release 0.2.5 (#669 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-29 13:06:58 +07:00
Marcus Schiesser	058b3762c1	fix: update generate script path for ejected project (#668 )	2025-05-29 12:21:17 +07:00
Thuc Pham	4c8579b04f	use eject file in linux (#663 )	2025-05-29 09:15:52 +07:00
github-actions[bot]	bb1e82cdae	Release 0.1.18 (#660 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-28 17:57:45 +07:00
Huu Le	f682a1c36e	chore: add project directory to Prettier ignore list (#659 )	2025-05-28 17:50:23 +07:00
Huu Le	b8a1ff6412	feat: Support citation for agentic template (#642 )	2025-05-28 17:28:50 +07:00
Thuc Pham	5fe9e17d3f	feat: support eject to fully customize next folder (#653 )	2025-05-28 17:09:47 +07:00
Marcus Schiesser	15619d81a6	added claude code files	2025-05-27 13:39:57 +07:00
Huu Le	76742da78a	chore: add python release condition (#656 )	2025-05-27 09:25:36 +07:00
github-actions[bot]	693d7a0ea5	Release 0.5.18 (#655 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-26 18:43:41 +07:00
Huu Le	8d59ef0a6b	chore: Add layout_dir config to the generated python code (#654 )	2025-05-26 18:09:31 +07:00
github-actions[bot]	c62f26e31c	Release 0.1.17 (#652 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-26 11:21:23 +07:00
Huu Le	d3f73679b4	chore: add server package path to ESLint ignore list (#651 )	2025-05-26 10:58:40 +07:00
Huu Le	91c35cff33	fix release action didn't run custom version command (#650 )	2025-05-26 10:43:11 +07:00
github-actions[bot]	82ac925224	Release 0.1.17 (#644 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-23 17:10:05 +07:00
thucpn	f24ee8e6f9	fix: missing comma in config	2025-05-23 16:39:27 +07:00
Thuc Pham	3acec88fbc	chore: bump chat-ui (#645 )	2025-05-23 15:18:17 +07:00
Thuc Pham	eee3230e99	feat: support custom layout (#641 )	2025-05-23 14:18:22 +07:00
Marcus Schiesser	d8425e5290	docs: fix type	2025-05-23 13:22:11 +07:00
Huu Le	0bc5a0d882	feat: Add config for suggest next question (#640 ) * Enhance LlamaIndexServer with next question suggestion feature - Added `suggest_next_questions` parameter to the LlamaIndexServer for suggesting follow-up questions after the assistant's response. - Updated README.md to document the new configuration option. - Introduced `SUGGEST_NEXT_QUESTION_PROMPT` in prompts.py for customizable question suggestions. - Bumped version to 0.1.16 in uv.lock to reflect the new feature. * Implement next question suggestion feature in LlamaIndexServer - Added `suggestNextQuestions` option to LlamaIndexServer for suggesting follow-up questions after the assistant's response. - Updated README.md to include the new configuration option. - Modified example workflow to utilize the new feature. - Enhanced chat handler to conditionally send suggested questions based on the new option. * add changeset * remove log * bundle ui instead of download * check test * check test check test check test check test check test check test check test check test check test check test * fix tests * Update artifact path in workflow and clarify README.md text - Changed the artifact path in the GitHub Actions workflow from `python/llama-index-server/dist/` to `dist/`. - Revised README.md to clarify the default prompt used for the `suggest_next_questions` configuration option. * support changeset for python * refactor: update llama-index-server structure and workflows * fix workflows * fix workflows * fix workflows * add changeset * fix cannot release python * Update packages/server/README.md Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com> * Update starter questions in LlamaIndex App and add TODO for suggestion feature in chat API --------- Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de> Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com>	2025-05-23 12:48:45 +07:00
github-actions[bot]	bbae802bed	Release 0.2.2 (#638 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-22 17:17:34 +07:00
Thuc Pham	25fba4381b	refactor: migrate to Nextjs Route Handler (#625 )	2025-05-22 11:47:24 +07:00
Huu Le	d0618fa2fa	add changeset (#639 )	2025-05-21 14:31:41 +07:00
Huu Le	f3fe3ffc9b	fix: llamacloud generate not working and re-add tests (#636 )	2025-05-21 12:49:44 +07:00
Thuc Pham	6f75d4ab6e	fix: unsupported language in code gen workflow (#633 )	2025-05-21 12:31:11 +07:00
Huu Le	3242738fe4	chore: Fix Python e2e tests (#632 )	2025-05-21 11:30:02 +07:00
Sourabh Kondapaka	17538eb0dd	Fixed bug when traceloop observability is chosen but does not install the latest version (#603 ) Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-05-20 11:48:32 +07:00
github-actions[bot]	d3772cb4a2	Release 0.5.15 (#629 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-16 16:33:35 +07:00
Huu Le	527075c086	enable dev mode that allows updating code directly in the UI (#624 ) * Enable dev mode that allows updating code directly in the UI * bump server packages	2025-05-16 16:05:56 +07:00
github-actions[bot]	fb7d4da149	chore(release): bump llama-index-server version to 0.1.16 (#587 ) Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>	2025-05-16 15:16:57 +07:00
leehuwuj	5c35b194bb	bump chat ui version	2025-05-16 14:53:57 +07:00
github-actions[bot]	85e5e7e662	Release 0.5.14 (#608 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-16 14:41:46 +07:00
Huu Le	58362542c0	chore: add workflow contract for server (#623 )	2025-05-16 14:26:24 +07:00
Thuc Pham	6f44185f68	fix: init messages memory in start event handler (#627 )	2025-05-16 12:45:35 +07:00
Thuc Pham	afe9e9fc16	fix: nodemon should ignore temp file (#622 )	2025-05-15 15:33:24 +07:00
Thuc Pham	1b5a519f13	chore: improve dev experience with nodemon (#621 )	2025-05-15 15:18:12 +07:00
Huu Le	f072308d03	feat: Add dev mode (#610 ) * Add UI components and static assets for chat interface * feat: Add simple chat app example with FastAPI integration * fix: update default workflow file path and improve error handling * update doc * change to file_path * include changes from #614 * fix mypy * support devmode for backend ts server * Revert "support devmode for backend ts server" This reverts commit `bd943fd8c1`. * fix: polling should work when server not yet started * bump chat-ui to fix syntax highlight issue * fix: missing language for code editor * enhance UI with shadow overlay * enhance doc * fix minor UI bugs * enhance doc * remove unessesary debug log * fix wrong check * increase delay time before trigger polling * feat: support dev mode for backend ts server (#616) * feat: support dev mode for backend ts server * update message * validate typescript file * fix: format * use temp file to avoid server restart * fix format * use npx tsc * remove typescript deps --------- Co-authored-by: thucpn <thucsh2@gmail.com> Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com>	2025-05-15 14:56:08 +07:00
Huu Le	1df8cfbdc2	refactor: split artifacts use case into document generator and code generator (#617 ) * split artifacts use case to code generator and document generator * add changeset * fix package version * fix typing * bump openai * fix package * fix typing * fix: improve type handling and clean up UI event component - Removed unnecessary string conversion for userInput in code_generator and deep_research workflows. - Updated userRequest type to MessageContent for better type safety. - Cleaned up the UI event component by removing redundant indicatorClassName logic. * docs: word smith * better handler typing * refactor: remove redundant UI event handling in workflows --------- Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-05-15 14:22:21 +07:00
Thuc Pham	24515393a6	fix: remove dead generated ai code (#618 )	2025-05-14 12:42:43 +07:00
Huu Le	b3eb0ba7d4	fix typing issue and add typing test for llamaindexserver templates (#613 ) * try testing for llamaindexserver * Enhance TypeScript tests for dependency resolution by introducing template types and use cases * refactor template structure * fix package conflict * add tests for python * fix python mypy * use matrix for templateType * add changeset * add removing data.ts for artifacts template * don't ask llamacloud for unsupported use case and skip test * Enhance tests for LlamaIndexServer by adding conditional skips based on data source and refining use case tests for example data source	2025-05-13 16:20:24 +07:00
Huu Le	556f33c0ab	pin onnxruntime version to fix issue on Windows (#609 )	2025-05-12 16:25:47 +07:00
Marcus Schiesser	7a70390b00	chore: deprecate pro mode (#607 )	2025-05-12 12:07:55 +07:00
github-actions[bot]	ad5912b41f	Release 0.5.13 (#605 ) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>	2025-05-09 17:31:53 +07:00
Marcus Schiesser	76502d28e7	chore: remove changeset	2025-05-09 17:29:50 +07:00
Huu Le	f4ca602da5	feat: Add artifact use case and use new the workflow for Typescript (#595 ) --------- Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>	2025-05-09 17:20:30 +07:00
Thuc Pham	d304554f33	feat: add examples package for easily testing workflow (#599 )	2025-05-08 17:15:00 +07:00