Release 0.1.21 (#680 )

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
fix lint for release (#682 )
2026-07-02 19:14:28 -04:00 · 2025-06-06 17:19:25 +07:00 · 2025-06-06 16:43:45 +07:00 · 2025-06-06 16:34:52 +07:00 · 2025-06-06 15:58:56 +07:00 · 2025-06-05 10:20:21 +07:00
302 changed files with 24363 additions and 9541 deletions
@@ -4,10 +4,12 @@ on:
    branches: [main]
    paths-ignore:
      - "python/llama-index-server/**"
+      - ".github/workflows/*llama_index_server.yml"
  pull_request:
    branches: [main]
    paths-ignore:
      - "python/llama-index-server/**"
+      - ".github/workflows/*llama_index_server.yml"

 jobs:
  e2e-python:
@@ -21,6 +23,7 @@ jobs:
        os: [macos-latest, windows-latest, ubuntu-22.04]
        frameworks: ["fastapi"]
        datasources: ["--no-files", "--example-file", "--llamacloud"]
+        template-types: ["streaming", "llamaindexserver"]
    defaults:
      run:
        shell: bash
@@ -61,6 +64,15 @@ jobs:
        run: pnpm run pack-install
        working-directory: packages/create-llama

+      - name: Build and store server package
+        run: |
+          pnpm run build
+          wheel_file=$(ls dist/*.whl | head -n 1)
+          mkdir -p "${{ runner.temp }}"
+          cp "$wheel_file" "${{ runner.temp }}/"
+          echo "SERVER_PACKAGE_PATH=${{ runner.temp }}/$(basename "$wheel_file")" >> $GITHUB_ENV
+        working-directory: python/llama-index-server
+
      - name: Run Playwright tests for Python
        run: pnpm run e2e:python
        env:
@@ -68,14 +80,16 @@ jobs:
          LLAMA_CLOUD_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY }}
          FRAMEWORK: ${{ matrix.frameworks }}
          DATASOURCE: ${{ matrix.datasources }}
+          TEMPLATE_TYPE: ${{ matrix.template-types }}
          PYTHONIOENCODING: utf-8
          PYTHONLEGACYWINDOWSSTDIO: utf-8
+          SERVER_PACKAGE_PATH: ${{ env.SERVER_PACKAGE_PATH }}
        working-directory: packages/create-llama

      - uses: actions/upload-artifact@v4
        if: always()
        with:
-          name: playwright-report-python-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}
+          name: playwright-report-python-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}-${{ matrix.template-types }}
          path: packages/create-llama/playwright-report/
          overwrite: true
          retention-days: 30
@@ -91,6 +105,7 @@ jobs:
        os: [macos-latest, windows-latest, ubuntu-22.04]
        frameworks: ["nextjs"]
        datasources: ["--no-files", "--example-file", "--llamacloud"]
+        template-types: ["streaming", "llamaindexserver"]
    defaults:
      run:
        shell: bash
@@ -131,6 +146,21 @@ jobs:
        run: pnpm run pack-install
        working-directory: packages/create-llama

+      - name: Build server
+        run: pnpm run build
+        working-directory: packages/server
+
+      - name: Pack @llamaindex/server package
+        run: |
+          pnpm pack --pack-destination "${{ runner.temp }}"
+          if [ "${{ runner.os }}" == "Windows" ]; then
+            file=$(find "${{ runner.temp }}" -name "llamaindex-server-*.tgz" | head -n 1)
+            mv "$file" "${{ runner.temp }}/llamaindex-server.tgz"
+          else
+            mv ${{ runner.temp }}/llamaindex-server-*.tgz ${{ runner.temp }}/llamaindex-server.tgz
+          fi
+        working-directory: packages/server
+
      - name: Run Playwright tests for TypeScript
        run: pnpm run e2e:typescript
        env:
@@ -138,12 +168,14 @@ jobs:
          LLAMA_CLOUD_API_KEY: ${{ secrets.LLAMA_CLOUD_API_KEY }}
          FRAMEWORK: ${{ matrix.frameworks }}
          DATASOURCE: ${{ matrix.datasources }}
+          TEMPLATE_TYPE: ${{ matrix.template-types }}
+          SERVER_PACKAGE_PATH: ${{ runner.temp }}/llamaindex-server.tgz
        working-directory: packages/create-llama

      - uses: actions/upload-artifact@v4
        if: always()
        with:
-          name: playwright-report-typescript-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}-node${{ matrix.node-version }}
+          name: playwright-report-typescript-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}-node${{ matrix.node-version }}-${{ matrix.template-types }}
          path: packages/create-llama/playwright-report/
          overwrite: true
          retention-days: 30
@@ -16,6 +16,16 @@ jobs:

      - uses: pnpm/action-setup@v3

+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
@@ -31,6 +41,13 @@ jobs:
      - name: Run Prettier
        run: pnpm run format

+      - name: Run build
+        run: pnpm run build
+
+      - name: Run Typecheck for examples
+        run: pnpm run typecheck
+        working-directory: packages/server/examples
+
      - name: Run Python format check
        uses: chartboost/ruff-action@v1
        with:
@@ -17,6 +17,11 @@ jobs:

      - uses: pnpm/action-setup@v3

+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
      - name: Install uv
        uses: astral-sh/setup-uv@v3

@@ -51,8 +56,12 @@ jobs:
        with:
          commit: Release ${{ steps.get-changeset-status.outputs.new-version }}
          title: Release ${{ steps.get-changeset-status.outputs.new-version }}
+          # bump versions
+          version: pnpm new-version
          # build package and call changeset publish
          publish: pnpm release
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
+          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
@@ -1,130 +0,0 @@
-name: Release llama-index-server
-
-on:
-  push:
-    branches:
-      - main
-    paths:
-      - "python/llama-index-server/**"
-      - ".github/workflows/release_llama_index_server.yml"
-  pull_request:
-    types:
-      - closed
-
-concurrency: ${{ github.workflow }}-${{ github.ref }}
-
-jobs:
-  release:
-    name: Create Release PR
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'push' && 
-      !startsWith(github.ref, 'refs/heads/release/llama-index-server-v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install Poetry
-        run: |
-          curl -sSL https://install.python-poetry.org | python3 -
-
-      - name: Install dependencies
-        run: poetry install
-
-      - name: Setup Git
-        run: |
-          git config --global user.email "github-actions[bot]@users.noreply.github.com"
-          git config --global user.name "github-actions[bot]"
-
-      - name: Bump patch version
-        run: |
-          poetry version patch
-          git add pyproject.toml
-          git commit -m "chore(release): bump version to $(poetry version -s)"
-
-      - name: Get current version
-        id: get_version
-        run: |
-          version=$(poetry version -s)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Create Release PR
-        uses: peter-evans/create-pull-request@v6
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
-          commit-message: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          title: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            This PR was automatically created to release a new version of the llama-index-server package.
-
-            Version: ${{ steps.get_version.outputs.current_version }}
-
-            Please review the changes and merge to trigger the release.
-          branch: release/llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          base: main
-          labels: release, llama-index-server
-
-  publish:
-    name: Publish to PyPI
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./python/llama-index-server
-    if: |
-      github.event_name == 'pull_request' && 
-      github.event.pull_request.merged == true && 
-      startsWith(github.event.pull_request.title, 'Release: llama-index-server') &&
-      startsWith(github.event.pull_request.head.ref, 'release/llama-index-server-v')
-
-    steps:
-      - name: Checkout Repository
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.11"
-
-      - name: Install Poetry
-        run: |
-          curl -sSL https://install.python-poetry.org | python3 -
-
-      - name: Install dependencies
-        run: poetry install
-
-      - name: Get current version
-        id: get_version
-        run: |
-          version=$(poetry version -s)
-          echo "current_version=${version}" >> "$GITHUB_OUTPUT"
-
-      - name: Build and publish to PyPI
-        uses: JRubics/poetry-publish@v2.1
-        with:
-          python_version: "3.11"
-          pypi_token: ${{ secrets.PYPI_TOKEN }}
-          package_directory: "python/llama-index-server"
-          poetry_install_options: "--without dev"
-
-      - name: Create GitHub Release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: llama-index-server-v${{ steps.get_version.outputs.current_version }}
-          name: "llama-index-server v${{ steps.get_version.outputs.current_version }}"
-          body: |
-            Release of llama-index-server v${{ steps.get_version.outputs.current_version }}
-          draft: false
-          prerelease: false
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -4,8 +4,8 @@ on:
  pull_request:

 env:
-  POETRY_VERSION: "1.8.3"
  PYTHON_VERSION: "3.9"
+  UI_TEST: "true"

 jobs:
  unit-test:
@@ -20,30 +20,31 @@ jobs:
        python-version: ["3.9"]
    steps:
      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3

-      - name: Install Poetry
-        run: pipx install poetry==${{ env.POETRY_VERSION }}
-
-      - name: Set up python ${{ matrix.python-version }}
+      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
-          cache: "poetry"

-      - name: Configure Poetry
-        run: |
-          poetry config virtualenvs.create true
-          poetry config virtualenvs.in-project true
-          poetry env use python
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "pnpm"

      - name: Install dependencies
        shell: bash
-        run: poetry install --with dev
+        run: pnpm install && pnpm build

      - name: Run unit tests
        shell: bash
-        run: |
-          poetry run pytest tests
+        run: uv run pytest tests

  type-check:
    name: Type Check
@@ -53,29 +54,24 @@ jobs:
        working-directory: python/llama-index-server
    steps:
      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v3

-      - name: Install Poetry
-        run: pipx install poetry==${{ env.POETRY_VERSION }}
-
-      - name: Set up Python
+      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
-          cache: "poetry"

-      - name: Configure Poetry
-        run: |
-          poetry config virtualenvs.create true
-          poetry config virtualenvs.in-project true
-          poetry env use python
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true

      - name: Install dependencies
-        shell: bash
-        run: poetry install --with dev
+        run: pnpm install

      - name: Run mypy
        shell: bash
-        run: poetry run mypy llama_index
+        run: uv run mypy llama_index

  build:
    needs: [unit-test, type-check]
@@ -85,27 +81,56 @@ jobs:
        working-directory: python/llama-index-server
    steps:
      - uses: actions/checkout@v4
-      - name: Install Poetry
-        run: pipx install poetry==${{ env.POETRY_VERSION }}
+      - uses: pnpm/action-setup@v3
+
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
-      - name: Clear python cache
-        shell: bash
-        run: poetry cache clear --all pypi
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+          cache: "pnpm"
+
+      - name: Install dependencies
+        run: pnpm install && pnpm build
+
      - name: Build package
        shell: bash
-        run: poetry build
-      - name: Test installing built package
+        run: uv build
+
+      - name: Get the absolute wheel file path and save it to the output
        shell: bash
-        run: python -m pip install .
+        id: get_whl_path
+        run: |
+          WHL_FILE=$(readlink -f dist/*.whl)
+          echo "whl_file=$WHL_FILE" >> $GITHUB_OUTPUT
+
      - name: Test import
        shell: bash
-        working-directory: ${{ vars.RUNNER_TEMP }}
-        run: python -c "from llama_index.server import LlamaIndexServer"
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          uv run --with $WHL_FILE python -c "from llama_index.server import LlamaIndexServer"
+
+      - name: Check frontend resources is present
+        shell: bash
+        working-directory: ${{ github.workspace }}
+        env:
+          WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
+        run: |
+          uv run --with $WHL_FILE python -c "from llama_index.server.chat_ui import check_ui_resources; check_ui_resources()"
+
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: llama-index-server
-          path: python/llama-index-server/dist/
+          path: dist/
@@ -1,3 +1,4 @@
 pnpm format
 pnpm lint
-uvx ruff format --check packages/create-llama/templates/
+uvx ruff check .
+uvx ruff format . --check 
@@ -0,0 +1,18 @@
+node_modules/
+pnpm-lock.yaml
+lib/
+dist/
+cache/
+build/
+.next/
+out/
+packages/server/server/
+packages/server/project/
+**/playwright-report/
+**/test-results/
+
+# Python
+python/
+**/*.mypy_cache/**
+**/*.venv/**
+**/*.ruff_cache/**
@@ -0,0 +1,201 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Repository Overview
+
+Create-llama is a monorepo containing CLI tools and server frameworks for building LlamaIndex-powered applications. The repository combines TypeScript/Node.js and Python components in a unified development environment.
+
+## Architecture
+
+### Monorepo Structure
+
+- **`packages/create-llama/`**: Main CLI tool for scaffolding LlamaIndex applications
+- **`packages/server/`**: TypeScript/Next.js server framework (`@llamaindex/server`)
+- **`python/llama-index-server/`**: Python/FastAPI server framework
+- **Root**: Workspace configuration and shared development tools
+
+### Key Technologies
+
+- **Package Manager**: pnpm with workspace configuration
+- **Build Tools**: bunchee (TypeScript), Next.js, hatchling (Python)
+- **Testing**: Playwright for e2e, pytest for Python
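+- **Version Management**: changesets for TypeScript packages; the Python package is versioned and published by the `new-version-python` and `release-python` scripts, which are chained into `pnpm new-version` and `pnpm release`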
+
+## Development Commands
+
+### Root Level (Monorepo)
+
+```bash
+pnpm dev          # Start all packages in development mode
+pnpm build        # Build all packages
+pnpm lint         # ESLint across TypeScript packages
+pnpm format       # Prettier formatting
+pnpm e2e          # Run end-to-end tests
+```
+
+### Create-llama Package
+
+```bash
+cd packages/create-llama
+npm run build     # Build CLI using bash script and ncc
+npm run dev       # Watch mode development
+npm run e2e       # Playwright tests for generated projects
+npm run clean     # Clean build artifacts and template caches
+```
+
+### TypeScript Server Package
+
+```bash
+cd packages/server
+pnpm dev          # Watch mode with bunchee
+pnpm build        # Multi-step build: ESM/CJS + Next.js + static assets
+pnpm clean        # Clean all build outputs
+```
+
+### Python Server Package
+
+```bash
+cd python/llama-index-server
+uv run generate   # Index data files
+fastapi dev       # Start development server with hot reload
+uv run pytest tests   # Run test suite (same invocation CI uses)
+```
+
+## Template System
+
+The CLI uses a sophisticated template system in `packages/create-llama/templates/`:
+
+### Organization
+
+- **`types/`**: Base project structures (streaming, reflex, llamaindexserver)
+- **`components/`**: Reusable components across frameworks
+  - `engines/` - Chat and agent engines
+  - `loaders/` - File, web, database loaders
+  - `providers/` - AI model configurations
+  - `vectordbs/` - Vector database integrations
+  - `use-cases/` - Workflow implementations
+
+### Development Workflow
+
+- Templates support multiple frameworks (Next.js, Express, FastAPI)
+- Component system allows mix-and-match functionality
+- E2E tests validate generated projects work correctly
+
+## Server Framework Architecture
+
+### TypeScript Server (`@llamaindex/server`)
+
+- **Core**: `LlamaIndexServer` class wrapping Next.js with workflow support
+- **Frontend**: React-based chat UI with shadcn/ui components
+- **API**: `/api/chat` endpoint with streaming responses
+- **Build Process**: Complex multi-step build including static assets for Python integration
+
+### Python Server (`llama-index-server`)
+
+- **Core**: `LlamaIndexServer` class extending FastAPI
+- **Architecture**: Workflow factory pattern for stateless request handling
+- **UI Generation**: AI-powered React component generation from Pydantic schemas
+- **Development**: Hot reloading support with dev mode
+
+## Common Patterns
+
+### Workflow Integration
+
+Both server frameworks use factory patterns:
+
+```typescript
+const server = new LlamaIndexServer({
+  workflow: (context) => createWorkflow(context)
+});
+```
+
+```python
+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    return MyWorkflow(chat_request.messages)
+```
+
+### Event System
+
+Structured events for UI communication:
+
+- **UIEvent**: Custom components with Pydantic/Zod schemas
+- **ArtifactEvent**: Code/documents for Canvas panel
+- **SourceNodesEvent**: Document sources with metadata
+- **AgentRunEvent**: Tool usage and progress tracking
+
+### File Handling
+
+- Both servers auto-mount `data/` and `output/` directories
+- LlamaCloud integration for remote file access
+- Static file serving through framework-specific methods
+
+## Testing Strategy
+
+### E2E Testing
+
+- Playwright tests in `packages/create-llama/e2e/`
+- Tests both Python and TypeScript generated projects
+- Validates CLI generation and application functionality
+
+### Unit Testing
+
+- Python: pytest with comprehensive API and service tests
+- TypeScript: Integrated testing through build process
+
+## Build Process
+
+### Create-llama CLI
+
+1. TypeScript compilation with bash script
+2. ncc bundling for standalone executable
+3. Template validation and caching
+
+### Server Package Build
+
+1. **prebuild**: Clean directories
+2. **build**: bunchee compilation to ESM/CJS
+3. **postbuild**: Next.js preparation and static asset generation
+4. **prepare:py-static**: Python integration assets
+
+### Release Process
+
+```bash
+pnpm release     # Build all + publish npm packages + Python release
+```
+
+## Development Environment Setup
+
+### Prerequisites
+
+- Node.js >=16.14.0
+- Python with uv package manager
+- pnpm for package management
+
+### Common Workflow
+
+1. Clone repository and run `pnpm install`
+2. For CLI development: work in `packages/create-llama/`
+3. For server development: choose TypeScript or Python package
+4. Use `pnpm dev` for concurrent development across packages
+5. Run `pnpm e2e` to validate changes with generated projects
+
+## Special Considerations
+
+### Template Development
+
+- Changes to templates require rebuilding CLI
+- E2E tests validate template functionality across frameworks
+- Template caching system speeds up repeated builds
+
+### Cross-package Dependencies
+
+- Server package builds static assets for Python integration
+- Version synchronization between TypeScript and Python packages
+- Shared UI components and styling across implementations
+
+### Performance
+
+- CLI uses caching for template operations
+- Server frameworks support streaming responses
+- Background processing for file operations and LlamaCloud integration
@@ -106,25 +106,6 @@ Ok to proceed? (y) y
 You can also pass command line arguments to set up a new project
 non-interactively. For a list of the latest options, call `create-llama --help`.

-### Running in pro mode
-
-If you prefer more advanced customization options, you can run `create-llama` in pro mode using the `--pro` flag.
-
-In pro mode, instead of selecting a predefined use case, you'll be prompted to select each technical component of your project. This allows for greater flexibility in customizing your project, including:
-
- **Vector Store**: Choose from a variety of vector stores for keeping your documents, including MongoDB, Pinecone, Weaviate, Qdrant and Chroma.
- **Tools**: Choose from a variety of agent tools (functions called by the LLM), such as:
-  - Code Interpreter: Executes Python code in a secure Jupyter notebook environment
-  - Artifact Code Generator: Generates code artifacts that can be run in a sandbox
-  - OpenAPI Action: Facilitates requests to a provided OpenAPI schema
-  - Image Generator: Creates images based on text descriptions
-  - Web Search: Performs web searches to retrieve up-to-date information
- **Data Sources**: Integrate various data sources into your chat application, including local files, websites, or database-retrieved data.
- **Backend Options**: Besides using Next.js or FastAPI, you can also select to use Express for a more traditional Node.js application.
- **Observability**: Choose from a variety of LLM observability tools, including LlamaTrace and Traceloop.
-
-Pro mode is ideal for developers who want fine-grained control over their project's configuration and are comfortable with more technical setup options.
-
 ## LlamaIndex Documentation

 - [TS/JS docs](https://ts.llamaindex.ai/)
@@ -18,6 +18,21 @@ export default tseslint.config(
    },
  },
  {
+    files: ["packages/create-llama/**"],
+    rules: {
+      "max-params": ["error", 4],
+      "prefer-const": "error",
+      "no-empty": "off",
+      "no-extra-boolean-cast": "off",
+      "@typescript-eslint/no-explicit-any": "off",
+      "@typescript-eslint/no-unused-vars": "off",
+      "@typescript-eslint/no-empty-object-type": "off",
+      "@typescript-eslint/no-wrapper-object-types": "off",
+      "@typescript-eslint/ban-ts-comment": "off",
+    },
+  },
+  {
+    files: ["packages/server/**"],
    rules: {
      "no-irregular-whitespace": "off",
      "@typescript-eslint/no-unused-vars": "off",
@@ -31,12 +46,20 @@ export default tseslint.config(
  },
  {
    ignores: [
+      "python/**",
+      "**/*.mypy_cache/**",
+      "**/*.venv/**",
+      "**/*.ruff_cache/**",
      "**/dist/**",
+      "**/e2e/cache/**",
      "**/lib/*",
      "**/.next/**",
      "**/out/**",
      "**/node_modules/**",
      "**/build/**",
+      "packages/server/server/**",
+      "packages/server/project/**",
+      "packages/server/bin/**",
    ],
  },
 );
@@ -1,39 +1,58 @@
 {
-	"name": "create-llama-monorepo",
-	"version": "1.0.0",
-	"private": true,
-	"description": "Monorepo for create-llama",
-	"keywords": [
-		"rag",
-		"llamaindex"
-	],
-	"repository": {
-		"type": "git",
-		"url": "https://github.com/run-llama/create-llama"
-	},
-	"license": "MIT",
-	"workspaces": [
-		"packages/*"
-	],
-	"scripts": {
-		"prepare": "husky",
-		"new-snapshot": "pnpm -r build && changeset version --snapshot",
-		"new-version": "pnpm -r build && changeset version",
-		"release": "pnpm -r build && changeset publish",
-		"release-snapshot": "pnpm -r build && changeset publish --tag snapshot",
-		"build": "pnpm -r build",
-		"e2e": "pnpm -r e2e",
-		"dev": "pnpm -r dev",
-		"format": "pnpm -r format",
-		"format:write": "pnpm -r format:write",
-		"lint": "pnpm -r lint"
-	},
-	"devDependencies": {
-		"@changesets/cli": "^2.27.1",
-		"husky": "^9.0.10"
-	},
-	"packageManager": "pnpm@9.0.5",
-	"engines": {
-		"node": ">=16.14.0"
-	}
+  "name": "create-llama-monorepo",
+  "version": "1.0.0",
+  "private": true,
+  "description": "Monorepo for create-llama",
+  "keywords": [
+    "rag",
+    "llamaindex"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/run-llama/create-llama"
+  },
+  "license": "MIT",
+  "workspaces": [
+    "packages/*",
+    "python/*"
+  ],
+  "scripts": {
+    "dev": "pnpm -r dev",
+    "build": "pnpm -r build",
+    "e2e": "pnpm -r e2e",
+    "lint": "eslint .",
+    "format": "prettier --ignore-unknown --cache --check .",
+    "format:write": "prettier --ignore-unknown --write .",
+    "prepare": "husky",
+    "new-snapshot": "pnpm -r build && changeset version --snapshot",
+    "new-version-python": "pnpm --filter @create-llama/llama-index-server new-version",
+    "new-version": "pnpm -r build && changeset version && pnpm new-version-python",
+    "release-python": "pnpm --filter @create-llama/llama-index-server release",
+    "release": "pnpm -r build && changeset publish && pnpm release-python",
+    "release-snapshot": "pnpm -r build && changeset publish --tag snapshot"
+  },
+  "devDependencies": {
+    "@changesets/cli": "^2.27.1",
+    "bunchee": "6.4.0",
+    "husky": "^9.0.10",
+    "lint-staged": "^15.2.11",
+    "typescript-eslint": "^8.18.0",
+    "globals": "^15.12.0",
+    "eslint": "9.22.0",
+    "@eslint/js": "^9.25.0",
+    "eslint-config-next": "^15.1.0",
+    "eslint-config-prettier": "^9.1.0",
+    "eslint-plugin-react": "7.37.2",
+    "prettier": "^3.4.2",
+    "prettier-plugin-organize-imports": "^4.1.0",
+    "prettier-plugin-tailwindcss": "^0.6.11",
+    "typescript": "^5.7.3",
+    "@types/node": "^22.9.0",
+    "@types/react": "^19",
+    "@types/react-dom": "^19"
+  },
+  "packageManager": "pnpm@9.0.5",
+  "engines": {
+    "node": ">=16.14.0"
+  }
 }
@@ -1,12 +0,0 @@
-{
-  "extends": [
-    "prettier"
-  ],
-  "rules": {
-    "max-params": [
-      "error",
-      4
-    ],
-    "prefer-const": "error",
-  },
-}
@@ -1,6 +0,0 @@
-apps/docs/i18n
-apps/docs/docs/api
-pnpm-lock.yaml
-lib/
-dist/
-.docusaurus/
@@ -1,5 +1,74 @@
 # create-llama

+## 0.5.21
+
+### Patch Changes
+
+- af9ad3c: feat: show document artifact after generating report
+- a543a27: feat: bump chat-ui with inline artifact
+
+## 0.5.20
+
+### Patch Changes
+
+- 3ff0a18: fix: default header padding
+
+## 0.5.19
+
+### Patch Changes
+
+- 5fe9e17: support eject to fully customize next folder
+- b8a1ff6: Support citation for agentic template (Python)
+
+## 0.5.18
+
+### Patch Changes
+
+- 8d59ef0: Add layout_dir config to the generated python code
+
+## 0.5.17
+
+### Patch Changes
+
+- eee3230: feat: support custom layout
+
+## 0.5.16
+
+### Patch Changes
+
+- 6f75d4a: fix: unsupported language in code gen workflow
+- d0618fa: Fix LlamaCloud generate script issue
+
+## 0.5.15
+
+### Patch Changes
+
+- 527075c: Enable dev mode that allows updating code directly in the UI
+
+## 0.5.14
+
+### Patch Changes
+
+- 1df8cfb: Split artifacts use case to document generator and code generator
+- 1b5a519: chore: improve dev experience with nodemon
+- b3eb0ba: Fix typing check issue
+- 556f33c: fix chromadb dependency issue
+- 2451539: fix: remove dead generated ai code
+- 7a70390: Deprecate pro mode
+
+## 0.5.13
+
+### Patch Changes
+
+- f4ca602: Add artifact use case for Typescript template
+- f4ca602: Update typescript use cases to use the new workflow engine
+
+## 0.5.12
+
+### Patch Changes
+
+- 241d82a: Add artifacts use case (python)
+
 ## 0.5.11

 ### Patch Changes
@@ -0,0 +1,108 @@
+# create-llama Package
+
+## Overview
+
+The `create-llama` package is a CLI tool for creating LlamaIndex-powered applications with one command. It's designed as a project generator that scaffolds various types of RAG (Retrieval-Augmented Generation) applications using different frameworks, databases, and AI model providers.
+
+## Package Structure
+
+### Core Files
+
+- **`index.ts`**: Main CLI entry point using Commander.js for argument parsing
+- **`create-app.ts`**: Core application creation logic and orchestration
+- **`package.json`**: Package configuration with binary entry point at `./dist/index.js`
+
+### Key Directories
+
+- **`helpers/`**: Utility functions for package management, file operations, and configuration
+- **`questions/`**: Interactive prompts for user configuration
+- **`templates/`**: Project templates for different frameworks and use cases
+- **`e2e/`**: End-to-end tests using Playwright
+
+## Core Functionality
+
+### CLI Interface
+
+The tool accepts numerous command-line options including:
+
+- Framework selection (`--framework`: nextjs, express, fastapi)
+- Template type (`--template`: streaming, multiagent, reflex, llamaindexserver)
+- Model providers (OpenAI, Anthropic, Groq, Ollama, etc.)
+- Vector databases (none, mongo, pg, pinecone, milvus, etc.)
+- Data sources (files, web URLs, databases)
+- Tools and observability options
+
+### Application Generation Flow
+
+1. **Project validation**: Checks project name validity and directory permissions
+2. **Interactive questioning**: Prompts user for configuration if not provided via CLI
+3. **Template installation**: Copies and configures appropriate templates
+4. **Environment setup**: Creates `.env` files with API keys and configuration
+5. **Dependencies**: Installs packages using detected/specified package manager
+6. **Post-install actions**: Can run the app, open VSCode, or install dependencies
+
+### Template System
+
+Templates are organized by:
+
+- **Framework**: NextJS (frontend), Express (Node backend), FastAPI (Python backend)
+- **Type**: Streaming chat, multiagent workflows, Reflex UI, LlamaIndex server
+- **Components**: Engines, loaders, providers, UI components, observability
+
+### Helper Functions
+
+Key helper modules include:
+
+- **Installation**: Package manager detection and dependency installation
+- **Data sources**: File copying, web scraping, database connection setup
+- **Providers**: Model provider configuration (OpenAI, Anthropic, etc.)
+- **Tools**: Integration with external tools (Wikipedia, weather, code generation)
+- **Environment**: `.env` file generation with API keys and settings
+
+## Development Commands
+
+### Build & Development
+
+- `npm run build`: Build the CLI using bash script
+- `npm run dev`: Watch mode development build
+- `npm run clean`: Clean build artifacts and temporary files
+
+### Testing
+
+- `npm run e2e`: Run all end-to-end tests
+- `npm run e2e:python`: Test Python-specific templates
+- `npm run e2e:typescript`: Test TypeScript-specific templates
+
+### Package Management
+
+- `npm run pack-install`: Create and install local package for testing
+
+## Architecture Notes
+
+### Model Configuration
+
+The tool supports multiple AI providers with a unified `ModelConfig` interface that includes:
+
+- Provider selection and API key management
+- Model and embedding model specification
+- Dimension configuration for embeddings
+
+### Data Source Handling
+
+Flexible data source configuration supporting:
+
+- Local files and directories
+- Web URLs with configurable crawling depth
+- Database connections with custom queries
+- Automatic file downloading and copying
+
+### Template Flexibility
+
+Templates use a component-based system allowing mix-and-match of:
+
+- Different frameworks (NextJS, Express, FastAPI)
+- Various vector databases
+- Multiple observability tools
+- Configurable tools and integrations
+
+This package serves as the foundation for rapidly prototyping and deploying LlamaIndex applications across different technology stacks and use cases.
@@ -1,4 +1,3 @@
-/* eslint-disable import/no-extraneous-dependencies */
 import path from "path";
 import { green, yellow } from "picocolors";
 import { tryGitInit } from "./helpers/git";
@@ -3,7 +3,7 @@ import { exec } from "child_process";
 import fs from "fs";
 import path from "path";
 import util from "util";
-import { TemplateFramework, TemplateVectorDB } from "../../helpers/types";
+import { TemplateFramework, TemplateType, TemplateUseCase, TemplateVectorDB } from "../../helpers/types";
 import { RunCreateLlamaOptions, createTestDir, runCreateLlama } from "../utils";

 const execAsync = util.promisify(exec);
@@ -11,123 +11,193 @@ const execAsync = util.promisify(exec);
 const templateFramework: TemplateFramework = process.env.FRAMEWORK
  ? (process.env.FRAMEWORK as TemplateFramework)
  : "fastapi";
+const templateType: TemplateType = process.env.TEMPLATE_TYPE
+  ? (process.env.TEMPLATE_TYPE as TemplateType)
+  : "llamaindexserver";
+const useCases: TemplateUseCase[] = [
+  "agentic_rag",
+  "deep_research",
+  "financial_report",
+  "code_generator",
+  "document_generator",
+];
 const dataSource: string = process.env.DATASOURCE
  ? process.env.DATASOURCE
  : "--example-file";

-// TODO: add support for other templates
+test.describe("Mypy check", () => {
+  test.describe.configure({ retries: 0 });

-if (
-  dataSource === "--example-file" // XXX: this test provides its own data source - only trigger it on one data source (usually the CI matrix will trigger multiple data sources)
-) {
-  // vectorDBs, tools, and data source combinations to test
-  const vectorDbs: TemplateVectorDB[] = [
-    "mongo",
-    "pg",
-    "pinecone",
-    "milvus",
-    "astra",
-    "qdrant",
-    "chroma",
-    "weaviate",
-  ];
+    // Test for streaming template
+    test.describe("StreamingTemplate", () => {
+      test.skip(templateType !== "streaming", `skipping streaming test for ${templateType}`);
+      if (
+        dataSource === "--example-file" // XXX: this test provides its own data source - only trigger it on one data source (usually the CI matrix will trigger multiple data sources)
+      ) {
+        // vectorDBs, tools, and data source combinations to test
+        const vectorDbs: TemplateVectorDB[] = [
+          "mongo",
+          "pg",
+          "pinecone",
+          "milvus",
+          "astra",
+          "qdrant",
+          "chroma",
+          "weaviate",
+        ];
+        const toolOptions = [
+          "wikipedia.WikipediaToolSpec",
+          "google.GoogleSearchToolSpec",
+          "document_generator",
+          "artifact",
+        ];

-  const toolOptions = [
-    "wikipedia.WikipediaToolSpec",
-    "google.GoogleSearchToolSpec",
-    "document_generator",
-    "artifact",
-  ];
+        const dataSources = [
+          "--example-file",
+          "--web-source https://www.example.com",
+          "--db-source mysql+pymysql://user:pass@localhost:3306/mydb",
+        ];

-  const dataSources = [
-    "--example-file",
-    "--web-source https://www.example.com",
-    "--db-source mysql+pymysql://user:pass@localhost:3306/mydb",
-  ];
+        const observabilityOptions = ["llamatrace", "traceloop"];

-  const observabilityOptions = ["llamatrace", "traceloop"];
+        // Vector databases: each store must add its vector-store package to
+        // the generated pyproject.toml.
+        for (const vectorDb of vectorDbs) {
+          test(`vectorDB: ${vectorDb} ${templateType}`, async () => {
+            const options: RunCreateLlamaOptions = {
+              cwd: await createTestDir(),
+              templateType: "streaming",
+              templateFramework,
+              dataSource: "--example-file",
+              vectorDb,
+              tools: "none",
+              port: 3000,
+              postInstallAction: "none",
+              templateUI: undefined,
+              appType: "--no-frontend",
+              llamaCloudProjectName: undefined,
+              llamaCloudIndexName: undefined,
+              observability: undefined,
+            };
+            const { pyprojectPath } = await createAndCheckLlamaProject({
+              options,
+            });
+
+            // "pg" maps to the postgres package; every other id is used as-is.
+            const expectedPackage =
+              vectorDb === "pg"
+                ? "llama-index-vector-stores-postgres"
+                : `llama-index-vector-stores-${vectorDb}`;
+            const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
+            if (vectorDb !== "none") {
+              expect(pyprojectContent).toContain(expectedPackage);
+            }
+          });
+        }

-    // Test vector databases
-    for (const vectorDb of vectorDbs) {
-      test(`Mypy check for vectorDB: ${vectorDb}`, async () => {
+        // Tools: the wikipedia and google tool specs must show up in the
+        // generated pyproject.toml.
+        for (const tool of toolOptions) {
+          test(`tool: ${tool} ${templateType}`, async () => {
+            const options: RunCreateLlamaOptions = {
+              cwd: await createTestDir(),
+              templateType: "streaming",
+              templateFramework,
+              dataSource: "--example-file",
+              vectorDb: "none",
+              tools: tool,
+              port: 3000,
+              postInstallAction: "none",
+              templateUI: undefined,
+              appType: "--no-frontend",
+              llamaCloudProjectName: undefined,
+              llamaCloudIndexName: undefined,
+              observability: undefined,
+            };
+            const { pyprojectPath } = await createAndCheckLlamaProject({
+              options,
+            });
+
+            const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
+            switch (tool) {
+              case "wikipedia.WikipediaToolSpec":
+                expect(pyprojectContent).toContain("wikipedia");
+                break;
+              case "google.GoogleSearchToolSpec":
+                expect(pyprojectContent).toContain("google");
+                break;
+              default:
+                // The remaining tools have no dependency assertion; a
+                // successful project generation and mypy run is the check.
+                break;
+            }
+          });
+        }
+
+        // Data sources: web and database sources must pull their reader
+        // packages into the generated pyproject.toml.
+        // The loop variable is named `source` so it does not shadow the
+        // module-level `dataSource` that is read from the environment.
+        for (const source of dataSources) {
+          test(`data source: ${source} ${templateType}`, async () => {
+            const cwd = await createTestDir();
+            const { pyprojectPath } = await createAndCheckLlamaProject({
+              options: {
+                cwd,
+                templateType: "streaming",
+                templateFramework,
+                dataSource: source,
+                vectorDb: "none",
+                tools: "none",
+                port: 3000,
+                postInstallAction: "none",
+                templateUI: undefined,
+                appType: "--no-frontend",
+                llamaCloudProjectName: undefined,
+                llamaCloudIndexName: undefined,
+                observability: undefined,
+              },
+            });
+
+            const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
+            if (source.includes("--web-source")) {
+              expect(pyprojectContent).toContain("llama-index-readers-web");
+            }
+            if (source.includes("--db-source")) {
+              expect(pyprojectContent).toContain("llama-index-readers-database");
+            }
+          });
+        }
+
+        // Observability options: generating the project and passing the mypy
+        // run inside createAndCheckLlamaProject is the whole assertion.
+        // Each option must be registered with `test(...)`, not
+        // `test.describe(...)`: a describe callback runs while tests are being
+        // collected, so awaiting project generation there would execute
+        // outside any test and register nothing for the runner to report.
+        for (const observability of observabilityOptions) {
+          test(`observability: ${observability} ${templateType}`, async () => {
+            const cwd = await createTestDir();
+
+            await createAndCheckLlamaProject({
+              options: {
+                cwd,
+                templateType: "streaming",
+                templateFramework,
+                dataSource: "--example-file",
+                vectorDb: "none",
+                tools: "none",
+                port: 3000,
+                postInstallAction: "none",
+                templateUI: undefined,
+                appType: "--no-frontend",
+                llamaCloudProjectName: undefined,
+                llamaCloudIndexName: undefined,
+                observability,
+              },
+            });
+          });
+        }
+      }
+    });
+
+    test.describe("LlamaIndexServer", async () => {
+      test.skip(templateType !== "llamaindexserver", `skipping llamaindexserver test for ${templateType}`);
+      test.skip(dataSource !== "--example-file", `skipping llamaindexserver test for ${dataSource}`);
+      for (const useCase of useCases) {
        const cwd = await createTestDir();
-        const { pyprojectPath } = await createAndCheckLlamaProject({
+        await createAndCheckLlamaProject({
          options: {
            cwd,
-            templateType: "streaming",
-            templateFramework,
-            dataSource: "--example-file",
-            vectorDb,
-            tools: "none",
-            port: 3000,
-            postInstallAction: "none",
-            templateUI: undefined,
-            appType: "--no-frontend",
-            llamaCloudProjectName: undefined,
-            llamaCloudIndexName: undefined,
-            observability: undefined,
-          },
-        });
-
-        const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
-        if (vectorDb !== "none") {
-          if (vectorDb === "pg") {
-            expect(pyprojectContent).toContain(
-              "llama-index-vector-stores-postgres",
-            );
-          } else {
-            expect(pyprojectContent).toContain(
-              `llama-index-vector-stores-${vectorDb}`,
-            );
-          }
-        }
-      });
-    }
-
-    // Test tools
-    for (const tool of toolOptions) {
-      test(`Mypy check for tool: ${tool}`, async () => {
-        const cwd = await createTestDir();
-        const { pyprojectPath } = await createAndCheckLlamaProject({
-          options: {
-            cwd,
-            templateType: "streaming",
-            templateFramework,
-            dataSource: "--example-file",
-            vectorDb: "none",
-            tools: tool,
-            port: 3000,
-            postInstallAction: "none",
-            templateUI: undefined,
-            appType: "--no-frontend",
-            llamaCloudProjectName: undefined,
-            llamaCloudIndexName: undefined,
-            observability: undefined,
-          },
-        });
-
-        const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
-        if (tool === "wikipedia.WikipediaToolSpec") {
-          expect(pyprojectContent).toContain("wikipedia");
-        }
-        if (tool === "google.GoogleSearchToolSpec") {
-          expect(pyprojectContent).toContain("google");
-        }
-      });
-    }
-
-    // Test data sources
-    for (const dataSource of dataSources) {
-      const dataSourceType = dataSource.split(" ")[0];
-      test(`Mypy check for data source: ${dataSourceType}`, async () => {
-        const cwd = await createTestDir();
-        const { pyprojectPath } = await createAndCheckLlamaProject({
-          options: {
-            cwd,
-            templateType: "streaming",
+            templateType: "llamaindexserver",
            templateFramework,
            dataSource,
            vectorDb: "none",
@@ -139,110 +209,77 @@ if (
            llamaCloudProjectName: undefined,
            llamaCloudIndexName: undefined,
            observability: undefined,
+            useCase,
          },
        });
+      }
+    });

-        const pyprojectContent = fs.readFileSync(pyprojectPath, "utf-8");
-        if (dataSource.includes("--web-source")) {
-          expect(pyprojectContent).toContain("llama-index-readers-web");
-        }
-        if (dataSource.includes("--db-source")) {
-          expect(pyprojectContent).toContain("llama-index-readers-database");
-        }
-      });
-    }
+    async function createAndCheckLlamaProject({
+      options,
+    }: {
+      options: RunCreateLlamaOptions;
+    }): Promise<{ pyprojectPath: string; projectPath: string }> {
+      const result = await runCreateLlama(options);
+      const name = result.projectName;
+      const projectPath = path.join(options.cwd, name);

-    // Test observability options
-    for (const observability of observabilityOptions) {
-      test(`Mypy check for observability: ${observability}`, async () => {
-        const cwd = await createTestDir();
+      // Check if the app folder exists
+      expect(fs.existsSync(projectPath)).toBeTruthy();

-        const { pyprojectPath } = await createAndCheckLlamaProject({
-          options: {
-            cwd,
-            templateType: "streaming",
-            templateFramework,
-            dataSource: "--example-file",
-            vectorDb: "none",
-            tools: "none",
-            port: 3000,
-            postInstallAction: "none",
-            templateUI: undefined,
-            appType: "--no-frontend",
-            llamaCloudProjectName: undefined,
-            llamaCloudIndexName: undefined,
-            observability,
-          },
-        });
-      });
-    }
-  });
-}
+      // Check if pyproject.toml exists
+      const pyprojectPath = path.join(projectPath, "pyproject.toml");
+      expect(fs.existsSync(pyprojectPath)).toBeTruthy();

-async function createAndCheckLlamaProject({
-  options,
-}: {
-  options: RunCreateLlamaOptions;
-}): Promise<{ pyprojectPath: string; projectPath: string }> {
-  const result = await runCreateLlama(options);
-  const name = result.projectName;
-  const projectPath = path.join(options.cwd, name);
+      // Modify environment for the command
+      const commandEnv = {
+        ...process.env,
+      };

-  // Check if the app folder exists
-  expect(fs.existsSync(projectPath)).toBeTruthy();
+      console.log("Running uv venv...");
+      try {
+        const { stdout: venvStdout, stderr: venvStderr } = await execAsync(
+          "uv venv",
+          { cwd: projectPath, env: commandEnv },
+        );
+        console.log("uv venv stdout:", venvStdout);
+        console.error("uv venv stderr:", venvStderr);
+      } catch (error) {
+        console.error("Error running uv venv:", error);
+        throw error; // Re-throw error to fail the test
+      }

-  // Check if pyproject.toml exists
-  const pyprojectPath = path.join(projectPath, "pyproject.toml");
-  expect(fs.existsSync(pyprojectPath)).toBeTruthy();
+      console.log("Running uv sync...");
+      try {
+        const { stdout: syncStdout, stderr: syncStderr } = await execAsync(
+          "uv sync --all-extras",
+          { cwd: projectPath, env: commandEnv },
+        );
+        console.log("uv sync stdout:", syncStdout);
+        console.error("uv sync stderr:", syncStderr);
+      } catch (error) {
+        console.error("Error running uv sync:", error);
+        throw error; // Re-throw error to fail the test
+      }

-  // Modify environment for the command
-  const commandEnv = {
-    ...process.env,
-  };
+      console.log("Running uv run mypy ....");
+      try {
+        const { stdout: mypyStdout, stderr: mypyStderr } = await execAsync(
+          "uv run mypy .",
+          { cwd: projectPath, env: commandEnv },
+        );
+        console.log("uv run mypy stdout:", mypyStdout);
+        console.error("uv run mypy stderr:", mypyStderr);
+        // Assuming mypy success means no output or specific success message
+        // Adjust checks based on actual expected mypy output
+      } catch (error) {
+        console.error("Error running mypy:", error);
+        throw error;
+      }

-  console.log("Running uv venv...");
-  try {
-    const { stdout: venvStdout, stderr: venvStderr } = await execAsync(
-      "uv venv",
-      { cwd: projectPath, env: commandEnv },
-    );
-    console.log("uv venv stdout:", venvStdout);
-    console.error("uv venv stderr:", venvStderr);
-  } catch (error) {
-    console.error("Error running uv venv:", error);
-    throw error; // Re-throw error to fail the test
+      // If we reach this point without throwing an error, the test passes
+      expect(true).toBeTruthy();
+
+    return { pyprojectPath, projectPath };
  }
-
-  console.log("Running uv sync...");
-  try {
-    const { stdout: syncStdout, stderr: syncStderr } = await execAsync(
-      "uv sync --all-extras",
-      { cwd: projectPath, env: commandEnv },
-    );
-    console.log("uv sync stdout:", syncStdout);
-    console.error("uv sync stderr:", syncStderr);
-  } catch (error) {
-    console.error("Error running uv sync:", error);
-    throw error; // Re-throw error to fail the test
-  }
-
-  console.log("Running uv run mypy ....");
-  try {
-    const { stdout: mypyStdout, stderr: mypyStderr } = await execAsync(
-      "uv run mypy .",
-      { cwd: projectPath, env: commandEnv },
-    );
-    console.log("uv run mypy stdout:", mypyStdout);
-    console.error("uv run mypy stderr:", mypyStderr);
-    // Assuming mypy success means no output or specific success message
-    // Adjust checks based on actual expected mypy output
-  } catch (error) {
-    console.error("Error running mypy:", error);
-    throw error;
-  }
-
-  // If we reach this point without throwing an error, the test passes
-  expect(true).toBeTruthy();
-
-  return { pyprojectPath, projectPath };
-}
+});
@@ -1,6 +1,5 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { expect, test } from "@playwright/test";
-import { ChildProcess } from "child_process";
+import { ChildProcess, execSync } from "child_process";
 import fs from "fs";
 import path from "path";
 import type {
@@ -13,21 +12,31 @@ import { createTestDir, runCreateLlama, type AppType } from "../utils";
 const templateFramework: TemplateFramework = process.env.FRAMEWORK
  ? (process.env.FRAMEWORK as TemplateFramework)
  : "fastapi";
-const dataSource: string = "--example-file";
+const dataSource: string = process.env.DATASOURCE
+  ? (process.env.DATASOURCE as string)
+  : "--example-file";
+const llamaCloudProjectName = "create-llama";
+const llamaCloudIndexName = "e2e-test";
+
 const templateUI: TemplateUI = "shadcn";
 const templatePostInstallAction: TemplatePostInstallAction = "runApp";
 const appType: AppType = "--frontend";
 const userMessage = "Write a blog post about physical standards for letters";
-const templateUseCases = ["financial_report", "agentic_rag", "deep_research"];
+const templateUseCases = [
+  "agentic_rag",
+  "financial_report",
+  "deep_research",
+  "code_generator",
+];
+const ejectDir = "next";

 for (const useCase of templateUseCases) {
  test.describe(`Test use case ${useCase} ${templateFramework} ${dataSource} ${templateUI} ${appType} ${templatePostInstallAction}`, async () => {
    test.skip(
-      process.platform !== "linux" ||
-        process.env.DATASOURCE === "--no-files" ||
-        templateFramework === "express",
+      dataSource === "--no-files" || templateFramework === "express",
      "The llamaindexserver template currently only works with nextjs, fastapi. We also only run on Linux to speed up tests.",
    );
+    const useLlamaParse = dataSource === "--llamacloud";
    let port: number;
    let cwd: string;
    let name: string;
@@ -49,6 +58,9 @@ for (const useCase of templateUseCases) {
        templateUI,
        appType,
        useCase,
+        llamaCloudProjectName,
+        llamaCloudIndexName,
+        useLlamaParse,
      });
      name = result.projectName;
      appProcess = result.appProcess;
@@ -99,6 +111,28 @@ for (const useCase of templateUseCases) {
      expect(response.ok()).toBeTruthy();
    });

+    test("Should successfully eject, install dependencies and build without errors", async () => {
+      const ejectNotApplicable =
+        templateFramework !== "nextjs" ||
+        useCase !== "code_generator" ||
+        dataSource === "--llamacloud";
+      test.skip(
+        ejectNotApplicable,
+        "Eject test only applies to Next.js framework, code generator use case, and non-llamacloud",
+      );
+
+      const projectDir = path.join(cwd, name);
+      const ejectedAppDir = path.join(projectDir, ejectDir);
+
+      // Eject from the generated project root
+      execSync("npm run eject", { cwd: projectDir });
+
+      // The eject step must have produced the ejected app directory
+      expect(fs.existsSync(ejectedAppDir)).toBeTruthy();
+
+      // Install and build inside the ejected app; execSync throws on a
+      // non-zero exit code, which fails the test
+      execSync("npm install", { cwd: ejectedAppDir });
+      execSync("npm run build", { cwd: ejectedAppDir });
+    });
+
    // clean processes
    test.afterAll(async () => {
      appProcess?.kill();
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { expect, test } from "@playwright/test";
 import { ChildProcess } from "child_process";
 import fs from "fs";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { expect, test } from "@playwright/test";
 import { ChildProcess } from "child_process";
 import fs from "fs";
@@ -3,7 +3,12 @@ import { exec } from "child_process";
 import fs from "fs";
 import path from "path";
 import util from "util";
-import { TemplateFramework, TemplateVectorDB } from "../../helpers/types";
+import {
+  TemplateFramework,
+  TemplateType,
+  TemplateUseCase,
+  TemplateVectorDB,
+} from "../../helpers/types";
 import { createTestDir, runCreateLlama } from "../utils";

 const execAsync = util.promisify(exec);
@@ -11,6 +16,16 @@ const execAsync = util.promisify(exec);
 const templateFramework: TemplateFramework = process.env.FRAMEWORK
  ? (process.env.FRAMEWORK as TemplateFramework)
  : "nextjs";
+const templateType: TemplateType = process.env.TEMPLATE_TYPE
+  ? (process.env.TEMPLATE_TYPE as TemplateType)
+  : "llamaindexserver";
+const useCases: TemplateUseCase[] = [
+  "agentic_rag",
+  "deep_research",
+  "financial_report",
+  "code_generator",
+  "document_generator",
+];
 const dataSource: string = process.env.DATASOURCE
  ? process.env.DATASOURCE
  : "--example-file";
@@ -29,77 +44,118 @@ const vectorDbs: TemplateVectorDB[] = [
 ];

 test.describe("Test resolve TS dependencies", () => {
+  test.describe.configure({ retries: 0 });
+
  // Test vector DBs without LlamaParse
  for (const vectorDb of vectorDbs) {
-    const optionDescription = `vectorDb: ${vectorDb}, dataSource: ${dataSource}`;
+    const optionDescription = `templateType: ${templateType}, vectorDb: ${vectorDb}, dataSource: ${dataSource}`;

    test(`Vector DB test - ${optionDescription}`, async () => {
-      await runTest(vectorDb, false);
+      // skip vectordb test for llamaindexserver
+      test.skip(
+        templateType === "llamaindexserver",
+        "skipping vectorDB test for llamaindexserver",
+      );
+
+      await runTest({
+        templateType: templateType,
+        useLlamaParse: false, // Disable LlamaParse for vectorDB test
+        vectorDb: vectorDb,
+      });
    });
  }

-  // Test LlamaParse with vectorDB 'none'
-  test(`LlamaParse test - vectorDb: none, dataSource: ${dataSource}, llamaParse: true`, async () => {
-    await runTest("none", true);
-  });
-
-  async function runTest(
-    vectorDb: TemplateVectorDB | "none",
-    useLlamaParse: boolean,
-  ) {
-    const cwd = await createTestDir();
-
-    const result = await runCreateLlama({
-      cwd: cwd,
-      templateType: "streaming",
-      templateFramework: templateFramework,
-      dataSource: dataSource,
-      vectorDb: vectorDb,
-      port: 3000,
-      postInstallAction: "none",
-      templateUI: undefined,
-      appType: templateFramework === "nextjs" ? "" : "--no-frontend",
-      llamaCloudProjectName: undefined,
-      llamaCloudIndexName: undefined,
-      tools: undefined,
-      useLlamaParse: useLlamaParse,
-    });
-    const name = result.projectName;
-
-    // Check if the app folder exists
-    const appDir = path.join(cwd, name);
-    const dirExists = fs.existsSync(appDir);
-    expect(dirExists).toBeTruthy();
-
-    // Install dependencies using pnpm
-    try {
-      const { stderr: installStderr } = await execAsync(
-        "pnpm install --prefer-offline --ignore-workspace",
-        {
-          cwd: appDir,
-        },
-      );
-    } catch (error) {
-      console.error("Error installing dependencies:", error);
-      throw error;
-    }
-
-    // Run tsc type check and capture the output
-    try {
-      const { stdout, stderr } = await execAsync(
-        "pnpm exec tsc -b --diagnostics",
-        {
-          cwd: appDir,
-        },
-      );
-      // Check if there's any error output
-      expect(stderr).toBeFalsy();
-
-      // Log the stdout for debugging purposes
-      console.log("TypeScript type-check output:", stdout);
-    } catch (error) {
-      console.error("Error running tsc:", error);
-      throw error;
+  // Use-case tests: no vector DB is passed, and each use case is generated
+  // without LlamaParse and, where applicable, with LlamaParse.
+  // They only run for the example-file data source.
+  if (dataSource === "--example-file") {
+    for (const useCase of useCases) {
+      const optionDescription = `templateType: ${templateType}, useCase: ${useCase}`;
+      test.describe(`useCase test - ${optionDescription}`, () => {
+        test.skip(
+          templateType === "streaming",
+          "Skipping use case test for streaming template.",
+        );
+        test(`no llamaParse - ${optionDescription}`, async () => {
+          await runTest({
+            templateType: templateType,
+            useLlamaParse: false,
+            useCase: useCase,
+          });
+        });
+        // code_generator and document_generator skip the LlamaParse variant
+        // because those use cases don't use an index.
+        if (useCase !== "code_generator" && useCase !== "document_generator") {
+          test(`llamaParse - ${optionDescription}`, async () => {
+            await runTest({
+              templateType: templateType,
+              useLlamaParse: true,
+              useCase: useCase,
+            });
+          });
+        }
+      });
     }
   }
 });
+
+/**
+ * Scaffolds a project with create-llama, installs its dependencies with pnpm
+ * and type-checks it with `tsc`.
+ *
+ * The framework and data source come from the module-level `templateFramework`
+ * and `dataSource` values; everything else is taken from `options`.
+ *
+ * @param options.templateType - Template to generate.
+ * @param options.useLlamaParse - Forwarded to create-llama as `useLlamaParse`.
+ * @param options.useCase - Optional use case forwarded to create-llama.
+ * @param options.vectorDb - Vector database; falls back to "none" when omitted.
+ * @throws Rethrows any error from `pnpm install` or `tsc` after logging it.
+ */
+async function runTest(options: {
+  templateType: TemplateType;
+  useLlamaParse: boolean;
+  useCase?: TemplateUseCase;
+  vectorDb?: TemplateVectorDB;
+}) {
+  const cwd = await createTestDir();
+
+  const result = await runCreateLlama({
+    cwd: cwd,
+    templateType: options.templateType,
+    templateFramework: templateFramework,
+    dataSource: dataSource,
+    vectorDb: options.vectorDb ?? "none",
+    port: 3000,
+    postInstallAction: "none",
+    templateUI: undefined,
+    appType: templateFramework === "nextjs" ? "" : "--no-frontend",
+    llamaCloudProjectName: undefined,
+    llamaCloudIndexName: undefined,
+    tools: undefined,
+    useLlamaParse: options.useLlamaParse,
+    useCase: options.useCase,
+  });
+  const name = result.projectName;
+
+  // Check if the app folder exists
+  const appDir = path.join(cwd, name);
+  const dirExists = fs.existsSync(appDir);
+  expect(dirExists).toBeTruthy();
+
+  // Install dependencies using pnpm; only failure matters here, so the
+  // command output is not captured
+  try {
+    await execAsync("pnpm install --prefer-offline --ignore-workspace", {
+      cwd: appDir,
+    });
+  } catch (error) {
+    console.error("Error installing dependencies:", error);
+    throw error;
+  }
+
+  // Run tsc type check and capture the output
+  try {
+    const { stdout, stderr } = await execAsync(
+      "pnpm exec tsc -b --diagnostics",
+      {
+        cwd: appDir,
+      },
+    );
+    // Check if there's any error output
+    expect(stderr).toBeFalsy();
+
+    // Log the stdout for debugging purposes
+    console.log("TypeScript type-check output:", stdout);
+  } catch (error) {
+    // A failed `expect` above also lands here, so it is logged and rethrown
+    // together with genuine tsc failures
+    console.error("Error running tsc:", error);
+    throw error;
+  }
+}
@@ -67,8 +67,8 @@ export async function runCreateLlama({
  ].join("-");

  // Handle different data source types
-  let dataSourceArgs = [];
-  if (dataSource.includes("--web-source" || "--db-source")) {
+  const dataSourceArgs = [];
+  if (dataSource.includes("--web-source")) {
    const webSource = dataSource.split(" ")[1];
    dataSourceArgs.push("--web-source", webSource);
  } else if (dataSource.includes("--db-source")) {
@@ -1,4 +1,3 @@
-/* eslint-disable import/no-extraneous-dependencies */
 import { async as glob } from "fast-glob";
 import fs from "fs";
 import path from "path";
@@ -181,7 +181,7 @@ const getVectorDBEnvs = (
            ]
          : []),
      ];
-    case "chroma":
+    case "chroma": {
      const envs = [
        {
          name: "CHROMA_COLLECTION",
@@ -206,6 +206,7 @@ Otherwise, use CHROMA_HOST and CHROMA_PORT config above`,
        });
      }
      return envs;
+    }
    case "weaviate":
      return [
        {
@@ -1,4 +1,3 @@
-/* eslint-disable import/no-extraneous-dependencies */
 import { execSync } from "child_process";
 import fs from "fs";
 import path from "path";
@@ -18,6 +18,7 @@ import {
  ModelConfig,
  TemplateDataSource,
  TemplateFramework,
+  TemplateUseCase,
  TemplateVectorDB,
 } from "./types";
 import { installTSTemplate } from "./typescript";
@@ -60,6 +61,7 @@ async function generateContextData(
  vectorDb?: TemplateVectorDB,
  llamaCloudKey?: string,
  useLlamaParse?: boolean,
+  useCase?: TemplateUseCase,
 ) {
  if (packageManager) {
    const runGenerate = `${cyan(
@@ -96,7 +98,12 @@ async function generateContextData(
        }
      } else {
        console.log(`Running ${runGenerate} to generate the context data.`);
-        await callPackageManager(packageManager, true, ["run", "generate"]);
+        const shouldRunGenerate =
+          useCase !== "code_generator" && useCase !== "document_generator"; // Artifact use case doesn't use index.
+
+        if (shouldRunGenerate) {
+          await callPackageManager(packageManager, true, ["run", "generate"]);
+        }
        return;
      }
    }
@@ -224,6 +231,7 @@ export const installTemplate = async (
        props.vectorDb,
        props.llamaCloudKey,
        props.useLlamaParse,
+        props.useCase,
      );
    }

@@ -1,4 +1,3 @@
-/* eslint-disable import/no-extraneous-dependencies */
 import spawn from "cross-spawn";
 import { yellow } from "picocolors";
 import type { PackageManager } from "./get-pkg-manager";
@@ -1,4 +1,3 @@
-/* eslint-disable import/no-extraneous-dependencies */
 import fs from "fs";
 import path from "path";
 import { blue, green } from "picocolors";
@@ -1,4 +1,3 @@
-/* eslint-disable import/no-extraneous-dependencies */
 import { execSync } from "child_process";
 import fs from "fs";

@@ -28,7 +28,7 @@ export async function askModelConfig({
 }: ModelConfigQuestionsParams): Promise<ModelConfig> {
  let modelProvider: ModelProvider = DEFAULT_MODEL_PROVIDER;
  if (askModels) {
-    let choices = [
+    const choices = [
      { title: "OpenAI", value: "openai" },
      { title: "Groq", value: "groq" },
      { title: "Ollama", value: "ollama" },
@@ -5,6 +5,7 @@ import { parse, stringify } from "smol-toml";
 import terminalLink from "terminal-link";
 import { isUvAvailable, tryUvSync } from "./uv";

+import { isCI } from "ci-info";
 import { assetRelocator, copy } from "./copy";
 import { templatesDir } from "./dir";
 import { Tool } from "./tools";
@@ -31,6 +32,7 @@ const getAdditionalDependencies = (
  tools?: Tool[],
  templateType?: TemplateType,
  observability?: TemplateObservability,
+  // eslint-disable-next-line max-params
 ) => {
  const dependencies: Dependency[] = [];

@@ -93,6 +95,10 @@ const getAdditionalDependencies = (
        name: "llama-index-vector-stores-chroma",
        version: ">=0.4.0,<0.5.0",
      });
+      dependencies.push({
+        name: "onnxruntime",
+        version: "<1.22.0",
+      });
      break;
    }
    case "weaviate": {
@@ -262,7 +268,7 @@ const getAdditionalDependencies = (
    if (observability === "traceloop") {
      dependencies.push({
        name: "traceloop-sdk",
-        version: ">=0.15.11,<0.16.0",
+        version: ">=0.15.11",
      });
    }
    if (observability === "llamatrace") {
@@ -273,6 +279,19 @@ const getAdditionalDependencies = (
    }
  }

+  // If app template is llama-index-server and CI and SERVER_PACKAGE_PATH is set,
+  // add the locally built llama-index-server package to dependencies
+  if (
+    templateType === "llamaindexserver" &&
+    isCI &&
+    process.env.SERVER_PACKAGE_PATH
+  ) {
+    dependencies.push({
+      name: "llama-index-server",
+      version: `@file://${process.env.SERVER_PACKAGE_PATH}`,
+    });
+  }
+
  return dependencies;
 };

@@ -562,15 +581,21 @@ const installLlamaIndexServerTemplate = async ({
    process.exit(1);
  }

-  await copy("workflow.py", path.join(root, "app"), {
+  await copy("*.py", path.join(root, "app"), {
    parents: true,
-    cwd: path.join(templatesDir, "components", "workflows", "python", useCase),
+    cwd: path.join(templatesDir, "components", "use-cases", "python", useCase),
  });

  // Copy custom UI component code
  await copy(`*`, path.join(root, "components"), {
    parents: true,
-    cwd: path.join(templatesDir, "components", "ui", "workflows", useCase),
+    cwd: path.join(templatesDir, "components", "ui", "use-cases", useCase),
+  });
+
+  // Copy layout components to layout folder in root
+  await copy("*", path.join(root, "layout"), {
+    parents: true,
+    cwd: path.join(templatesDir, "components", "ui", "layout"),
  });

  if (useLlamaParse) {
@@ -601,7 +626,7 @@ const installLlamaIndexServerTemplate = async ({
  // Copy README.md
  await copy("README-template.md", path.join(root), {
    parents: true,
-    cwd: path.join(templatesDir, "components", "workflows", "python", useCase),
+    cwd: path.join(templatesDir, "components", "use-cases", "python", useCase),
    rename: assetRelocator,
  });
 };
@@ -672,6 +697,7 @@ export const installPythonTemplate = async ({
    dataSources,
    tools,
    template,
+    observability,
  );

  await addDependencies(root, addOnDependencies);
@@ -57,7 +57,9 @@ export type TemplateUseCase =
  | "form_filling"
  | "extractor"
  | "contract_review"
-  | "agentic_rag";
+  | "agentic_rag"
+  | "code_generator"
+  | "document_generator";
 // Config for both file and folder
 export type FileSourceConfig =
  | {
@@ -31,23 +31,30 @@ const installLlamaIndexServerTemplate = async ({
    process.exit(1);
  }

-  await copy("workflow.ts", path.join(root, "src", "app"), {
-    parents: true,
+  await copy("**", path.join(root), {
    cwd: path.join(
      templatesDir,
      "components",
-      "workflows",
+      "use-cases",
      "typescript",
      useCase,
    ),
+    rename: assetRelocator,
  });

-  // copy workflow UI components to output/components folder
+  // copy workflow UI components to components folder in root
  await copy("*", path.join(root, "components"), {
    parents: true,
-    cwd: path.join(templatesDir, "components", "ui", "workflows", useCase),
+    cwd: path.join(templatesDir, "components", "ui", "use-cases", useCase),
  });

+  // copy layout components to layout folder in root
+  await copy("*", path.join(root, "layout"), {
+    parents: true,
+    cwd: path.join(templatesDir, "components", "ui", "layout"),
+  });
+
+  // Override generate.ts when LlamaCloud is used as the vector DB
  if (vectorDb === "llamacloud") {
    await copy("generate.ts", path.join(root, "src"), {
      parents: true,
@@ -74,18 +81,14 @@ const installLlamaIndexServerTemplate = async ({
      rename: () => "data.ts",
    });
  }
-  // Copy README.md
-  await copy("README-template.md", path.join(root), {
-    parents: true,
-    cwd: path.join(
-      templatesDir,
-      "components",
-      "workflows",
-      "typescript",
-      useCase,
-    ),
-    rename: assetRelocator,
-  });
+
+  // Simplify use case code
+  if (useCase === "code_generator" || useCase === "document_generator") {
+    // Artifact use case doesn't use index.
+    // We don't need data.ts, generate.ts
+    await fs.rm(path.join(root, "src", "app", "data.ts"));
+    // TODO: Remove generate index in generate.ts and package.json if possible
+  }
 };

 const installLegacyTSTemplate = async ({
@@ -390,7 +393,7 @@ const providerDependencies: {
  [key in ModelProvider]?: Record<string, string>;
 } = {
  openai: {
-    "@llamaindex/openai": "^0.2.0",
+    "@llamaindex/openai": "~0.4.0",
  },
  gemini: {
    "@llamaindex/google": "^0.2.0",
@@ -516,7 +519,7 @@ async function updatePackageJson({
  if (backend) {
    packageJson.dependencies = {
      ...packageJson.dependencies,
-      "@llamaindex/readers": "^2.0.0",
+      "@llamaindex/readers": "~3.1.4",
    };

    if (vectorDb && vectorDb in vectorDbDependencies) {
@@ -546,6 +549,16 @@ async function updatePackageJson({
    };
  }

+  // if having custom server package tgz file, use it for testing @llamaindex/server
+  const serverPackagePath = process.env.SERVER_PACKAGE_PATH;
+  if (serverPackagePath && template === "llamaindexserver") {
+    const relativePath = path.relative(process.cwd(), serverPackagePath);
+    packageJson.dependencies = {
+      ...packageJson.dependencies,
+      "@llamaindex/server": `file:${relativePath}`,
+    };
+  }
+
  await fs.writeFile(
    packageJsonFile,
    JSON.stringify(packageJson, null, 2) + os.EOL,
@@ -1,4 +1,3 @@
-// eslint-disable-next-line import/no-extraneous-dependencies
 import validateProjectName from "validate-npm-package-name";

 export function validateNpmName(name: string): {
@@ -1,4 +1,3 @@
-/* eslint-disable import/no-extraneous-dependencies */
 import { execSync } from "child_process";
 import { Command } from "commander";
 import fs from "fs";
@@ -197,7 +196,7 @@ const program = new Command(packageJson.name)
    "--pro",
    `

-  Allow interactive selection of all features.
+  Deprecated: Allow interactive selection of all features.
 `,
    false,
  )
@@ -1,6 +1,6 @@
 {
  "name": "create-llama",
-  "version": "0.5.11",
+  "version": "0.5.21",
  "description": "Create LlamaIndex-powered apps with one command",
  "keywords": [
    "rag",
@@ -31,9 +31,6 @@
    "e2e": "playwright test",
    "e2e:python": "playwright test e2e/shared e2e/python",
    "e2e:typescript": "playwright test e2e/shared e2e/typescript",
-    "format": "prettier --ignore-unknown --cache --check .",
-    "format:write": "prettier --ignore-unknown --write .",
-    "lint": "eslint . --ignore-pattern dist --ignore-pattern e2e/cache",
    "pack-install": "bash ./scripts/pack.sh"
  },
  "dependencies": {
@@ -66,10 +63,6 @@
    "yaml": "2.4.1"
  },
  "devDependencies": {
-    "eslint": "^8.56.0",
-    "eslint-config-prettier": "^8.10.0",
-    "prettier": "^3.2.5",
-    "prettier-plugin-organize-imports": "^3.2.4",
    "@playwright/test": "^1.41.1",
    "@vercel/ncc": "0.38.1",
    "rimraf": "^5.0.5",
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { defineConfig, devices } from "@playwright/test";

 export default defineConfig({
@@ -1,3 +0,0 @@
-module.exports = {
-  plugins: ["prettier-plugin-organize-imports"],
-};
@@ -6,7 +6,7 @@ const defaults: Omit<QuestionArgs, "modelConfig"> = {
  framework: "nextjs",
  ui: "shadcn",
  frontend: false,
-  llamaCloudKey: "",
+  llamaCloudKey: undefined,
  useLlamaParse: false,
  communityProjectConfig: undefined,
  llamapack: "",
@@ -1,4 +1,5 @@
 import ciInfo from "ci-info";
+import { bold, yellow } from "picocolors";
 import { getCIQuestionResults } from "./ci";
 import { askProQuestions } from "./questions";
 import { askSimpleQuestions } from "./simple";
@@ -13,6 +14,12 @@ export const askQuestions = async (
    return await getCIQuestionResults(args);
  } else if (args.pro) {
    // TODO: refactor pro questions to return a result object
+    console.log(
+      yellow(
+        `Pro mode is deprecated. Please use the new templates using the ${bold("LlamaIndexServer")} by not specifying pro mode.`,
+      ),
+    );
+
    await askProQuestions(args);
    return args as unknown as QuestionResults;
  }
@@ -6,7 +6,12 @@ import { ModelConfig, TemplateFramework } from "../helpers/types";
 import { PureQuestionArgs, QuestionResults } from "./types";
 import { askPostInstallAction, questionHandlers } from "./utils";

-type AppType = "agentic_rag" | "financial_report" | "deep_research";
+type AppType =
+  | "agentic_rag"
+  | "financial_report"
+  | "deep_research"
+  | "code_generator"
+  | "document_generator";

 type SimpleAnswers = {
  appType: AppType;
@@ -42,6 +47,16 @@ export const askSimpleQuestions = async (
          description:
            "Researches and analyzes provided documents from multiple perspectives, generating a comprehensive report with citations to support key findings and insights.",
        },
+        {
+          title: "Code Generator",
+          value: "code_generator",
+          description: "Build a Vercel v0 styled code generator.",
+        },
+        {
+          title: "Document Generator",
+          value: "document_generator",
+          description: "Build a OpenAI canvas-styled document generator.",
+        },
      ],
    },
    questionHandlers,
@@ -52,35 +67,35 @@ export const askSimpleQuestions = async (

  let useLlamaCloud = false;

-  if (appType !== "extractor" && appType !== "contract_review") {
-    const { language: newLanguage } = await prompts(
-      {
-        type: "select",
-        name: "language",
-        message: "What language do you want to use?",
-        choices: [
-          { title: "Python (FastAPI)", value: "fastapi" },
-          { title: "Typescript (NextJS)", value: "nextjs" },
-        ],
-      },
-      questionHandlers,
-    );
-    language = newLanguage;
-  }
-
-  const { useLlamaCloud: newUseLlamaCloud } = await prompts(
+  const { language: newLanguage } = await prompts(
    {
-      type: "toggle",
-      name: "useLlamaCloud",
-      message: "Do you want to use LlamaCloud services?",
-      initial: false,
-      active: "Yes",
-      inactive: "No",
-      hint: "see https://www.llamaindex.ai/enterprise for more info",
+      type: "select",
+      name: "language",
+      message: "What language do you want to use?",
+      choices: [
+        { title: "Python (FastAPI)", value: "fastapi" },
+        { title: "Typescript (NextJS)", value: "nextjs" },
+      ],
    },
    questionHandlers,
  );
-  useLlamaCloud = newUseLlamaCloud;
+  language = newLanguage;
+
+  if (appType !== "code_generator" && appType !== "document_generator") {
+    const { useLlamaCloud: newUseLlamaCloud } = await prompts(
+      {
+        type: "toggle",
+        name: "useLlamaCloud",
+        message: "Do you want to use LlamaCloud services?",
+        initial: false,
+        active: "Yes",
+        inactive: "No",
+        hint: "see https://www.llamaindex.ai/enterprise for more info",
+      },
+      questionHandlers,
+    );
+    useLlamaCloud = newUseLlamaCloud;
+  }

  if (useLlamaCloud && !llamaCloudKey) {
    // Ask for LlamaCloud API key, if not set
@@ -111,10 +126,10 @@ const convertAnswers = async (
  args: PureQuestionArgs,
  answers: SimpleAnswers,
 ): Promise<QuestionResults> => {
-  const MODEL_GPT4o: ModelConfig = {
+  const MODEL_GPT41: ModelConfig = {
    provider: "openai",
    apiKey: args.openAiKey,
-    model: "gpt-4o",
+    model: "gpt-4.1",
    embeddingModel: "text-embedding-3-large",
    dimensions: 1536,
    isConfigured(): boolean {
@@ -135,13 +150,25 @@ const convertAnswers = async (
      template: "llamaindexserver",
      dataSources: EXAMPLE_10K_SEC_FILES,
      tools: getTools(["interpreter", "document_generator"]),
-      modelConfig: MODEL_GPT4o,
+      modelConfig: MODEL_GPT41,
    },
    deep_research: {
      template: "llamaindexserver",
      dataSources: EXAMPLE_10K_SEC_FILES,
      tools: [],
-      modelConfig: MODEL_GPT4o,
+      modelConfig: MODEL_GPT41,
+    },
+    code_generator: {
+      template: "llamaindexserver",
+      dataSources: [],
+      tools: [],
+      modelConfig: MODEL_GPT41,
+    },
+    document_generator: {
+      template: "llamaindexserver",
+      dataSources: [],
+      tools: [],
+      modelConfig: MODEL_GPT41,
    },
  };

@@ -191,7 +191,7 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
          case "png":
          case "jpeg":
          case "svg":
-          case "pdf":
+          case "pdf": {
            const { filename } = this.saveToDisk(data, ext);
            output.push({
              type: ext as InterpreterExtraType,
@@ -199,6 +199,7 @@ export class InterpreterTool implements BaseTool<InterpreterParameter> {
              url: this.getFileUrl(filename),
            });
            break;
+          }
          default:
            output.push({
              type: ext as InterpreterExtraType,
@@ -1,5 +1,9 @@
-import { Document, LLamaCloudFileService, VectorStoreIndex } from "llamaindex";
-import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
+import {
+  Document,
+  LLamaCloudFileService,
+  LlamaCloudIndex,
+  VectorStoreIndex,
+} from "llamaindex";
 import { DocumentFile } from "../streaming/annotations";
 import { parseFile, storeFile } from "./helper";
 import { runPipeline } from "./pipeline";
@@ -10,8 +10,9 @@ dependencies = [
    "python-dotenv>=1.0.0",
    "pydantic<2.10",
    "llama-index>=0.12.1",
+    "llama-parse>=0.6.21,<0.7.0",
    "cachetools>=5.3.3",
-    "reflex>=0.6.2.post1",
+    "reflex==0.7.10",
 ]

 [project.scripts]
@@ -11,8 +11,9 @@ dependencies = [
    "python-dotenv>=1.0.0",
    "pydantic<2.10",
    "llama-index>=0.12.1",
+    "llama-parse>=0.6.21,<0.7.0",
    "cachetools>=5.3.3",
-    "reflex>=0.6.2.post1",
+    "reflex==0.7.10",
 ]

 [project.scripts]
@@ -6,7 +6,7 @@ import { Message } from "./chat-messages";
 export default function ChatAvatar(message: Message) {
  if (message.role === "user") {
    return (
-      <div className="flex h-8 w-8 shrink-0 select-none items-center justify-center rounded-md border shadow bg-background">
+      <div className="bg-background flex h-8 w-8 shrink-0 select-none items-center justify-center rounded-md border shadow">
        <svg
          xmlns="http://www.w3.org/2000/svg"
          viewBox="0 0 256 256"
@@ -20,7 +20,7 @@ export default function ChatAvatar(message: Message) {
  }

  return (
-    <div className="flex h-8 w-8 shrink-0 select-none items-center justify-center rounded-md border  bg-black text-white">
+    <div className="flex h-8 w-8 shrink-0 select-none items-center justify-center rounded-md border bg-black text-white">
      <Image
        className="rounded-md"
        src="/llama.png"
@@ -23,20 +23,20 @@ export default function ChatInput(props: ChatInputProps) {
    <>
      <form
        onSubmit={props.handleSubmit}
-        className="flex items-start justify-between w-full max-w-5xl p-4 bg-white rounded-xl shadow-xl gap-4"
+        className="flex w-full max-w-5xl items-start justify-between gap-4 rounded-xl bg-white p-4 shadow-xl"
      >
        <input
          autoFocus
          name="message"
          placeholder="Type a message"
-          className="w-full p-4 rounded-xl shadow-inner flex-1"
+          className="w-full flex-1 rounded-xl p-4 shadow-inner"
          value={props.input}
          onChange={props.handleInputChange}
        />
        <button
          disabled={props.isLoading}
          type="submit"
-          className="p-4 text-white rounded-xl shadow-xl bg-gradient-to-r from-cyan-500 to-sky-500 disabled:opacity-50 disabled:cursor-not-allowed"
+          className="rounded-xl bg-gradient-to-r from-cyan-500 to-sky-500 p-4 text-white shadow-xl disabled:cursor-not-allowed disabled:opacity-50"
        >
          Send message
        </button>
@@ -7,7 +7,7 @@ export default function ChatItem(message: Message) {
  return (
    <div className="flex items-start gap-4 pt-5">
      <ChatAvatar {...message} />
-      <p className="break-words whitespace-pre-wrap">{message.content}</p>
+      <p className="whitespace-pre-wrap break-words">{message.content}</p>
    </div>
  );
 }
@@ -39,7 +39,7 @@ export default function ChatMessages({

  return (
    <div
-      className="flex-1 w-full max-w-5xl p-4 bg-white rounded-xl shadow-xl overflow-auto"
+      className="w-full max-w-5xl flex-1 overflow-auto rounded-xl bg-white p-4 shadow-xl"
      ref={scrollableChatContainerRef}
    >
      <div className="flex flex-col gap-5 divide-y">
@@ -0,0 +1,40 @@
+"use client";
+
+import { Sparkles, Star } from "lucide-react";
+
+export default function Header() {
+  return (
+    <div className="flex items-center justify-between p-2 px-4">
+      <div className="flex items-center gap-2">
+        <Sparkles className="size-4" />
+        <h1 className="font-semibold">LlamaIndex App</h1>
+      </div>
+      <div className="flex items-center justify-end gap-4">
+        <div className="flex items-center gap-2">
+          <a
+            href="https://www.llamaindex.ai/"
+            target="_blank"
+            rel="noopener noreferrer"
+            className="text-sm text-gray-600 hover:text-gray-800 dark:text-gray-400 dark:hover:text-gray-200"
+          >
+            Built by LlamaIndex
+          </a>
+          <img
+            className="h-[24px] w-[24px] rounded-sm"
+            src="/llama.png"
+            alt="Llama Logo"
+          />
+        </div>
+        <a
+          href="https://github.com/run-llama/LlamaIndexTS"
+          target="_blank"
+          rel="noopener noreferrer"
+          className="hover:bg-accent flex items-center gap-2 rounded-md border border-gray-300 px-2 py-1 text-sm"
+        >
+          <Star className="size-4" />
+          Star on GitHub
+        </a>
+      </div>
+    </div>
+  );
+}
@@ -0,0 +1,132 @@
+import { Badge } from "@/components/ui/badge";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Progress } from "@/components/ui/progress";
+import { Skeleton } from "@/components/ui/skeleton";
+import { cn } from "@/lib/utils";
+import { Markdown } from "@llamaindex/chat-ui/widgets";
+import { ListChecks, Loader2, Wand2 } from "lucide-react";
+import { useEffect, useState } from "react";
+
+const STAGE_META = {
+  plan: {
+    icon: ListChecks,
+    badgeText: "Step 1/2: Planning",
+    gradient: "from-blue-100 via-blue-50 to-white",
+    progress: 33,
+    iconBg: "bg-blue-100 text-blue-600",
+    badge: "bg-blue-100 text-blue-700",
+  },
+  generate: {
+    icon: Wand2,
+    badgeText: "Step 2/2: Generating",
+    gradient: "from-violet-100 via-violet-50 to-white",
+    progress: 66,
+    iconBg: "bg-violet-100 text-violet-600",
+    badge: "bg-violet-100 text-violet-700",
+  },
+};
+
+function ArtifactWorkflowCard({ event }) {
+  const [visible, setVisible] = useState(event?.state !== "completed");
+  const [fade, setFade] = useState(false);
+
+  useEffect(() => {
+    if (event?.state === "completed") {
+      setVisible(false);
+    } else {
+      setVisible(true);
+      setFade(false);
+    }
+  }, [event?.state]);
+
+  if (!event || !visible) return null;
+
+  const { state, requirement } = event;
+  const meta = STAGE_META[state];
+
+  if (!meta) return null;
+
+  return (
+    <div className="flex min-h-[180px] w-full items-center justify-center py-2">
+      <Card
+        className={cn(
+          "w-full rounded-xl shadow-md transition-all duration-500",
+          "border-0",
+          fade && "pointer-events-none opacity-0",
+          `bg-gradient-to-br ${meta.gradient}`,
+        )}
+        style={{
+          boxShadow:
+            "0 2px 12px 0 rgba(80, 80, 120, 0.08), 0 1px 3px 0 rgba(80, 80, 120, 0.04)",
+        }}
+      >
+        <CardHeader className="flex flex-row items-center gap-2 px-3 pb-1 pt-2">
+          <div
+            className={cn(
+              "flex items-center justify-center rounded-full p-1",
+              meta.iconBg,
+            )}
+          >
+            <meta.icon className="h-5 w-5" />
+          </div>
+          <CardTitle className="flex items-center gap-2 text-base font-semibold">
+            <Badge className={cn("ml-1", meta.badge, "px-2 py-0.5 text-xs")}>
+              {meta.badgeText}
+            </Badge>
+          </CardTitle>
+        </CardHeader>
+        <CardContent className="px-3 py-1">
+          {state === "plan" && (
+            <div className="flex flex-col items-center gap-2 py-2">
+              <Loader2 className="mb-1 h-6 w-6 animate-spin text-blue-400" />
+              <div className="text-center text-sm font-medium text-blue-900">
+                Analyzing your request...
+              </div>
+              <Skeleton className="mt-1 h-3 w-1/2 rounded-full" />
+            </div>
+          )}
+          {state === "generate" && (
+            <div className="flex flex-col gap-2 py-2">
+              <div className="flex items-center gap-1">
+                <Loader2 className="h-4 w-4 animate-spin text-violet-400" />
+                <span className="text-sm font-medium text-violet-900">
+                  Working on the requirement:
+                </span>
+              </div>
+              <div className="max-h-24 overflow-auto rounded-lg border border-violet-200 bg-violet-50 px-2 py-1 text-xs">
+                {requirement ? (
+                  <Markdown content={requirement} />
+                ) : (
+                  <span className="italic text-violet-400">
+                    No requirements available yet.
+                  </span>
+                )}
+              </div>
+            </div>
+          )}
+        </CardContent>
+        <div className="px-3 pb-2 pt-1">
+          <Progress
+            value={meta.progress}
+            className={cn(
+              "h-1 rounded-full bg-gray-200",
+              state === "plan" && "bg-blue-200",
+              state === "generate" && "bg-violet-200",
+            )}
+          />
+        </div>
+      </Card>
+    </div>
+  );
+}
+
+export default function Component({ events }) {
+  const aggregateEvents = () => {
+    if (!events || events.length === 0) return null;
+    return events[events.length - 1];
+  };
+
+  const event = aggregateEvents();
+
+  return <ArtifactWorkflowCard event={event} />;
+}
@@ -97,7 +97,7 @@ export default function Component({ events }) {
      case "pending":
        return <Clock className="h-4 w-4 text-gray-400" />;
      case "inprogress":
-        return <Loader2 className="h-4 w-4 text-blue-500 animate-spin" />;
+        return <Loader2 className="h-4 w-4 animate-spin text-blue-500" />;
      case "done":
        return <CheckCircle className="h-4 w-4 text-green-500" />;
      case "error":
@@ -140,9 +140,9 @@ export default function Component({ events }) {
  };

  return (
-    <div className="w-full max-w-4xl mx-auto space-y-6 p-4">
+    <div className="mx-auto w-full max-w-4xl space-y-6 p-4">
      {/* Header */}
-      <div className="flex items-center justify-between mb-6">
+      <div className="mb-6 flex items-center justify-between">
        <h1 className="text-2xl font-bold">DeepResearch Workflow</h1>
        <div className="flex items-center space-x-2">
          <Badge
@@ -188,7 +188,7 @@ export default function Component({ events }) {
        className={cn(
          "border-2 transition-all duration-300",
          retrieve?.state === "inprogress"
-            ? "border-blue-500 shadow-blue-100 shadow-lg"
+            ? "border-blue-500 shadow-lg shadow-blue-100"
            : retrieve?.state === "done"
              ? "border-green-500"
              : retrieve?.state === "error"
@@ -231,7 +231,7 @@ export default function Component({ events }) {
          className={cn(
            "border-2 transition-all duration-300",
            analyze?.state === "inprogress"
-              ? "border-blue-500 shadow-blue-100 shadow-lg"
+              ? "border-blue-500 shadow-lg shadow-blue-100"
              : analyze?.state === "done"
                ? "border-green-500"
                : analyze?.state === "error"
@@ -288,9 +288,9 @@ export default function Component({ events }) {
                  key={answer.id}
                  value={answer.id}
                  className={cn(
-                    "mb-4 border rounded-lg overflow-hidden",
+                    "mb-4 overflow-hidden rounded-lg border",
                    answer.state === "inprogress"
-                      ? "border-blue-500 shadow-blue-100 shadow-sm"
+                      ? "border-blue-500 shadow-sm shadow-blue-100"
                      : answer.state === "done"
                        ? "border-green-100"
                        : answer.state === "error"
@@ -309,7 +309,7 @@ export default function Component({ events }) {
                      <Badge
                        variant="outline"
                        className={cn(
-                          "ml-auto flex items-center space-x-1 shrink-0",
+                          "ml-auto flex shrink-0 items-center space-x-1",
                          answer.state === "inprogress"
                            ? "text-blue-500"
                            : answer.state === "done"
@@ -327,7 +327,7 @@ export default function Component({ events }) {
                  <AccordionContent className="px-4 pb-4 pt-1">
                    <div
                      className={cn(
-                        "p-3 rounded-md",
+                        "rounded-md p-3",
                        answer.state === "done"
                          ? "bg-green-50"
                          : answer.state === "inprogress"
@@ -0,0 +1,132 @@
+import { Badge } from "@/components/ui/badge";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Progress } from "@/components/ui/progress";
+import { Skeleton } from "@/components/ui/skeleton";
+import { cn } from "@/lib/utils";
+import { Markdown } from "@llamaindex/chat-ui/widgets";
+import { ListChecks, Loader2, Wand2 } from "lucide-react";
+import { useEffect, useState } from "react";
+
+const STAGE_META = {
+  plan: {
+    icon: ListChecks,
+    badgeText: "Step 1/2: Planning",
+    gradient: "from-blue-100 via-blue-50 to-white",
+    progress: 33,
+    iconBg: "bg-blue-100 text-blue-600",
+    badge: "bg-blue-100 text-blue-700",
+  },
+  generate: {
+    icon: Wand2,
+    badgeText: "Step 2/2: Generating",
+    gradient: "from-violet-100 via-violet-50 to-white",
+    progress: 66,
+    iconBg: "bg-violet-100 text-violet-600",
+    badge: "bg-violet-100 text-violet-700",
+  },
+};
+
+function ArtifactWorkflowCard({ event }) {
+  const [visible, setVisible] = useState(event?.state !== "completed");
+  const [fade, setFade] = useState(false);
+
+  useEffect(() => {
+    if (event?.state === "completed") {
+      setVisible(false);
+    } else {
+      setVisible(true);
+      setFade(false);
+    }
+  }, [event?.state]);
+
+  if (!event || !visible) return null;
+
+  const { state, requirement } = event;
+  const meta = STAGE_META[state];
+
+  if (!meta) return null;
+
+  return (
+    <div className="flex min-h-[180px] w-full items-center justify-center py-2">
+      <Card
+        className={cn(
+          "w-full rounded-xl shadow-md transition-all duration-500",
+          "border-0",
+          fade && "pointer-events-none opacity-0",
+          `bg-gradient-to-br ${meta.gradient}`,
+        )}
+        style={{
+          boxShadow:
+            "0 2px 12px 0 rgba(80, 80, 120, 0.08), 0 1px 3px 0 rgba(80, 80, 120, 0.04)",
+        }}
+      >
+        <CardHeader className="flex flex-row items-center gap-2 px-3 pb-1 pt-2">
+          <div
+            className={cn(
+              "flex items-center justify-center rounded-full p-1",
+              meta.iconBg,
+            )}
+          >
+            <meta.icon className="h-5 w-5" />
+          </div>
+          <CardTitle className="flex items-center gap-2 text-base font-semibold">
+            <Badge className={cn("ml-1", meta.badge, "px-2 py-0.5 text-xs")}>
+              {meta.badgeText}
+            </Badge>
+          </CardTitle>
+        </CardHeader>
+        <CardContent className="px-3 py-1">
+          {state === "plan" && (
+            <div className="flex flex-col items-center gap-2 py-2">
+              <Loader2 className="mb-1 h-6 w-6 animate-spin text-blue-400" />
+              <div className="text-center text-sm font-medium text-blue-900">
+                Analyzing your request...
+              </div>
+              <Skeleton className="mt-1 h-3 w-1/2 rounded-full" />
+            </div>
+          )}
+          {state === "generate" && (
+            <div className="flex flex-col gap-2 py-2">
+              <div className="flex items-center gap-1">
+                <Loader2 className="h-4 w-4 animate-spin text-violet-400" />
+                <span className="text-sm font-medium text-violet-900">
+                  Working on the requirement:
+                </span>
+              </div>
+              <div className="max-h-24 overflow-auto rounded-lg border border-violet-200 bg-violet-50 px-2 py-1 text-xs">
+                {requirement ? (
+                  <Markdown content={requirement} />
+                ) : (
+                  <span className="italic text-violet-400">
+                    No requirements available yet.
+                  </span>
+                )}
+              </div>
+            </div>
+          )}
+        </CardContent>
+        <div className="px-3 pb-2 pt-1">
+          <Progress
+            value={meta.progress}
+            className={cn(
+              "h-1 rounded-full bg-gray-200",
+              state === "plan" && "bg-blue-200",
+              state === "generate" && "bg-violet-200",
+            )}
+          />
+        </div>
+      </Card>
+    </div>
+  );
+}
+
+export default function Component({ events }) {
+  const aggregateEvents = () => {
+    if (!events || events.length === 0) return null;
+    return events[events.length - 1];
+  };
+
+  const event = aggregateEvents();
+
+  return <ArtifactWorkflowCard event={event} />;
+}
@@ -3,9 +3,12 @@ from typing import Optional
 from app.index import get_index
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.settings import Settings
-from llama_index.llms.openai import OpenAI
 from llama_index.server.api.models import ChatRequest
 from llama_index.server.tools.index import get_query_engine_tool
+from llama_index.server.tools.index.citation import (
+    CITATION_SYSTEM_PROMPT,
+    enable_citation,
+)


 def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
@@ -14,9 +17,16 @@ def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow
        raise RuntimeError(
            "Index not found! Please run `uv run generate` to index the data first."
        )
-    query_tool = get_query_engine_tool(index=index)
+    # Create a query tool with citations enabled
+    query_tool = enable_citation(get_query_engine_tool(index=index))
+
+    # Define the system prompt for the agent
+    # Append the citation system prompt to the system prompt
+    system_prompt = """You are a helpful assistant"""
+    system_prompt += CITATION_SYSTEM_PROMPT
+
    return AgentWorkflow.from_tools_or_functions(
        tools_or_functions=[query_tool],
-        llm=Settings.llm or OpenAI(model="gpt-4o-mini"),
-        system_prompt="You are a helpful assistant.",
+        llm=Settings.llm,
+        system_prompt=system_prompt,
    )
@@ -0,0 +1,65 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
+
+## Getting Started
+
+First, set up the environment with uv:
+
+> **_Note:_** This step is not needed if you are using the dev-container.
+
+```shell
+uv sync
+```
+
+Then check the parameters that have been pre-configured in the `.env` file in this directory.
+Make sure you have set the `OPENAI_API_KEY` for the LLM.
+
+Then, run the development server:
+
+```shell
+uv run fastapi dev
+```
+
+Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
+
+To start the app optimized for **production**, run:
+
+```
+uv run fastapi run
+```
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
+
+## Use Case
+
+AI-powered code generator that helps you generate apps with a chat interface, a code editor, and an app preview.
+
+To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
+
+You can start by sending a request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```
+curl --location 'localhost:8000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Create a report comparing the finances of Apple and Tesla" }] }'
+```
+
+## Customize the UI
+
+To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
+
+You can also generate new UI code for the workflow using an LLM by running the following command:
+
+```
+uv run generate_ui
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
+- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
+- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
+
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -0,0 +1,375 @@
+import re
+import time
+from typing import Any, Literal, Optional, Union
+
+from llama_index.core.chat_engine.types import ChatMessage
+from llama_index.core.llms import LLM
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core.prompts import PromptTemplate
+from llama_index.llms.openai import OpenAI
+from llama_index.core.workflow import (
+    Context,
+    Event,
+    StartEvent,
+    StopEvent,
+    Workflow,
+    step,
+)
+from llama_index.server.api.models import (
+    Artifact,
+    ArtifactEvent,
+    ArtifactType,
+    ChatRequest,
+    CodeArtifactData,
+    UIEvent,
+)
+from llama_index.server.api.utils import get_last_artifact
+from pydantic import BaseModel, Field
+
+
+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    workflow = CodeArtifactWorkflow(
+        llm=OpenAI(model="gpt-4.1"),
+        chat_request=chat_request,
+        timeout=120.0,
+    )
+    return workflow
+
+
+# Structured plan that the planning step parses out of the LLM's JSON response.
+class Requirement(BaseModel):
+    # Which branch the workflow takes next: answer the user or generate code.
+    next_step: Literal["answering", "coding"]
+    # Language of the code to generate; optional (the planning prompt asks for
+    # "typescript", "python" or null).
+    language: Optional[str] = None
+    # File name for the generated code, when the planner could determine one.
+    file_name: Optional[str] = None
+    # Description of what the next step has to do.
+    requirement: str
+
+
+# Emitted by prepare_chat_history to trigger the planning step.
+class PlanEvent(Event):
+    # The user's latest message.
+    user_msg: str
+    # Serialized previous artifact used as planning context; None means "no context".
+    context: Optional[str] = None
+
+
+# Emitted by the planning step when the next step is "coding".
+class GenerateArtifactEvent(Event):
+    # The parsed plan describing the code to generate.
+    requirement: Requirement
+
+
+# Carries no data; it only routes the workflow to the synthesize_answer step.
+class SynthesizeAnswerEvent(Event):
+    pass
+
+
+class UIEventData(BaseModel):
+    """
+    Event data for updating workflow status to the UI.
+    """
+
+    # Workflow steps wrap this model in UIEvent(type="ui_event", data=...).
+    state: Literal["plan", "generate", "completed"] = Field(
+        description="The current state of the workflow. "
+        "plan: analyze and create a plan for the next step. "
+        "generate: generate the artifact based on the requirement from the previous step. "
+        "completed: the workflow is completed. "
+    )
+    # Left as None until the planning step has produced a requirement.
+    requirement: Optional[str] = Field(
+        description="The requirement for generating the artifact. ",
+        default=None,
+    )
+
+
+class CodeArtifactWorkflow(Workflow):
+    """
+    A simple workflow that help generate/update the chat artifact (code, document)
+    e.g: Help create a NextJS app.
+         Update the generated code with the user's feedback.
+         Generate a guideline for the app,...
+    """
+
+    def __init__(
+        self,
+        llm: LLM,
+        chat_request: ChatRequest,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Set up the workflow for a single chat request.
+
+        Args:
+            llm: The LLM used by the planning, generation and answer steps.
+            chat_request: The chat request from the chat app; also used to look up
+                the last artifact via get_last_artifact.
+            **kwargs: Forwarded unchanged to the Workflow base class
+                (create_workflow passes timeout this way).
+        """
+        super().__init__(**kwargs)
+        self.llm = llm
+        self.chat_request = chat_request
+        # Previous artifact (if any); later steps serialize it into their prompts.
+        self.last_artifact = get_last_artifact(chat_request)
+
+    @step
+    async def prepare_chat_history(self, ctx: Context, ev: StartEvent) -> PlanEvent:
+        user_msg = ev.user_msg
+        if user_msg is None:
+            raise ValueError("user_msg is required to run the workflow")
+        await ctx.set("user_msg", user_msg)
+        chat_history = ev.chat_history or []
+        chat_history.append(
+            ChatMessage(
+                role="user",
+                content=user_msg,
+            )
+        )
+        memory = ChatMemoryBuffer.from_defaults(
+            chat_history=chat_history,
+            llm=self.llm,
+        )
+        await ctx.set("memory", memory)
+        return PlanEvent(
+            user_msg=user_msg,
+            context=str(self.last_artifact.model_dump_json())
+            if self.last_artifact
+            else "",
+        )
+
+    @step
+    async def planning(
+        self, ctx: Context, event: PlanEvent
+    ) -> Union[GenerateArtifactEvent, SynthesizeAnswerEvent]:
+        """
+        Based on the conversation history and the user's request
+        this step will help to provide a good next step for the code or document generation.
+
+        Args:
+            ctx: Workflow context; the "memory" entry is read and updated.
+            event: Plan event with the user message and optional context.
+
+        Returns:
+            GenerateArtifactEvent when the parsed next step is "coding",
+            otherwise SynthesizeAnswerEvent.
+
+        Raises:
+            ValueError: If the LLM response contains no fenced block.
+        """
+        # Tell the UI that the planning phase has started.
+        ctx.write_event_to_stream(
+            UIEvent(
+                type="ui_event",
+                data=UIEventData(
+                    state="plan",
+                    requirement=None,
+                ),
+            )
+        )
+        prompt = PromptTemplate("""
+        You are a product analyst responsible for analyzing the user's request and providing the next step for code or document generation.
+        You are helping user with their code artifact. To update the code, you need to plan a coding step.
+    
+        Follow these instructions:
+        1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
+        2. The next step must be one of the following two options:
+           - "coding": To make the changes to the current code.
+           - "answering": If you don't need to update the current code or need clarification from the user.
+        Important: Avoid telling the user to update the code themselves, you are the one who will update the code (by planning a coding step).
+        3. If the next step is "coding", you may specify the language ("typescript" or "python") and file_name if known, otherwise set them to null. 
+        4. The requirement must be provided clearly what is the user request and what need to be done for the next step in details
+           as precise and specific as possible, don't be stingy with in the requirement.
+        5. If the next step is "answering", set language and file_name to null, and the requirement should describe what to answer or explain to the user.
+        6. Be concise; only return the requirements for the next step.
+        7. The requirements must be in the following format:
+           ```json
+           {
+               "next_step": "answering" | "coding",
+               "language": "typescript" | "python" | null,
+               "file_name": string | null,
+               "requirement": string
+           }
+           ```
+
+        ## Example 1:
+        User request: Create a calculator app.
+        You should return:
+        ```json
+        {
+            "next_step": "coding",
+            "language": "typescript",
+            "file_name": "calculator.tsx",
+            "requirement": "Generate code for a calculator app that has a simple UI with a display and button layout. The display should show the current input and the result. The buttons should include basic operators, numbers, clear, and equals. The calculation should work correctly."
+        }
+        ```
+
+        ## Example 2:
+        User request: Explain how the game loop works.
+        Context: You have already generated the code for a snake game.
+        You should return:
+        ```json
+        {
+            "next_step": "answering",
+            "language": null,
+            "file_name": null,
+            "requirement": "The user is asking about the game loop. Explain how the game loop works."
+        }
+        ```
+
+        {context}
+
+        Now, plan the user's next step for this request:
+        {user_msg}
+        """).format(
+            # The context section is omitted only when event.context is None.
+            context=""
+            if event.context is None
+            else f"## The context is: \n{event.context}\n",
+            user_msg=event.user_msg,
+        )
+        response = await self.llm.acomplete(
+            prompt=prompt,
+            formatted=True,
+        )
+        # parse the response to Requirement
+        # 1. use regex to find the json block
+        # (non-greedy: takes the first fenced block; the "json" tag is optional)
+        json_block = re.search(
+            r"```(?:json)?\s*([\s\S]*?)\s*```", response.text, re.IGNORECASE
+        )
+        if json_block is None:
+            raise ValueError("No JSON block found in the response.")
+        # 2. parse the json block to Requirement
+        # NOTE(review): presumably raises pydantic's validation error when the
+        # JSON does not match the Requirement model - confirm.
+        requirement = Requirement.model_validate_json(json_block.group(1).strip())
+        # Report the parsed requirement to the UI; this is emitted for both the
+        # "coding" and the "answering" branch.
+        ctx.write_event_to_stream(
+            UIEvent(
+                type="ui_event",
+                data=UIEventData(
+                    state="generate",
+                    requirement=requirement.requirement,
+                ),
+            )
+        )
+        # Put the planning result to the memory
+        # useful for answering step
+        memory: ChatMemoryBuffer = await ctx.get("memory")
+        memory.put(
+            ChatMessage(
+                role="assistant",
+                content=f"The plan for next step: \n{response.text}",
+            )
+        )
+        await ctx.set("memory", memory)
+        if requirement.next_step == "coding":
+            return GenerateArtifactEvent(
+                requirement=requirement,
+            )
+        else:
+            return SynthesizeAnswerEvent()
+
+    @step
+    async def generate_artifact(
+        self, ctx: Context, event: GenerateArtifactEvent
+    ) -> SynthesizeAnswerEvent:
+        """
+        Generate the code based on the user's request.
+        """
+        ctx.write_event_to_stream(
+            UIEvent(
+                type="ui_event",
+                data=UIEventData(
+                    state="generate",
+                    requirement=event.requirement.requirement,
+                ),
+            )
+        )
+        prompt = PromptTemplate("""
+         You are a skilled developer who can help user with coding.
+         You are given a task to generate or update a code for a given requirement.
+
+         ## Follow these instructions:
+         **1. Carefully read the user's requirements.** 
+            If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
+            If the previous code is provided:
+            + Carefully analyze the code with the request to make the right changes.
+            + Avoid making a lot of changes from the previous code if the request is not to write the code from scratch again.
+         **2. For code requests:**
+            - If the user does not specify a framework or language, default to a React component using the Next.js framework.
+            - For Next.js, use Shadcn UI components, Typescript, @types/node, @types/react, @types/react-dom, PostCSS, and TailwindCSS.
+            The import pattern should be:
+            ```
+            import { ComponentName } from "@/components/ui/component-name"
+            import { Markdown } from "@llamaindex/chat-ui"
+            import { cn } from "@/lib/utils"
+            ```
+            - Ensure the code is idiomatic, production-ready, and includes necessary imports.
+            - Only generate code relevant to the user's request—do not add extra boilerplate.
+         **3. Don't be verbose on response**
+            - No other text or comments only return the code which wrapped by ```language``` block.
+            - If the user's request is to update the code, only return the updated code.
+         **4. Only the following languages are allowed: "typescript", "python".**
+         **5. If there is no code to update, return the reason without any code block.**
+            
+         ## Example:
+         ```typescript
+         import React from "react";
+         import { Button } from "@/components/ui/button";
+         import { cn } from "@/lib/utils";
+
+         export default function MyComponent() {
+         return (
+            <div className="flex flex-col items-center justify-center h-screen">
+               <Button>Click me</Button>
+            </div>
+         );
+         }
+
+         The previous code is:
+         {previous_artifact}
+
+         Now, i have to generate the code for the following requirement:
+         {requirement}
+         ```
+        """).format(
+            previous_artifact=self.last_artifact.model_dump_json()
+            if self.last_artifact
+            else "",
+            requirement=event.requirement,
+        )
+        response = await self.llm.acomplete(
+            prompt=prompt,
+            formatted=True,
+        )
+        # Extract the code from the response
+        language_pattern = r"```(\w+)([\s\S]*)```"
+        code_match = re.search(language_pattern, response.text)
+        if code_match is None:
+            return SynthesizeAnswerEvent()
+        else:
+            code = code_match.group(2).strip()
+        # Put the generated code to the memory
+        memory: ChatMemoryBuffer = await ctx.get("memory")
+        memory.put(
+            ChatMessage(
+                role="assistant",
+                content=f"Updated the code: \n{response.text}",
+            )
+        )
+        # To show the Canvas panel for the artifact
+        ctx.write_event_to_stream(
+            ArtifactEvent(
+                data=Artifact(
+                    type=ArtifactType.CODE,
+                    created_at=int(time.time()),
+                    data=CodeArtifactData(
+                        language=event.requirement.language or "",
+                        file_name=event.requirement.file_name or "",
+                        code=code,
+                    ),
+                ),
+            )
+        )
+        return SynthesizeAnswerEvent()
+
+    @step
+    async def synthesize_answer(
+        self, ctx: Context, event: SynthesizeAnswerEvent
+    ) -> StopEvent:
+        """
+        Synthesize the answer.
+        """
+        memory: ChatMemoryBuffer = await ctx.get("memory")
+        chat_history = memory.get()
+        chat_history.append(
+            ChatMessage(
+                role="system",
+                content="""
+                You are a helpful assistant who is responsible for explaining the work to the user.
+                Based on the conversation history, provide an answer to the user's question. 
+                The user has access to the code so avoid mentioning the whole code again in your response.
+                """,
+            )
+        )
+        response_stream = await self.llm.astream_chat(
+            messages=chat_history,
+        )
+        ctx.write_event_to_stream(
+            UIEvent(
+                type="ui_event",
+                data=UIEventData(
+                    state="completed",
+                ),
+            )
+        )
+        return StopEvent(result=response_stream)
@@ -23,7 +23,18 @@ from llama_index.core.workflow import (
    Workflow,
    step,
 )
-from llama_index.server.api.models import ChatRequest, SourceNodesEvent, UIEvent
+from llama_index.server.api.models import (
+    ArtifactEvent,
+    ArtifactType,
+    ChatRequest,
+    SourceNodesEvent,
+    UIEvent,
+    Artifact,
+    DocumentArtifactData,
+    DocumentArtifactSource,
+)
+import time
+from llama_index.server.utils.stream import write_response_to_stream
 from pydantic import BaseModel, Field

 logger = logging.getLogger("uvicorn")
@@ -365,8 +376,31 @@ class DeepResearchWorkflow(Workflow):
            user_request=self.user_request,
            stream=self.stream,
        )
+
+        final_response = await write_response_to_stream(res, ctx)
+
+        ctx.write_event_to_stream(
+            ArtifactEvent(
+                data=Artifact(
+                    type=ArtifactType.DOCUMENT,
+                    created_at=int(time.time()),
+                    data=DocumentArtifactData(
+                        title="DeepResearch Report",
+                        content=final_response,
+                        type="markdown",
+                        sources=[
+                            DocumentArtifactSource(
+                                id=node.id_,
+                            )
+                            for node in self.context_nodes
+                        ],
+                    ),
+                ),
+            )
+        )
+
        return StopEvent(
-            result=res,
+            result="",
        )


@@ -0,0 +1,66 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project using [Workflows](https://docs.llamaindex.ai/en/stable/understanding/workflows/).
+
+## Getting Started
+
+First, set up the environment with uv:
+
+> **_Note:_** This step is not needed if you are using the dev-container.
+
+```shell
+uv sync
+```
+
+Then check the parameters that have been pre-configured in the `.env` file in this directory.
+Make sure you have set the `OPENAI_API_KEY` for the LLM.
+
+Then, run the development server:
+
+```shell
+uv run fastapi dev
+```
+
+Then open [http://localhost:8000](http://localhost:8000) with your browser to start the chat UI.
+
+To start the app optimized for **production**, run:
+
+```
+uv run fastapi run
+```
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://docs.llamaindex.ai/en/stable/module_guides/models/llms) and [embedding model](https://docs.llamaindex.ai/en/stable/module_guides/models/embeddings) in [settings.py](app/settings.py).
+
+## Use Case
+
+AI-powered document generator that can help you generate documents with a chat interface and simple markdown editor.
+
+To update the workflow, you can modify the code in [`workflow.py`](app/workflow.py).
+
+You can start by sending a request on the [chat UI](http://localhost:8000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```
+curl --location 'localhost:8000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Create a report comparing the finances of Apple and Tesla" }] }'
+```
+
+## Customize the UI
+
+To customize the UI, you can start by modifying the [./components/ui_event.jsx](./components/ui_event.jsx) file.
+
+You can also generate new UI code for the workflow using an LLM by running the following command:
+
+```
+uv run generate_ui
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex.
+- [Workflows Introduction](https://docs.llamaindex.ai/en/stable/understanding/workflows/) - learn about LlamaIndex workflows.
+- [LlamaIndex Server](https://pypi.org/project/llama-index-server/)
+
+You can check out [the LlamaIndex GitHub repository](https://github.com/run-llama/llama_index) - your feedback and contributions are welcome!
@@ -0,0 +1,347 @@
+import re
+import time
+from typing import Any, Literal, Optional
+
+from llama_index.core.chat_engine.types import ChatMessage
+from llama_index.core.llms import LLM
+from llama_index.llms.openai import OpenAI
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.workflow import (
+    Context,
+    Event,
+    StartEvent,
+    StopEvent,
+    Workflow,
+    step,
+)
+from llama_index.server.api.models import (
+    Artifact,
+    ArtifactEvent,
+    ArtifactType,
+    ChatRequest,
+    DocumentArtifactData,
+    UIEvent,
+)
+from llama_index.server.api.utils import get_last_artifact
+from pydantic import BaseModel, Field
+
+
+def create_workflow(chat_request: ChatRequest) -> Workflow:
+    workflow = DocumentArtifactWorkflow(
+        llm=OpenAI(model="gpt-4.1"),
+        chat_request=chat_request,
+        timeout=120.0,
+    )
+    return workflow
+
+
+# Structured plan that the planning step parses out of the LLM's JSON response.
+class DocumentRequirement(BaseModel):
+    # Document format to produce.
+    type: Literal["markdown", "html"]
+    # Title of the document; used as the artifact title.
+    title: str
+    # Description of what to generate or change in the document.
+    requirement: str
+
+
+# Emitted by prepare_chat_history to trigger the planning step.
+class PlanEvent(Event):
+    # The user's latest message.
+    user_msg: str
+    # Serialized previous artifact used as planning context; None means "no context".
+    context: Optional[str] = None
+
+
+# Emitted by the planning step to trigger document generation.
+class GenerateArtifactEvent(Event):
+    # The parsed plan describing the document to generate or update.
+    requirement: DocumentRequirement
+
+
+# Emitted by generate_artifact to trigger the final answer step.
+class SynthesizeAnswerEvent(Event):
+    # The requirement the generation step worked on.
+    requirement: DocumentRequirement
+    # Raw LLM response of the generation step, or a "no change" message when
+    # the response contained no document block.
+    generated_artifact: str
+
+
+class UIEventData(BaseModel):
+    """
+    Event data for updating workflow status to the UI.
+    """
+
+    # Workflow steps wrap this model in UIEvent(type="ui_event", data=...).
+    state: Literal["plan", "generate", "completed"] = Field(
+        description="The current state of the workflow. "
+        "plan: analyze and create a plan for the next step. "
+        "generate: generate the artifact based on the requirement from the previous step. "
+        "completed: the workflow is completed. "
+    )
+    # Left as None until the planning step has produced a requirement.
+    requirement: Optional[str] = Field(
+        description="The requirement for generating the artifact. ",
+        default=None,
+    )
+
+
+class DocumentArtifactWorkflow(Workflow):
+    """
+    A workflow to help generate or update document artifacts (e.g., Markdown or HTML documents).
+    Example use cases: Generate a project guideline, update documentation with user feedback, etc.
+    """
+
+    def __init__(
+        self,
+        llm: LLM,
+        chat_request: ChatRequest,
+        **kwargs: Any,
+    ) -> None:
+        """
+        Set up the workflow for a single chat request.
+
+        Args:
+            llm: The LLM used by the planning, generation and answer steps.
+            chat_request: The chat request from the chat app; also used to look up
+                the last artifact via get_last_artifact.
+            **kwargs: Forwarded unchanged to the Workflow base class
+                (create_workflow passes timeout this way).
+        """
+        super().__init__(**kwargs)
+        self.llm = llm
+        self.chat_request = chat_request
+        # Previous artifact (if any); later steps serialize it into their prompts.
+        self.last_artifact = get_last_artifact(chat_request)
+
+    @step
+    async def prepare_chat_history(self, ctx: Context, ev: StartEvent) -> PlanEvent:
+        user_msg = ev.user_msg
+        if user_msg is None:
+            raise ValueError("user_msg is required to run the workflow")
+        await ctx.set("user_msg", user_msg)
+        chat_history = ev.chat_history or []
+        chat_history.append(
+            ChatMessage(
+                role="user",
+                content=user_msg,
+            )
+        )
+        memory = ChatMemoryBuffer.from_defaults(
+            chat_history=chat_history,
+            llm=self.llm,
+        )
+        await ctx.set("memory", memory)
+        return PlanEvent(
+            user_msg=user_msg,
+            context=str(self.last_artifact.model_dump_json())
+            if self.last_artifact
+            else "",
+        )
+
+    @step
+    async def planning(self, ctx: Context, event: PlanEvent) -> GenerateArtifactEvent:
+        """
+        Based on the conversation history and the user's request,
+        this step will provide a clear requirement for the next document generation or update.
+        """
+        ctx.write_event_to_stream(
+            UIEvent(
+                type="ui_event",
+                data=UIEventData(
+                    state="plan",
+                    requirement=None,
+                ),
+            )
+        )
+        prompt = PromptTemplate("""
+         You are a documentation analyst responsible for analyzing the user's request and providing requirements for document generation or update.
+         Follow these instructions:
+         1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
+         2. From the user's request, provide requirements for the next step of the document generation or update.
+         3. Do not be verbose; only return the requirements for the next step of the document generation or update.
+         4. Only the following document types are allowed: "markdown", "html".
+         5. The requirement should be in the following format:
+            ```json
+            {
+                "type": "markdown" | "html",
+                "title": string,
+                "requirement": string
+            }
+            ```
+
+         ## Example:
+         User request: Create a project guideline document.
+         You should return:
+         ```json
+         {
+             "type": "markdown",
+             "title": "Project Guideline",
+             "requirement": "Generate a Markdown document that outlines the project goals, deliverables, and timeline. Include sections for introduction, objectives, deliverables, and timeline."
+         }
+         ```
+
+         User request: Add a troubleshooting section to the guideline.
+         You should return:
+         ```json
+         {
+             "type": "markdown",
+             "title": "Project Guideline",
+             "requirement": "Add a 'Troubleshooting' section at the end of the document with common issues and solutions."
+         }
+         ```
+
+         {context}
+
+         Now, please plan for the user's request:
+         {user_msg}
+        """).format(
+            context=""
+            if event.context is None
+            else f"## The context is: \n{event.context}\n",
+            user_msg=event.user_msg,
+        )
+        response = await self.llm.acomplete(
+            prompt=prompt,
+            formatted=True,
+        )
+        # parse the response to DocumentRequirement
+        json_block = re.search(r"```json([\s\S]*)```", response.text)
+        if json_block is None:
+            raise ValueError("No json block found in the response")
+        requirement = DocumentRequirement.model_validate_json(
+            json_block.group(1).strip()
+        )
+
+        # Put the planning result to the memory
+        memory: ChatMemoryBuffer = await ctx.get("memory")
+        memory.put(
+            ChatMessage(
+                role="assistant",
+                content=f"Planning for the document generation: \n{response.text}",
+            )
+        )
+        await ctx.set("memory", memory)
+        ctx.write_event_to_stream(
+            UIEvent(
+                type="ui_event",
+                data=UIEventData(
+                    state="generate",
+                    requirement=requirement.requirement,
+                ),
+            )
+        )
+        return GenerateArtifactEvent(
+            requirement=requirement,
+        )
+
+    @step
+    async def generate_artifact(
+        self, ctx: Context, event: GenerateArtifactEvent
+    ) -> SynthesizeAnswerEvent:
+        """
+        Generate or update the document based on the user's request.
+        """
+        ctx.write_event_to_stream(
+            UIEvent(
+                type="ui_event",
+                data=UIEventData(
+                    state="generate",
+                    requirement=event.requirement.requirement,
+                ),
+            )
+        )
+        prompt = PromptTemplate("""
+         You are a skilled technical writer who can help users with documentation.
+         You are given a task to generate or update a document for a given requirement.
+
+         ## Follow these instructions:
+         **1. Carefully read the user's requirements.**
+            If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
+            If the previous document is provided:
+            + Carefully analyze the document with the request to make the right changes.
+            + Avoid making unnecessary changes from the previous document if the request is not to rewrite it from scratch.
+         **2. For document requests:**
+            - If the user does not specify a type, default to Markdown.
+            - Ensure the document is clear, well-structured, and grammatically correct.
+            - Only generate content relevant to the user's request—do not add extra boilerplate.
+         **3. Do not be verbose in your response.**
+            - No other text or comments; only return the document content wrapped by the appropriate code block (```markdown or ```html).
+            - If the user's request is to update the document, only return the updated document.
+         **4. Only the following types are allowed: "markdown", "html".**
+         **5. If there is no change to the document, return the reason without any code block.**
+
+         ## Example:
+         ```markdown
+         # Project Guideline
+         
+         ## Introduction
+         ...
+         ```
+
+         The previous content is:
+         {previous_artifact}
+
+         Now, please generate the document for the following requirement:
+         {requirement}
+         """).format(
+            previous_artifact=self.last_artifact.model_dump_json()
+            if self.last_artifact
+            else "",
+            requirement=event.requirement,
+        )
+        response = await self.llm.acomplete(
+            prompt=prompt,
+            formatted=True,
+        )
+        # Extract the document from the response
+        language_pattern = r"```(markdown|html)([\s\S]*)```"
+        doc_match = re.search(language_pattern, response.text)
+        if doc_match is None:
+            return SynthesizeAnswerEvent(
+                requirement=event.requirement,
+                generated_artifact="There is no change to the document. "
+                + response.text.strip(),
+            )
+        content = doc_match.group(2).strip()
+        doc_type = doc_match.group(1)
+        # Put the generated document to the memory
+        memory: ChatMemoryBuffer = await ctx.get("memory")
+        memory.put(
+            ChatMessage(
+                role="assistant",
+                content=f"Generated document: \n{response.text}",
+            )
+        )
+        # To show the Canvas panel for the artifact
+        ctx.write_event_to_stream(
+            ArtifactEvent(
+                data=Artifact(
+                    type=ArtifactType.DOCUMENT,
+                    created_at=int(time.time()),
+                    data=DocumentArtifactData(
+                        title=event.requirement.title,
+                        content=content,
+                        type=doc_type,  # type: ignore
+                    ),
+                ),
+            )
+        )
+        return SynthesizeAnswerEvent(
+            requirement=event.requirement,
+            generated_artifact=response.text,
+        )
+
+    @step
+    async def synthesize_answer(
+        self, ctx: Context, event: SynthesizeAnswerEvent
+    ) -> StopEvent:
+        """
+        Synthesize the answer for the user.
+        """
+        memory: ChatMemoryBuffer = await ctx.get("memory")
+        chat_history = memory.get()
+        chat_history.append(
+            ChatMessage(
+                role="system",
+                content="""
+                Your responsibility is to explain the work to the user.
+                If there is no document to update, explain the reason.
+                If the document is updated, just summarize what changed. Don't need to include the whole document again in the response.
+                """,
+            )
+        )
+        response_stream = await self.llm.astream_chat(
+            messages=chat_history,
+        )
+        ctx.write_event_to_stream(
+            UIEvent(
+                type="ui_event",
+                data=UIEventData(
+                    state="completed",
+                    requirement=event.requirement.requirement,
+                ),
+            )
+        )
+        return StopEvent(result=response_stream)
@@ -41,6 +41,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "What standards for a letter exist?" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can run `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -1,4 +1,4 @@
-import { agent } from "llamaindex";
+import { agent } from "@llamaindex/workflow";
 import { getIndex } from "./data";

 export const workflowFactory = async (reqBody: any) => {
@@ -0,0 +1,39 @@
+import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
+import "dotenv/config";
+import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
+import { initSettings } from "./app/settings";
+
+/**
+ * Loads the documents found in the "data" directory and builds a
+ * VectorStoreIndex from them, using a storage context created with
+ * persistDir "storage".
+ */
+async function generateDatasource() {
+  console.log(`Generating storage context...`);
+
+  // Storage context that the index (documents, embeddings) is written into
+  const persistedContext = await storageContextFromDefaults({
+    persistDir: "storage",
+  });
+
+  // Read every document the directory reader finds under "data"
+  const directoryReader = new SimpleDirectoryReader();
+  const loadedDocs = await directoryReader.loadData("data");
+
+  await VectorStoreIndex.fromDocuments(loadedDocs, {
+    storageContext: persistedContext,
+  });
+  console.log("Storage context successfully generated.");
+}
+
+// CLI entry point: the first argument selects the command to run.
+(async () => {
+  const [command] = process.argv.slice(2);
+
+  initSettings();
+
+  // This template ships no custom UI, so there is nothing to generate for "ui".
+  if (command === "ui") {
+    console.error("This project doesn't use any custom UI.");
+    return;
+  }
+
+  // Anything other than "datasource" is reported, then treated as "datasource".
+  if (command !== "datasource") {
+    console.error(
+      `Unrecognized command: ${command}. Generating datasource by default.`,
+    );
+  }
+  await generateDatasource();
+})();
@@ -0,0 +1,61 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
+
+## Getting Started
+
+First, install the dependencies:
+
+```
+npm install
+```
+
+Second, run the development server:
+
+```
+npm run dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the chat UI.
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://ts.llamaindex.ai/docs/llamaindex/modules/llms) in the [settings file](src/app/settings.ts).
+
+## Custom UI Components
+
+We have a custom component located in `components/ui_event.jsx`. This is used to display the state of artifact workflows in UI. You can regenerate a new UI component from the workflow event schema by running the following command:
+
+```
+npm run generate:ui
+```
+
+## Use Case
+
+An AI-powered code generator that helps you generate apps through a chat interface, with a code editor and an app preview.
+
+To update the workflow, you can modify the code in [`workflow.ts`](app/workflow.ts).
+
+You can start by sending a request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```shell
+curl --location 'localhost:3000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Compare the financial performance of Apple and Tesla" }] }'
+```
+
+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can run `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
+- [LlamaIndexTS Documentation](https://ts.llamaindex.ai/docs/llamaindex) - learn about LlamaIndex (Typescript features).
+- [Workflows Introduction](https://ts.llamaindex.ai/docs/llamaindex/modules/workflows) - learn about LlamaIndexTS workflows.
+
+You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
@@ -0,0 +1,337 @@
+import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
+import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
+
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+
+import { z } from "zod";
+
+// Shape of the plan the planner LLM is asked to return (validated with
+// RequirementSchema.parse in the plan step of workflowFactory below).
+export const RequirementSchema = z.object({
+  // Which step runs next: answer the user directly or generate/update code
+  next_step: z.enum(["answering", "coding"]),
+  // Language of the code artifact; may be null or omitted
+  language: z.string().nullable().optional(),
+  // File name of the code artifact; may be null or omitted
+  file_name: z.string().nullable().optional(),
+  // Free-text description of what the next step has to do
+  requirement: z.string(),
+});
+
+// Static type inferred from RequirementSchema
+export type Requirement = z.infer<typeof RequirementSchema>;
+
+// Payload of the "ui_event" events this workflow sends to report its progress.
+// NOTE(review): presumably also the schema `npm run generate:ui` reads to
+// generate the UI component - confirm against the generator.
+export const UIEventSchema = z.object({
+  type: z.literal("ui_event"),
+  data: z.object({
+    state: z
+      .enum(["plan", "generate", "completed"])
+      .describe(
+        "The current state of the workflow: 'plan', 'generate', or 'completed'.",
+      ),
+    requirement: z
+      .string()
+      .optional()
+      .describe(
+        "An optional requirement creating or updating a code, if applicable.",
+      ),
+  }),
+});
+
+// Static type inferred from UIEventSchema
+export type UIEvent = z.infer<typeof UIEventSchema>;
+// Starts the planning step; carries the user input and, when available, the
+// previous artifact serialized as a string.
+const planEvent = workflowEvent<{
+  userInput: MessageContent;
+  context?: string | undefined;
+}>();
+
+// Starts the code generation step for a planned requirement.
+const generateArtifactEvent = workflowEvent<{
+  requirement: Requirement;
+}>();
+
+// Starts the final answering step; carries no payload.
+const synthesizeAnswerEvent = workflowEvent<object>();
+
+// Progress event sent to the client (see UIEventSchema).
+const uiEvent = workflowEvent<UIEvent>();
+
+/**
+ * Builds the code-artifact workflow for one chat request.
+ *
+ * Steps:
+ *  1. startAgentEvent       -> load the chat history and user input into memory.
+ *  2. planEvent             -> ask the LLM whether to code or to answer.
+ *  3. generateArtifactEvent -> ask the LLM for code and publish it as an artifact.
+ *  4. synthesizeAnswerEvent -> stream an explanation back to the user.
+ *
+ * @param reqBody Raw request body; only used to extract the last artifact.
+ * @returns The configured workflow.
+ */
+export function workflowFactory(reqBody: any) {
+  const llm = Settings.llm;
+
+  const { withState, getContext } = createStatefulMiddleware(() => {
+    return {
+      memory: new ChatMemoryBuffer({ llm }),
+      lastArtifact: extractLastArtifact(reqBody),
+    };
+  });
+  const workflow = withState(createWorkflow());
+
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    // Prepare chat history
+    const { state } = getContext();
+    // Put user input to the memory
+    if (!userInput) {
+      throw new Error("Missing user input to start the workflow");
+    }
+    state.memory.set(chatHistory);
+    state.memory.put({ role: "user", content: userInput });
+
+    return planEvent.with({
+      userInput: userInput,
+      context: state.lastArtifact
+        ? JSON.stringify(state.lastArtifact)
+        : undefined,
+    });
+  });
+
+  workflow.handle([planEvent], async ({ data: planData }) => {
+    const { sendEvent, state } = getContext();
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "plan",
+        },
+      }),
+    );
+    const user_msg = planData.userInput;
+    const context = planData.context
+      ? `## The context is: \n${planData.context}\n`
+      : "";
+    const prompt = `
+You are a product analyst responsible for analyzing the user's request and providing the next step for code or document generation.
+You are helping user with their code artifact. To update the code, you need to plan a coding step.
+
+Follow these instructions:
+1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
+2. The next step must be one of the following two options:
+    - "coding": To make the changes to the current code.
+    - "answering": If you don't need to update the current code or need clarification from the user.
+Important: Avoid telling the user to update the code themselves, you are the one who will update the code (by planning a coding step).
+3. If the next step is "coding", you may specify the language ("typescript" or "python") and file_name if known, otherwise set them to null. 
+4. The requirement must be provided clearly what is the user request and what need to be done for the next step in details
+    as precise and specific as possible, don't be stingy with in the requirement.
+5. If the next step is "answering", set language and file_name to null, and the requirement should describe what to answer or explain to the user.
+6. Be concise; only return the requirements for the next step.
+7. The requirements must be in the following format:
+    \`\`\`json
+    {
+        "next_step": "answering" | "coding",
+        "language": "typescript" | "python" | null,
+        "file_name": string | null,
+        "requirement": string
+    }
+    \`\`\`
+
+## Example 1:
+User request: Create a calculator app.
+You should return:
+\`\`\`json
+{
+    "next_step": "coding",
+    "language": "typescript",
+    "file_name": "calculator.tsx",
+    "requirement": "Generate code for a calculator app that has a simple UI with a display and button layout. The display should show the current input and the result. The buttons should include basic operators, numbers, clear, and equals. The calculation should work correctly."
+}
+\`\`\`
+
+## Example 2:
+User request: Explain how the game loop works.
+Context: You have already generated the code for a snake game.
+You should return:
+\`\`\`json
+{
+    "next_step": "answering",
+    "language": null,
+    "file_name": null,
+    "requirement": "The user is asking about the game loop. Explain how the game loop works."
+}
+\`\`\`
+
+${context}
+
+Now, plan the user's next step for this request:
+${user_msg}
+`;
+
+    const response = await llm.complete({
+      prompt,
+    });
+    // parse the response to Requirement
+    // 1. use regex to find the json block
+    const jsonBlock = response.text.match(/```json\s*([\s\S]*?)\s*```/);
+    if (!jsonBlock) {
+      throw new Error("No JSON block found in the response.");
+    }
+    // 2. validate the parsed JSON against the schema (throws on mismatch)
+    const requirement = RequirementSchema.parse(JSON.parse(jsonBlock[1]));
+    state.memory.put({
+      role: "assistant",
+      content: `The plan for next step: \n${response.text}`,
+    });
+
+    if (requirement.next_step === "coding") {
+      return generateArtifactEvent.with({
+        requirement,
+      });
+    } else {
+      return synthesizeAnswerEvent.with({});
+    }
+  });
+
+  workflow.handle([generateArtifactEvent], async ({ data: planData }) => {
+    const { sendEvent, state } = getContext();
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "generate",
+          requirement: planData.requirement.requirement,
+        },
+      }),
+    );
+
+    const previousArtifact = state.lastArtifact
+      ? JSON.stringify(state.lastArtifact)
+      : "There is no previous artifact";
+    const requirementText = planData.requirement.requirement;
+
+    // The values are interpolated directly instead of being substituted with
+    // chained String.replace calls: replace() expands "$" sequences ($&, $$,
+    // $1, ...) found in the replacement text, and a second replace() could hit
+    // a literal "{requirement}" inside the inserted artifact (common in JSX)
+    // instead of the placeholder.
+    const prompt = `
+        You are a skilled developer who can help user with coding.
+        You are given a task to generate or update a code for a given requirement.
+
+        ## Follow these instructions:
+        **1. Carefully read the user's requirements.** 
+           If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
+           If the previous code is provided:
+           + Carefully analyze the code with the request to make the right changes.
+           + Avoid making a lot of changes from the previous code if the request is not to write the code from scratch again.
+        **2. For code requests:**
+           - If the user does not specify a framework or language, default to a React component using the Next.js framework.
+           - For Next.js, use Shadcn UI components, Typescript, @types/node, @types/react, @types/react-dom, PostCSS, and TailwindCSS.
+           The import pattern should be:
+           \`\`\`typescript
+           import { ComponentName } from "@/components/ui/component-name"
+           import { Markdown } from "@llamaindex/chat-ui"
+           import { cn } from "@/lib/utils"
+           \`\`\`
+           - Ensure the code is idiomatic, production-ready, and includes necessary imports.
+           - Only generate code relevant to the user's request—do not add extra boilerplate.
+        **3. Don't be verbose on response**
+           - No other text or comments only return the code which wrapped by \`\`\`language\`\`\` block.
+           - If the user's request is to update the code, only return the updated code.
+        **4. Only the following languages are allowed: "typescript", "python".**
+        **5. If there is no code to update, return the reason without any code block.**
+           
+        ## Example:
+        \`\`\`typescript
+        import React from "react";
+        import { Button } from "@/components/ui/button";
+        import { cn } from "@/lib/utils";
+
+        export default function MyComponent() {
+        return (
+           <div className="flex flex-col items-center justify-center h-screen">
+              <Button>Click me</Button>
+           </div>
+        );
+        }
+        \`\`\`
+
+        The previous code is:
+        ${previousArtifact}
+
+        Now, i have to generate the code for the following requirement:
+        ${requirementText}
+      `;
+
+    const response = await llm.complete({
+      prompt,
+    });
+
+    // Extract the code from the response
+    const codeMatch = response.text.match(/```(\w+)([\s\S]*)```/);
+    if (!codeMatch) {
+      // No fenced code block: the model returned a reason instead of code, so
+      // go straight to the answering step without publishing an artifact.
+      return synthesizeAnswerEvent.with({});
+    }
+
+    const code = codeMatch[2].trim();
+
+    // Put the generated code to the memory
+    state.memory.put({
+      role: "assistant",
+      content: `Updated the code: \n${response.text}`,
+    });
+
+    // To show the Canvas panel for the artifact
+    sendEvent(
+      artifactEvent.with({
+        type: "artifact",
+        data: {
+          type: "code",
+          created_at: Date.now(),
+          data: {
+            language: planData.requirement.language || "",
+            file_name: planData.requirement.file_name || "",
+            code,
+          },
+        },
+      }),
+    );
+
+    return synthesizeAnswerEvent.with({});
+  });
+
+  workflow.handle([synthesizeAnswerEvent], async () => {
+    const { sendEvent, state } = getContext();
+
+    const chatHistory = await state.memory.getMessages();
+    const messages = [
+      ...chatHistory,
+      {
+        role: "system" as const,
+        content: `
+        You are a helpful assistant who is responsible for explaining the work to the user.
+        Based on the conversation history, provide an answer to the user's question. 
+        The user has access to the code so avoid mentioning the whole code again in your response.
+      `,
+      },
+    ];
+
+    const responseStream = await llm.chat({
+      messages,
+      stream: true,
+    });
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "completed",
+        },
+      }),
+    );
+
+    // Forward every chunk to the client while accumulating the full answer
+    let response = "";
+    for await (const chunk of responseStream) {
+      response += chunk.delta;
+      sendEvent(
+        agentStreamEvent.with({
+          delta: chunk.delta,
+          response: "",
+          currentAgentName: "assistant",
+          raw: chunk,
+        }),
+      );
+    }
+
+    return stopAgentEvent.with({
+      result: response,
+    });
+  });
+
+  return workflow;
+}
@@ -31,7 +31,7 @@ You can configure [LLM model](https://ts.llamaindex.ai/docs/llamaindex/modules/l

 ## Custom UI Components

-For Deep Research, we have a custom component located in `components/deep_research_event.jsx`. This is used to display the results of the deep research workflow in a more user-friendly way
+For Deep Research, we have a custom component located in `components/ui_event.jsx`. This is used to display the results of the deep research workflow in a more user-friendly way.

 ### Generate a new UI Component from workflow event

@@ -53,6 +53,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Compare the financial performance of Apple and Tesla" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can run `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -0,0 +1,436 @@
+import { artifactEvent, toSourceEvent } from "@llamaindex/server";
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import {
+  ChatMemoryBuffer,
+  LlamaCloudIndex,
+  MessageContent,
+  Metadata,
+  MetadataMode,
+  NodeWithScore,
+  PromptTemplate,
+  Settings,
+  VectorStoreIndex,
+  extractText,
+} from "llamaindex";
+import { randomUUID } from "node:crypto";
+import { z } from "zod";
+import { getIndex } from "./data";
+
+// workflow factory
+/**
+ * Resolves the index from the `data` field of the request body (if any) and
+ * builds the deep-research workflow on top of it.
+ */
+export const workflowFactory = async (reqBody: any) =>
+  getWorkflow(await getIndex(reqBody?.data));
+
+// workflow configs
+// Soft cap on the total number of research questions: once reached,
+// enhancedPrompt tells the planner to write the report or cancel. Nothing in
+// this file enforces it as a hard stop.
+const MAX_QUESTIONS = 6;
+// Number of nodes to retrieve from the index (passed as similarityTopK)
+const TOP_K = 10;
+
+// Prompt used by createResearchPlan to let the LLM decide the next action
+// (more questions, write the report, or cancel). Filled with the retrieved
+// context, the conversation so far, an optional extra hint and the user request.
+const createPlanResearchPrompt = new PromptTemplate({
+  template: `
+You are a professor who is guiding a researcher to research a specific request/problem.
+Your task is to decide on a research plan for the researcher.
+
+The possible actions are:
+ Provide a list of questions for the researcher to investigate, with the purpose of clarifying the request.
+ Write a report if the researcher has already gathered enough research on the topic and can resolve the initial request.
+ Cancel the research if most of the answers from researchers indicate there is insufficient information to research the request. Do not attempt more than 3 research iterations or too many questions.
+
+The workflow should be:
+ Always begin by providing some initial questions for the researcher to investigate.
+ Analyze the provided answers against the initial topic/request. If the answers are insufficient to resolve the initial request, provide additional questions for the researcher to investigate.
+ If the answers are sufficient to resolve the initial request, instruct the researcher to write a report.
+
+Here are the context: 
+<Collected information>
+{context_str}
+</Collected information>
+
+<Conversation context>
+{conversation_context}
+</Conversation context>
+
+{enhanced_prompt}
+
+Now, provide your decision in the required format for this user request:
+<User request>
+{user_request}
+</User request>
+`,
+  templateVars: [
+    "context_str",
+    "conversation_context",
+    "enhanced_prompt",
+    "user_request",
+  ],
+});
+
+// Prompt used by answerQuestion: answer a single research question from the
+// collected context only, citing sources as [citation:id].
+const researchPrompt = new PromptTemplate({
+  template: `
+You are a researcher who is in the process of answering the question.
+The purpose is to answer the question based on the collected information, without using prior knowledge or making up any new information.
+Always add citations to the sentence/point/paragraph using the id of the provided content.
+The citation should follow this format: [citation:id] where id is the id of the content.
+
+E.g:
+If we have a context like this:
+<Citation id='abc-xyz'>
+Baby llama is called cria
+</Citation id='abc-xyz'>
+
+And your answer uses the content, then the citation should be:
+- Baby llama is called cria [citation:abc-xyz]
+
+ Here is the provided context for the question:
+<Collected information>
+{context_str}
+</Collected information>
+
+No prior knowledge, just use the provided context to answer the question: {question}
+`,
+  templateVars: ["context_str", "question"],
+});
+
+// System prompt appended to the chat history in the report step; instructs
+// the LLM to write the final report and to keep the citation markers.
+const WRITE_REPORT_PROMPT = `
+You are a researcher writing a report based on a user request and the research context.
+You have researched various perspectives related to the user request.
+The report should provide a comprehensive outline covering all important points from the researched perspectives.
+Create a well-structured outline for the research report that covers all the answers.
+
+# IMPORTANT when writing in markdown format:
+ Use tables or figures where appropriate to enhance presentation.
+ Preserve all citation syntax (the \`[citation:id]()\` parts in the provided context). Keep these citations in the final report - no separate reference section is needed.
+ Do not add links, a table of contents, or a references section to the report.
+`;
+
+// workflow events
+// A question to research, identified by a generated id
+type ResearchQuestion = { questionId: string; question: string };
+// A researched question together with the answer produced for it
+type ResearchResult = ResearchQuestion & { answer: string };
+
+// Triggers a (re-)planning round; carries no payload
+const planResearchEvent = workflowEvent<{}>();
+// Asks for a single question to be answered
+const researchEvent = workflowEvent<ResearchQuestion>();
+// Triggers the final report; carries no payload
+const reportEvent = workflowEvent<{}>();
+
+// Payload of the progress events sent to the client: which stage the
+// workflow is in, the state of that stage and, for "answer" events, the
+// question (and eventually the answer) they refer to.
+export const UIEventSchema = z
+  .object({
+    event: z
+      .enum(["retrieve", "analyze", "answer"])
+      .describe(
+        "The type of event. DeepResearch has 3 main stages:\n1. retrieve: Retrieve the context from the vector store\n2. analyze: Analyze the context and generate a research questions to answer\n3. answer: Answer the provided questions. Each question has a unique id, when the state is done, the event will have the answer for the question.",
+      ),
+    state: z
+      .enum(["pending", "inprogress", "done", "error"])
+      .describe("The state for each event"),
+    id: z.string().optional().describe("The id of the question"),
+    question: z
+      .string()
+      .optional()
+      .describe("The question generated by the LLM"),
+    answer: z.string().optional().describe("The answer generated by the LLM"),
+  })
+  .describe("DeepResearchEvent");
+
+// Static type inferred from UIEventSchema
+type UIEventData = z.infer<typeof UIEventSchema>;
+
+// Workflow event wrapping a UIEventData payload under the "ui_event" type tag
+const uiEvent = workflowEvent<{
+  type: "ui_event";
+  data: UIEventData;
+}>();
+
+// workflow definition
+/**
+ * Builds the deep-research workflow on top of the given index.
+ *
+ * Steps:
+ *  1. startAgentEvent   -> store the request and retrieve context nodes.
+ *  2. planResearchEvent -> let the LLM choose: research more questions,
+ *                          write the report, or cancel.
+ *  3. researchEvent     -> answer one question from the retrieved context.
+ *  4. reportEvent       -> stream the final report and publish it as a
+ *                          document artifact.
+ *
+ * @param index Index used to retrieve the context nodes (TOP_K per query).
+ * @returns The configured workflow.
+ */
+export function getWorkflow(index: VectorStoreIndex | LlamaCloudIndex) {
+  const retriever = index.asRetriever({ similarityTopK: TOP_K });
+  const { withState, getContext } = createStatefulMiddleware(() => {
+    return {
+      memory: new ChatMemoryBuffer({
+        llm: Settings.llm,
+        chatHistory: [],
+      }),
+      contextNodes: [] as NodeWithScore<Metadata>[],
+      userRequest: "" as MessageContent,
+      totalQuestions: 0,
+      researchResults: [] as ResearchResult[],
+    };
+  });
+  const workflow = withState(createWorkflow());
+
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    const { sendEvent, state } = getContext();
+    if (!userInput) throw new Error("Invalid input");
+
+    state.memory.set(chatHistory);
+    state.memory.put({ role: "user", content: userInput });
+    state.userRequest = userInput;
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          event: "retrieve",
+          state: "inprogress",
+        },
+      }),
+    );
+
+    const retrievedNodes = await retriever.retrieve({ query: userInput });
+
+    sendEvent(toSourceEvent(retrievedNodes));
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { event: "retrieve", state: "done" },
+      }),
+    );
+
+    state.contextNodes.push(...retrievedNodes);
+
+    return planResearchEvent.with({});
+  });
+
+  workflow.handle([planResearchEvent], async () => {
+    const { sendEvent, state, stream } = getContext();
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { event: "analyze", state: "inprogress" },
+      }),
+    );
+
+    const { decision, researchQuestions, cancelReason } =
+      await createResearchPlan(
+        state.memory,
+        state.contextNodes
+          .map((node) => node.node.getContent(MetadataMode.NONE))
+          .join("\n"),
+        enhancedPrompt(state.totalQuestions),
+        state.userRequest,
+      );
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { event: "analyze", state: "done" },
+      }),
+    );
+    if (decision === "cancel") {
+      sendEvent(
+        uiEvent.with({
+          type: "ui_event",
+          data: { event: "analyze", state: "done" },
+        }),
+      );
+      // NOTE(review): this branch streams the cancel reason but never emits
+      // stopAgentEvent - confirm the caller ends the run in that case.
+      return agentStreamEvent.with({
+        delta: cancelReason ?? "Research cancelled without any reason.",
+        response: cancelReason ?? "Research cancelled without any reason.",
+        currentAgentName: "",
+        raw: null,
+      });
+    }
+    if (decision === "research" && researchQuestions.length > 0) {
+      state.totalQuestions += researchQuestions.length;
+      state.memory.put({
+        role: "assistant",
+        content:
+          "We need to find answers to the following questions:\n" +
+          // researchQuestions holds { questionId, question } objects; joining
+          // them directly would produce "[object Object]" lines.
+          researchQuestions.map(({ question }) => question).join("\n"),
+      });
+      // researchResults accumulates across planning rounds, so the number of
+      // results to wait for is the current count plus this batch. It is
+      // captured before any research event is dispatched.
+      const expectedResults =
+        state.researchResults.length + researchQuestions.length;
+      researchQuestions.forEach(({ questionId: id, question }) => {
+        sendEvent(
+          uiEvent.with({
+            type: "ui_event",
+            data: { event: "answer", state: "pending", id, question },
+          }),
+        );
+        sendEvent(researchEvent.with({ questionId: id, question }));
+      });
+      // Wait until every question of this batch has been answered
+      await stream
+        .until(() => state.researchResults.length >= expectedResults)
+        .toArray();
+      return planResearchEvent.with({});
+    }
+    state.memory.put({
+      role: "assistant",
+      content: "No more idea to analyze. We should report the answers.",
+    });
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: { event: "analyze", state: "done" },
+      }),
+    );
+    return reportEvent.with({});
+  });
+
+  workflow.handle([researchEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    const { questionId, question } = data;
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          event: "answer",
+          state: "inprogress",
+          id: questionId,
+          question,
+        },
+      }),
+    );
+
+    const answer = await answerQuestion(
+      contextStr(state.contextNodes),
+      question,
+    );
+    state.researchResults.push({ questionId, question, answer });
+
+    state.memory.put({
+      role: "assistant",
+      content: `<Question>${question}</Question>\n<Answer>${answer}</Answer>`,
+    });
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          event: "answer",
+          state: "done",
+          id: questionId,
+          question,
+          answer,
+        },
+      }),
+    );
+  });
+
+  workflow.handle([reportEvent], async () => {
+    const { sendEvent, state } = getContext();
+    const chatHistory = await state.memory.getAllMessages();
+    const messages = chatHistory.concat([
+      {
+        role: "system",
+        content: WRITE_REPORT_PROMPT,
+      },
+      {
+        role: "user",
+        content:
+          "Write a report addressing the user request based on the research provided the context",
+      },
+    ]);
+
+    const stream = await Settings.llm.chat({ messages, stream: true });
+    let response = "";
+    for await (const chunk of stream) {
+      response += chunk.delta;
+      sendEvent(
+        agentStreamEvent.with({
+          delta: chunk.delta,
+          response,
+          currentAgentName: "",
+          // Attach the chunk itself rather than the whole stream object
+          raw: chunk,
+        }),
+      );
+    }
+
+    // Open the generated report in Canvas
+    sendEvent(
+      artifactEvent.with({
+        type: "artifact",
+        data: {
+          type: "document",
+          created_at: Date.now(),
+          data: {
+            title: "DeepResearch Report",
+            content: response,
+            type: "markdown",
+            sources: state.contextNodes.map((node) => ({
+              id: node.node.id_,
+            })),
+          },
+        },
+      }),
+    );
+
+    return stopAgentEvent.with({
+      result: response,
+    });
+  });
+
+  return workflow;
+}
+
+/**
+ * Asks the LLM for the next research action.
+ *
+ * @param memory Chat memory whose messages form the conversation context.
+ * @param contextStr Text of the retrieved context nodes.
+ * @param enhancedPrompt Extra hint for the planner (may be empty).
+ * @param userRequest The original user request.
+ * @returns The decision, the optional cancel reason and the research
+ *          questions, each tagged with a freshly generated questionId.
+ * @throws If the LLM output is not valid JSON or does not match the expected
+ *         response format.
+ */
+const createResearchPlan = async (
+  memory: ChatMemoryBuffer,
+  contextStr: string,
+  enhancedPrompt: string,
+  userRequest: MessageContent,
+) => {
+  const chatHistory = await memory.getMessages();
+
+  // Message content is not necessarily a plain string; extractText flattens
+  // it so structured content does not end up as "[object Object]".
+  const conversationContext = chatHistory
+    .map((message) => `${message.role}: ${extractText(message.content)}`)
+    .join("\n");
+
+  const prompt = createPlanResearchPrompt.format({
+    context_str: contextStr,
+    conversation_context: conversationContext,
+    enhanced_prompt: enhancedPrompt,
+    user_request: extractText(userRequest),
+  });
+
+  const responseFormat = z.object({
+    decision: z.enum(["research", "write", "cancel"]),
+    researchQuestions: z.array(z.string()),
+    cancelReason: z.string().optional(),
+  });
+
+  const result = await Settings.llm.complete({ prompt, responseFormat });
+  // Validate the LLM output instead of trusting a type cast, so a malformed
+  // plan fails here with a descriptive error rather than further downstream.
+  const plan = responseFormat.parse(JSON.parse(result.text));
+
+  return {
+    ...plan,
+    researchQuestions: plan.researchQuestions.map((question) => ({
+      questionId: randomUUID(),
+      question,
+    })),
+  };
+};
+
+/**
+ * Renders the context nodes as a newline-separated list of <Citation> blocks,
+ * each tagged with the node id and holding the node content without metadata.
+ */
+const contextStr = (contextNodes: NodeWithScore<Metadata>[]) => {
+  const citations: string[] = [];
+  for (const { node } of contextNodes) {
+    const id = node.id_;
+    const body = node.getContent(MetadataMode.NONE);
+    citations.push(`<Citation id='${id}'>\n${body}</Citation id='${id}'>`);
+  }
+  return citations.join("\n");
+};
+
+/**
+ * Returns an extra hint for the planner prompt based on how many questions
+ * have been researched so far: a kick-off hint when there are none, a
+ * wrap-up hint once MAX_QUESTIONS is reached, and an empty string otherwise.
+ */
+const enhancedPrompt = (totalQuestions: number) => {
+  const nothingResearchedYet = totalQuestions === 0;
+  if (nothingResearchedYet) {
+    return "The student has no questions to research. Let start by providing some questions for the student to research.";
+  }
+
+  const capReached = totalQuestions >= MAX_QUESTIONS;
+  return capReached
+    ? `The student has researched ${totalQuestions} questions. Should proceeding writing report or cancel the research if the answers are not enough to write a report.`
+    : "";
+};
+
+/**
+ * Answers a single research question by completing researchPrompt, filled
+ * with the given context and question, and returns the completion text.
+ */
+const answerQuestion = async (contextStr: string, question: string) => {
+  const { text } = await Settings.llm.complete({
+    prompt: researchPrompt.format({ context_str: contextStr, question }),
+  });
+  return text;
+};
@@ -0,0 +1,61 @@
+This is a [LlamaIndex](https://www.llamaindex.ai/) project bootstrapped with [`create-llama`](https://github.com/run-llama/LlamaIndexTS/tree/main/packages/create-llama).
+
+## Getting Started
+
+First, install the dependencies:
+
+```
+npm install
+```
+
+Second, run the development server:
+
+```
+npm run dev
+```
+
+Open [http://localhost:3000](http://localhost:3000) with your browser to see the chat UI.
+
+## Configure LLM and Embedding Model
+
+You can configure [LLM model](https://ts.llamaindex.ai/docs/llamaindex/modules/llms) in the [settings file](src/app/settings.ts).
+
+## Custom UI Components
+
+We have a custom component located in `components/ui_event.jsx`. This is used to display the state of artifact workflows in UI. You can regenerate a new UI component from the workflow event schema by running the following command:
+
+```
+npm run generate:ui
+```
+
+## Use Case
+
+An AI-powered document generator that helps you generate documents through a chat interface and a simple markdown editor.
+
+To update the workflow, you can modify the code in [`workflow.ts`](app/workflow.ts).
+
+You can start by sending a request on the [chat UI](http://localhost:3000) or you can test the `/api/chat` endpoint with the following curl request:
+
+```shell
+curl --location 'localhost:3000/api/chat' \
+--header 'Content-Type: application/json' \
+--data '{ "messages": [{ "role": "user", "content": "Compare the financial performance of Apple and Tesla" }] }'
+```
+
+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can run `npm run eject`. It will create a normal Next.js project with the same functionality as @llamaindex/server.
+
+```bash
+npm run eject
+```
+
+## Learn More
+
+To learn more about LlamaIndex, take a look at the following resources:
+
+- [LlamaIndex Documentation](https://docs.llamaindex.ai) - learn about LlamaIndex (Python features).
+- [LlamaIndexTS Documentation](https://ts.llamaindex.ai/docs/llamaindex) - learn about LlamaIndex (Typescript features).
+- [Workflows Introduction](https://ts.llamaindex.ai/docs/llamaindex/modules/workflows) - learn about LlamaIndexTS workflows.
+
+You can check out [the LlamaIndexTS GitHub repository](https://github.com/run-llama/LlamaIndexTS) - your feedback and contributions are welcome!
@@ -0,0 +1,315 @@
+import { artifactEvent, extractLastArtifact } from "@llamaindex/server";
+import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
+
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+
+import { z } from "zod";
+
+// Shape of the plan produced by the planning step: which kind of document to
+// write, its title, and the textual requirement handed to the generation step.
+export const DocumentRequirementSchema = z.object({
+  type: z.enum(["markdown", "html"]),
+  title: z.string(),
+  requirement: z.string(),
+});
+
+export type DocumentRequirement = z.infer<typeof DocumentRequirementSchema>;
+
+// Schema of the progress event sent to the client while the workflow runs.
+// It is exported so tooling can read it from this module.
+export const UIEventSchema = z.object({
+  type: z.literal("ui_event"),
+  data: z.object({
+    state: z
+      .enum(["plan", "generate", "completed"])
+      .describe(
+        "The current state of the workflow: 'plan', 'generate', or 'completed'.",
+      ),
+    requirement: z
+      .string()
+      .optional()
+      .describe(
+        "An optional requirement creating or updating a document, if applicable.",
+      ),
+  }),
+});
+
+export type UIEvent = z.infer<typeof UIEventSchema>;
+
+// Internal event: start planning for the user's input, optionally with the
+// previous artifact serialized as context.
+const planEvent = workflowEvent<{
+  userInput: MessageContent;
+  context?: string | undefined;
+}>();
+
+// Internal event: the plan is ready; generate (or update) the document.
+const generateArtifactEvent = workflowEvent<{
+  requirement: DocumentRequirement;
+}>();
+
+// Internal event: generation finished; produce the final answer for the user.
+const synthesizeAnswerEvent = workflowEvent<{
+  requirement: DocumentRequirement;
+  generatedArtifact: string;
+}>();
+
+// Progress event whose payload follows UIEventSchema.
+const uiEvent = workflowEvent<UIEvent>();
+
+/**
+ * Builds the document-generation workflow for one chat request.
+ *
+ * The workflow runs four steps in sequence:
+ *  1. startAgentEvent       -> seeds the memory and emits planEvent
+ *  2. planEvent             -> asks the LLM for a DocumentRequirement
+ *  3. generateArtifactEvent -> asks the LLM for the document and emits an artifact
+ *  4. synthesizeAnswerEvent -> streams a short explanation back to the user
+ *
+ * @param reqBody Raw request body; only used to look up the last artifact
+ *                through `extractLastArtifact`.
+ * @returns The configured workflow.
+ * @throws Error (inside the handlers) when the user input is missing or the
+ *         planning response contains no parsable ```json block.
+ */
+export function workflowFactory(reqBody: any) {
+  const llm = Settings.llm;
+
+  const { withState, getContext } = createStatefulMiddleware(() => {
+    return {
+      memory: new ChatMemoryBuffer({ llm }),
+      lastArtifact: extractLastArtifact(reqBody),
+    };
+  });
+  const workflow = withState(createWorkflow());
+
+  // Step 1: load the chat history and the new user message into memory.
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { userInput, chatHistory = [] } = data;
+    const { state } = getContext();
+    if (!userInput) {
+      throw new Error("Missing user input to start the workflow");
+    }
+    state.memory.set(chatHistory);
+    state.memory.put({ role: "user", content: userInput });
+
+    return planEvent.with({
+      userInput,
+      context: state.lastArtifact
+        ? JSON.stringify(state.lastArtifact)
+        : undefined,
+    });
+  });
+
+  // Step 2: turn the user's request into a structured DocumentRequirement.
+  workflow.handle([planEvent], async ({ data: planData }) => {
+    const { sendEvent, state } = getContext();
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "plan",
+        },
+      }),
+    );
+    const user_msg = planData.userInput;
+    const context = planData.context
+      ? `## The context is: \n${planData.context}\n`
+      : "";
+    const prompt = `
+         You are a documentation analyst responsible for analyzing the user's request and providing requirements for document generation or update.
+         Follow these instructions:
+         1. Carefully analyze the conversation history and the user's request to determine what has been done and what the next step should be.
+         2. From the user's request, provide requirements for the next step of the document generation or update.
+         3. Do not be verbose; only return the requirements for the next step of the document generation or update.
+         4. Only the following document types are allowed: "markdown", "html".
+         5. The requirement should be in the following format:
+            \`\`\`json
+            {
+                "type": "markdown" | "html",
+                "title": string,
+                "requirement": string
+            }
+            \`\`\`
+
+         ## Example:
+         User request: Create a project guideline document.
+         You should return:
+         \`\`\`json
+         {
+             "type": "markdown",
+             "title": "Project Guideline",
+             "requirement": "Generate a Markdown document that outlines the project goals, deliverables, and timeline. Include sections for introduction, objectives, deliverables, and timeline."
+         }
+         \`\`\`
+
+         User request: Add a troubleshooting section to the guideline.
+         You should return:
+         \`\`\`json
+         {
+             "type": "markdown",
+             "title": "Project Guideline",
+             "requirement": "Add a 'Troubleshooting' section at the end of the document with common issues and solutions."
+         }
+         \`\`\`
+
+         ${context}
+
+         Now, please plan for the user's request:
+         ${user_msg}
+        `;
+
+    const response = await llm.complete({
+      prompt,
+    });
+    // Parse the response to DocumentRequirement
+    const jsonBlock = response.text.match(/```json\s*([\s\S]*?)\s*```/);
+    if (!jsonBlock) {
+      throw new Error("No JSON block found in the response.");
+    }
+    const requirement = DocumentRequirementSchema.parse(
+      JSON.parse(jsonBlock[1]),
+    );
+    state.memory.put({
+      role: "assistant",
+      content: `Planning for the document generation: \n${response.text}`,
+    });
+    return generateArtifactEvent.with({
+      requirement,
+    });
+  });
+
+  // Step 3: generate (or update) the document and publish it as an artifact.
+  workflow.handle(
+    [generateArtifactEvent],
+    async ({ data: { requirement } }) => {
+      const { sendEvent, state } = getContext();
+
+      sendEvent(
+        uiEvent.with({
+          type: "ui_event",
+          data: {
+            state: "generate",
+            requirement: requirement.requirement,
+          },
+        }),
+      );
+
+      const previousArtifact = state.lastArtifact
+        ? JSON.stringify(state.lastArtifact)
+        : "";
+      const requirementStr = JSON.stringify(requirement);
+
+      const prompt = `
+         You are a skilled technical writer who can help users with documentation.
+         You are given a task to generate or update a document for a given requirement.
+
+         ## Follow these instructions:
+         **1. Carefully read the user's requirements.**
+            If any details are ambiguous or missing, make reasonable assumptions and clearly reflect those in your output.
+            If the previous document is provided:
+            + Carefully analyze the document with the request to make the right changes.
+            + Avoid making unnecessary changes from the previous document if the request is not to rewrite it from scratch.
+         **2. For document requests:**
+            - If the user does not specify a type, default to Markdown.
+            - Ensure the document is clear, well-structured, and grammatically correct.
+            - Only generate content relevant to the user's request—do not add extra boilerplate.
+         **3. Do not be verbose in your response.**
+            - No other text or comments; only return the document content wrapped by the appropriate code block (\`\`\`markdown or \`\`\`html).
+            - If the user's request is to update the document, only return the updated document.
+         **4. Only the following types are allowed: "markdown", "html".**
+         **5. If there is no change to the document, return the reason without any code block.**
+
+         ## Example:
+         \`\`\`markdown
+         # Project Guideline
+         
+         ## Introduction
+         ...
+         \`\`\`
+
+         The previous content is:
+         ${previousArtifact}
+
+         Now, please generate the document for the following requirement:
+         ${requirementStr}
+      `;
+
+      const response = await llm.complete({
+        prompt,
+      });
+
+      // Extract the document from the response. The match is greedy on purpose
+      // so that code fences nested inside the document stay part of it.
+      const docMatch = response.text.match(/```(markdown|html)([\s\S]*)```/);
+
+      if (docMatch) {
+        const content = docMatch[2].trim();
+        const docType = docMatch[1] as "markdown" | "html";
+
+        // Put the generated document to the memory
+        state.memory.put({
+          role: "assistant",
+          content: `Generated document: \n${response.text}`,
+        });
+
+        // To show the Canvas panel for the artifact
+        sendEvent(
+          artifactEvent.with({
+            type: "artifact",
+            data: {
+              type: "document",
+              created_at: Date.now(),
+              data: {
+                title: requirement.title,
+                content: content,
+                type: docType,
+              },
+            },
+          }),
+        );
+      } else {
+        // No fenced document: per rule 5 of the prompt the model answered with
+        // the reason for not changing anything. Keep that reason in memory so
+        // the synthesize step can actually explain it to the user.
+        state.memory.put({
+          role: "assistant",
+          content: `No document was generated: \n${response.text}`,
+        });
+      }
+
+      return synthesizeAnswerEvent.with({
+        requirement,
+        generatedArtifact: response.text,
+      });
+    },
+  );
+
+  // Step 4: stream a short explanation of the work back to the user.
+  workflow.handle([synthesizeAnswerEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+
+    const chatHistory = await state.memory.getMessages();
+    const messages = [
+      ...chatHistory,
+      {
+        role: "system" as const,
+        content: `
+                Your responsibility is to explain the work to the user.
+                If there is no document to update, explain the reason.
+                If the document is updated, just summarize what changed. Don't need to include the whole document again in the response.
+                `,
+      },
+    ];
+
+    const responseStream = await llm.chat({
+      messages,
+      stream: true,
+    });
+
+    sendEvent(
+      uiEvent.with({
+        type: "ui_event",
+        data: {
+          state: "completed",
+          requirement: data.requirement.requirement,
+        },
+      }),
+    );
+
+    let response = "";
+    for await (const chunk of responseStream) {
+      response += chunk.delta;
+      sendEvent(
+        agentStreamEvent.with({
+          delta: chunk.delta,
+          // Accumulated text so far, not just the latest delta.
+          response,
+          currentAgentName: "assistant",
+          raw: chunk,
+        }),
+      );
+    }
+
+    return stopAgentEvent.with({
+      result: response,
+    });
+  });
+
+  return workflow;
+}
@@ -41,6 +41,14 @@ curl --location 'localhost:3000/api/chat' \
 --data '{ "messages": [{ "role": "user", "content": "Generate a financial report that compares the financial performance of Apple and Tesla" }] }'
 ```

+## Eject Mode
+
+If you want to fully customize the server UI and routes, you can use `npm run eject`. It will create a normal Next.js project with the same functionality as `@llamaindex/server`.
+
+```bash
+npm run eject
+```
+
 ## Learn More

 To learn more about LlamaIndex, take a look at the following resources:
@@ -0,0 +1,318 @@
+import { toAgentRunEvent, toSourceEvent } from "@llamaindex/server";
+import {
+  callTools,
+  chatWithTools,
+  documentGenerator,
+  interpreter,
+} from "@llamaindex/tools";
+import {
+  agentStreamEvent,
+  createStatefulMiddleware,
+  createWorkflow,
+  startAgentEvent,
+  stopAgentEvent,
+  workflowEvent,
+} from "@llamaindex/workflow";
+import {
+  BaseToolWithCall,
+  ChatMemoryBuffer,
+  ChatMessage,
+  Metadata,
+  NodeWithScore,
+  Settings,
+  ToolCall,
+  ToolCallLLM,
+} from "llamaindex";
+import { getIndex } from "./data";
+
+/**
+ * Creates the financial-report workflow for one chat request.
+ *
+ * Loads the index for the request data, wraps it as a query tool, and wires it
+ * together with the code interpreter and document generator tools.
+ *
+ * @param reqBody Raw request body; `reqBody.data` is forwarded to `getIndex`.
+ * @throws Error when the `E2B_API_KEY` environment variable is not set.
+ */
+export async function workflowFactory(reqBody: any) {
+  const index = await getIndex(reqBody?.data);
+
+  // Expose the index to the LLM as a tool that also returns its source nodes.
+  const queryEngineTool = index.queryTool({
+    includeSourceNodes: true,
+    metadata: {
+      name: "query_document",
+      description: `This tool can retrieve information about Apple and Tesla financial data`,
+    },
+  });
+
+  // The code interpreter cannot be created without an E2B key.
+  const e2bApiKey = process.env.E2B_API_KEY;
+  if (!e2bApiKey) {
+    throw new Error("E2B_API_KEY is required to use the code interpreter tool");
+  }
+  const codeInterpreterTool = interpreter({ apiKey: e2bApiKey });
+
+  return getWorkflow(queryEngineTool, codeInterpreterTool, documentGenerator());
+}
+
+// workflow events
+// inputEvent: run the next LLM turn over the given chat messages.
+const inputEvent = workflowEvent<{ input: ChatMessage[] }>();
+// researchEvent: execute the query-engine tool calls requested by the LLM.
+const researchEvent = workflowEvent<{ toolCalls: ToolCall[] }>();
+// analyzeEvent: carries either code-interpreter tool calls (array) or a single
+// chat message sent by the research step asking for an analysis.
+const analyzeEvent = workflowEvent<{ input: ChatMessage | ToolCall[] }>();
+// reportGenerationEvent: execute the document-generator tool calls.
+const reportGenerationEvent = workflowEvent<{ toolCalls: ToolCall[] }>();
+
+// System prompt appended to the memory at the start of every run.
+const DEFAULT_SYSTEM_PROMPT = `
+You are a financial analyst who are given a set of tools to help you.
+It's good to using appropriate tools for the user request and always use the information from the tools, don't make up anything yourself.
+For the query engine tool, you should break down the user request into a list of queries and call the tool with the queries.
+`;
+
+// workflow definition
+/**
+ * Builds the financial-report workflow around three tools.
+ *
+ * Event flow:
+ *  - startAgentEvent       -> seeds memory, emits inputEvent
+ *  - inputEvent            -> one LLM turn; either streams the final answer or
+ *                             dispatches to research / analyze / report steps
+ *  - researchEvent         -> runs the query tool, then asks for an analysis
+ *  - analyzeEvent          -> runs the code interpreter (directly or after a
+ *                             dedicated analysis LLM turn), then loops to inputEvent
+ *  - reportGenerationEvent -> runs the document generator, then loops to inputEvent
+ *
+ * @param queryEngineTool       Tool used by the research step.
+ * @param codeInterpreterTool   Tool used by the analyze step.
+ * @param documentGeneratorTool Tool used by the report step.
+ * @returns The configured workflow.
+ * @throws Error when the configured LLM reports no tool-call support.
+ */
+// workflow definition
+export function getWorkflow(
+  queryEngineTool: BaseToolWithCall,
+  codeInterpreterTool: BaseToolWithCall,
+  documentGeneratorTool: BaseToolWithCall,
+) {
+  const llm = Settings.llm as ToolCallLLM;
+  if (!llm.supportToolCall) {
+    throw new Error("LLM is not a ToolCallLLM");
+  }
+  // Per-workflow state: a chat memory that starts empty.
+  const { withState, getContext } = createStatefulMiddleware(() => ({
+    memory: new ChatMemoryBuffer({ llm, chatHistory: [] }),
+  }));
+
+  const workflow = withState(createWorkflow());
+
+  // Add steps
+  // Entry point: replace the memory with the incoming history, then append the
+  // system prompt and the new user message (in that order).
+  workflow.handle([startAgentEvent], async ({ data }) => {
+    const { state } = getContext();
+    const { userInput, chatHistory = [] } = data;
+    if (!userInput) throw new Error("Invalid input");
+
+    state.memory.set(chatHistory);
+
+    state.memory.put({ role: "system", content: DEFAULT_SYSTEM_PROMPT });
+
+    state.memory.put({ role: "user", content: userInput });
+
+    const messages = await state.memory.getMessages();
+    return inputEvent.with({ input: messages });
+  });
+
+  // One LLM turn with all three tools available.
+  workflow.handle([inputEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    const chatHistory = data.input;
+
+    const tools = [codeInterpreterTool, documentGeneratorTool, queryEngineTool];
+
+    const toolCallResponse = await chatWithTools(llm, tools, chatHistory);
+
+    // No tool requested: stream the answer to the client and finish the run.
+    if (!toolCallResponse.hasToolCall()) {
+      const generator = toolCallResponse.responseGenerator;
+      let response = "";
+      if (generator) {
+        for await (const chunk of generator) {
+          response += chunk.delta;
+          sendEvent(
+            agentStreamEvent.with({
+              delta: chunk.delta,
+              response,
+              currentAgentName: "LLM", // Or derive from context if needed
+              raw: chunk.raw,
+            }),
+          );
+        }
+      }
+      return stopAgentEvent.with({ result: response });
+    }
+
+    // Mixed tool calls are rejected: record a corrective message and retry the
+    // LLM turn with the updated history.
+    if (toolCallResponse.hasMultipleTools()) {
+      state.memory.put({
+        role: "assistant",
+        content:
+          "Calling different tools is not allowed. Please only use multiple calls of the same tool.",
+      });
+      const newChatHistory = await state.memory.getMessages();
+      return inputEvent.with({ input: newChatHistory });
+    }
+
+    // Put the LLM tool call message into the memory
+    // And trigger the next step according to the tool call
+    if (toolCallResponse.toolCallMessage) {
+      state.memory.put(toolCallResponse.toolCallMessage);
+    }
+    // All calls target the same tool at this point, so the first name decides
+    // which step handles them.
+    const toolName = toolCallResponse.getToolNames()[0];
+    switch (toolName) {
+      case codeInterpreterTool.metadata.name:
+        return analyzeEvent.with({
+          input: toolCallResponse.toolCalls,
+        });
+      case documentGeneratorTool.metadata.name:
+        return reportGenerationEvent.with({
+          toolCalls: toolCallResponse.toolCalls,
+        });
+      default:
+        if (queryEngineTool.metadata.name === toolName) {
+          return researchEvent.with({
+            toolCalls: toolCallResponse.toolCalls,
+          });
+        }
+        throw new Error(`Unknown tool: ${toolName}`);
+    }
+  });
+
+  // Research step: run the query tool calls and forward any source nodes.
+  workflow.handle([researchEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    sendEvent(
+      toAgentRunEvent({
+        agent: "Researcher",
+        text: "Researching data",
+        type: "text",
+      }),
+    );
+
+    const { toolCalls } = data;
+
+    const toolMsgs = await callTools({
+      tools: [queryEngineTool],
+      toolCalls,
+      // Report per-call progress; a single call is reported as plain text.
+      writeEvent: (text, step) => {
+        sendEvent(
+          toAgentRunEvent({
+            agent: "Researcher",
+            text,
+            type: toolCalls.length > 1 ? "progress" : "text",
+            current: step,
+            total: toolCalls.length,
+          }),
+        );
+      },
+    });
+    for (const toolMsg of toolMsgs) {
+      state.memory.put(toolMsg);
+    }
+
+    // Collect the source nodes attached to the tool results, dropping messages
+    // that carry none.
+    const sourcesNodes: NodeWithScore<Metadata>[] = toolMsgs
+      .map((msg) => (msg.options as any)?.toolResult?.result?.sourceNodes)
+      .flat()
+      .filter(Boolean);
+
+    if (sourcesNodes.length > 0) {
+      sendEvent(toSourceEvent(sourcesNodes));
+    }
+
+    // Send a message indicating research is done, triggering analysis
+    return analyzeEvent.with({
+      input: {
+        role: "assistant",
+        content:
+          "I have finished researching the data, please analyze the data.",
+      },
+    });
+  });
+
+  /**
+   * Analyze a research result or a tool call for code interpreter from the LLM
+   */
+  workflow.handle([analyzeEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    sendEvent(
+      toAgentRunEvent({
+        agent: "Analyst",
+        text: "Analyzing data",
+        type: "text",
+      }),
+    );
+    // Request by workflow LLM, input is a list of tool calls
+    let toolCalls: ToolCall[] = [];
+    if (Array.isArray(data.input)) {
+      toolCalls = data.input;
+    } else {
+      // Requested by Researcher, input is a ChatMessage
+      // We start new LLM chat specifically for analyzing the data
+      const analysisPrompt = `
+      You are an expert in analyzing financial data.
+      You are given a set of financial data to analyze. Your task is to analyze the financial data and return a report.
+      Your response should include a detailed analysis of the financial data, including any trends, patterns, or insights that you find.
+      Construct the analysis in textual format; including tables would be great!
+      Don't need to synthesize the data, just analyze and provide your findings.
+      `;
+
+      // Clone the current chat history
+      // Add the analysis system prompt and the message from the researcher
+      // (the two extra messages are only sent to the LLM, not stored in memory)
+      const currentChatHistory = await state.memory.getMessages();
+      const newChatHistory = [
+        ...currentChatHistory,
+        { role: "system", content: analysisPrompt },
+        data.input, // This is the ChatMessage from the research step
+      ];
+      const toolCallResponse = await chatWithTools(
+        llm,
+        [codeInterpreterTool],
+        newChatHistory as ChatMessage[],
+      );
+
+      if (!toolCallResponse.hasToolCall()) {
+        // If no tool call needed for analysis, put the response directly
+        state.memory.put(await toolCallResponse.asFullResponse());
+        const finalChatHistory = await state.memory.getMessages();
+        return inputEvent.with({ input: finalChatHistory });
+      } else {
+        state.memory.put(toolCallResponse.toolCallMessage!);
+        toolCalls = toolCallResponse.toolCalls;
+      }
+    }
+
+    // Call the code interpreter tools if needed
+    if (toolCalls.length > 0) {
+      const toolMsgs = await callTools({
+        tools: [codeInterpreterTool],
+        toolCalls,
+        writeEvent: (text, step) => {
+          sendEvent(
+            toAgentRunEvent({
+              agent: "Analyst",
+              text,
+              type: toolCalls.length > 1 ? "progress" : "text",
+              current: step,
+              total: toolCalls.length,
+            }),
+          );
+        },
+      });
+      for (const toolMsg of toolMsgs) {
+        state.memory.put(toolMsg);
+      }
+    }
+
+    const finalChatHistory = await state.memory.getMessages();
+    // After analysis (or tool calls for analysis), trigger the next LLM input cycle
+    return inputEvent.with({ input: finalChatHistory });
+  });
+
+  // Report step: run the document generator tool calls and store the results.
+  workflow.handle([reportGenerationEvent], async ({ data }) => {
+    const { sendEvent, state } = getContext();
+    const { toolCalls } = data;
+
+    const toolMsgs = await callTools({
+      tools: [documentGeneratorTool],
+      toolCalls,
+      writeEvent: (text, step) => {
+        sendEvent(
+          toAgentRunEvent({
+            agent: "Reporter",
+            text,
+            type: toolCalls.length > 1 ? "progress" : "text",
+            current: step,
+            total: toolCalls.length,
+          }),
+        );
+      },
+    });
+    for (const toolMsg of toolMsgs) {
+      state.memory.put(toolMsg);
+    }
+    const chatHistory = await state.memory.getMessages();
+    // After report generation, trigger the next LLM input cycle
+    return inputEvent.with({ input: chatHistory });
+  });
+
+  return workflow;
+}
@@ -0,0 +1,39 @@
+import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
+import "dotenv/config";
+import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
+import { initSettings } from "./app/settings";
+
+/**
+ * Indexes the files found in the `data` directory and stores the result in a
+ * storage context that persists to the `storage` directory.
+ */
+async function generateDatasource() {
+  console.log(`Generating storage context...`);
+  // Storage context backed by the "storage" directory
+  const persistedContext = await storageContextFromDefaults({
+    persistDir: "storage",
+  });
+  // Read the documents located in the "data" directory
+  const loadedDocs = await new SimpleDirectoryReader().loadData("data");
+
+  // Build the vector index from the documents inside that storage context
+  await VectorStoreIndex.fromDocuments(loadedDocs, {
+    storageContext: persistedContext,
+  });
+  console.log("Storage context successfully generated.");
+}
+
+// CLI entry point: `ui` is a no-op for this template, anything else generates
+// the datasource (with a warning when the command is not "datasource").
+(async () => {
+  const [command] = process.argv.slice(2);
+
+  initSettings();
+
+  // This template ships without custom UI components.
+  if (command === "ui") {
+    console.error("This project doesn't use any custom UI.");
+    return;
+  }
+
+  // Unknown (or missing) commands fall back to datasource generation.
+  if (command !== "datasource") {
+    console.error(
+      `Unrecognized command: ${command}. Generating datasource by default.`,
+    );
+  }
+  await generateDatasource();
+})();
@@ -12,11 +12,12 @@ from llama_index.server.services.llamacloud.generate import (
    load_to_llamacloud,
 )

+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()


-def generate_datasource():
+def generate_index():
    init_settings()
    logger.info("Generate index for the provided data")

@@ -27,5 +28,26 @@ def generate_datasource():
    load_to_llamacloud(index, logger=logger)


-if __name__ == "__main__":
-    generate_datasource()
+def generate_ui_for_workflow():
+    """
+    Generate UI for UIEventData event in app/workflow.py
+    """
+    import asyncio
+    from llama_index.llms.openai import OpenAI
+    from main import COMPONENT_DIR
+
+    # To generate UI components for additional event types,
+    # import the corresponding data model (e.g., MyCustomEventData)
+    # and run the generate_ui_for_workflow function with the imported model.
+    # Make sure the output filename of the generated UI component matches the event type (here `ui_event`)
+    try:
+        from app.workflow import UIEventData  # type: ignore
+    except ImportError:
+        raise ImportError("Couldn't generate UI component for the current workflow.")
+    from llama_index.server.gen_ui import generate_event_component
+
+    # works also well with Claude 3.7 Sonnet or Gemini Pro 2.5
+    llm = OpenAI(model="gpt-4.1")
+    code = asyncio.run(generate_event_component(event_cls=UIEventData, llm=llm))
+    with open(f"{COMPONENT_DIR}/ui_event.jsx", "w") as f:
+        f.write(code)
@@ -1,3 +1,5 @@
+import { OpenAI } from "@llamaindex/openai";
+import { generateEventComponent } from "@llamaindex/server";
 import * as dotenv from "dotenv";
 import "dotenv/config";
 import * as fs from "fs/promises";
@@ -88,7 +90,7 @@ async function loadAndIndex() {
  console.log(`Successfully uploaded documents to LlamaCloud!`);
 }

-(async () => {
+async function generateDatasource() {
  try {
    checkRequiredEnvVars();
    initSettings();
@@ -97,4 +99,39 @@ async function loadAndIndex() {
  } catch (error) {
    console.error("Error generating storage.", error);
  }
+}
+
+/**
+ * Generates the UI component for the workflow's UI event and writes it to
+ * `components/ui_event.jsx`.
+ *
+ * @throws Error when `./app/workflow` does not export a `UIEventSchema`.
+ */
+async function generateUi() {
+  // Also works well with Claude 3.5 Sonnet and Google Gemini 2.5 Pro
+  const llm = new OpenAI({ model: "gpt-4.1" });
+
+  // Imported lazily so the schema is only required when generating the UI.
+  const workflowModule = await import("./app/workflow");
+  const UIEventSchema = (workflowModule as any).UIEventSchema;
+  if (!UIEventSchema) {
+    throw new Error(
+      "To generate the UI, you must define a UIEventSchema in your workflow.",
+    );
+  }
+
+  const generatedCode = await generateEventComponent(UIEventSchema, llm);
+  // Write the generated code to components/ui_event.jsx; create the directory
+  // first so a missing `components` folder does not fail the write.
+  await fs.mkdir("components", { recursive: true });
+  await fs.writeFile("components/ui_event.jsx", generatedCode);
+}
+
+// CLI entry point: dispatch on the first argument ("datasource" or "ui").
+(async () => {
+  const [command] = process.argv.slice(2);
+
+  initSettings();
+
+  switch (command) {
+    case "datasource":
+      await generateDatasource();
+      break;
+    case "ui":
+      await generateUi();
+      break;
+    default:
+      // Unknown or missing command: warn, then fall back to the datasource.
+      console.error(
+        'Invalid command. Please use "datasource" or "ui". Running "datasource" by default.',
+      );
+      await generateDatasource(); // Default behavior or could throw an error
+  }
 })();
@@ -1,4 +1,4 @@
-import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
+import { LlamaCloudIndex } from "llamaindex";

 type LlamaCloudDataSourceParams = {
  llamaCloudPipeline?: {
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { AstraDBVectorStore } from "@llamaindex/astra";
 import * as dotenv from "dotenv";
 import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { AstraDBVectorStore } from "@llamaindex/astra";
 import { VectorStoreIndex } from "llamaindex";
 import { checkRequiredEnvVars } from "./shared";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { ChromaVectorStore } from "@llamaindex/chroma";
 import * as dotenv from "dotenv";
 import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { ChromaVectorStore } from "@llamaindex/chroma";
 import { VectorStoreIndex } from "llamaindex";
 import { checkRequiredEnvVars } from "./shared";
@@ -1,4 +1,4 @@
-import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex";
+import { LlamaCloudIndex } from "llamaindex";

 type LlamaCloudDataSourceParams = {
  llamaCloudPipeline?: {
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { MilvusVectorStore } from "@llamaindex/milvus";
 import * as dotenv from "dotenv";
 import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { MongoDBAtlasVectorSearch } from "@llamaindex/mongodb";
 import * as dotenv from "dotenv";
 import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { MongoDBAtlasVectorSearch } from "@llamaindex/mongodb";
 import { VectorStoreIndex } from "llamaindex";
 import { MongoClient } from "mongodb";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { PineconeVectorStore } from "@llamaindex/pinecone";
 import * as dotenv from "dotenv";
 import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { PineconeVectorStore } from "@llamaindex/pinecone";
 import { VectorStoreIndex } from "llamaindex";
 import { checkRequiredEnvVars } from "./shared";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { QdrantVectorStore } from "@llamaindex/qdrant";
 import * as dotenv from "dotenv";
 import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
@@ -1,4 +1,3 @@
-/* eslint-disable turbo/no-undeclared-env-vars */
 import { WeaviateVectorStore } from "@llamaindex/weaviate";
 import * as dotenv from "dotenv";
 import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
@@ -1,447 +0,0 @@
-import { toSourceEvent, toStreamGenerator } from "@llamaindex/server";
-import {
-  AgentInputData,
-  AgentWorkflowContext,
-  ChatMemoryBuffer,
-  ChatResponseChunk,
-  HandlerContext,
-  LlamaCloudIndex,
-  Metadata,
-  MetadataMode,
-  NodeWithScore,
-  PromptTemplate,
-  Settings,
-  StartEvent,
-  StopEvent as StopEventBase,
-  ToolCallLLM,
-  VectorStoreIndex,
-  Workflow,
-  WorkflowEvent,
-} from "llamaindex";
-import { randomUUID } from "node:crypto";
-import { z } from "zod";
-import { getIndex } from "./data";
-
-// workflow factory
-export const workflowFactory = async (reqBody: any) => {
-  const index = await getIndex(reqBody?.data);
-  return new DeepResearchWorkflow(index);
-};
-
-// workflow configs
-const MAX_QUESTIONS = 6; // max number of questions to research, research will stop when this number is reached
-const TIMEOUT = 360; // timeout in seconds
-const TOP_K = 10; // number of nodes to retrieve from the vector store
-
-const createPlanResearchPrompt = new PromptTemplate({
-  template: `
-You are a professor who is guiding a researcher to research a specific request/problem.
-Your task is to decide on a research plan for the researcher.
-
-The possible actions are:
-+ Provide a list of questions for the researcher to investigate, with the purpose of clarifying the request.
-+ Write a report if the researcher has already gathered enough research on the topic and can resolve the initial request.
-+ Cancel the research if most of the answers from researchers indicate there is insufficient information to research the request. Do not attempt more than 3 research iterations or too many questions.
-
-The workflow should be:
-+ Always begin by providing some initial questions for the researcher to investigate.
-+ Analyze the provided answers against the initial topic/request. If the answers are insufficient to resolve the initial request, provide additional questions for the researcher to investigate.
-+ If the answers are sufficient to resolve the initial request, instruct the researcher to write a report.
-
-Here are the context: 
-<Collected information>
-{context_str}
-</Collected information>
-
-<Conversation context>
-{conversation_context}
-</Conversation context>
-
-{enhanced_prompt}
-
-Now, provide your decision in the required format for this user request:
-<User request>
-{user_request}
-</User request>
-`,
-  templateVars: [
-    "context_str",
-    "conversation_context",
-    "enhanced_prompt",
-    "user_request",
-  ],
-});
-
-const researchPrompt = new PromptTemplate({
-  template: `
-You are a researcher who is in the process of answering the question.
-The purpose is to answer the question based on the collected information, without using prior knowledge or making up any new information.
-Always add citations to the sentence/point/paragraph using the id of the provided content.
-The citation should follow this format: [citation:id] where id is the id of the content.
-
-E.g:
-If we have a context like this:
-<Citation id='abc-xyz'>
-Baby llama is called cria
-</Citation id='abc-xyz'>
-
-And your answer uses the content, then the citation should be:
- Baby llama is called cria [citation:abc-xyz]
-
- Here is the provided context for the question:
-<Collected information>
-{context_str}
-</Collected information>
-
-No prior knowledge, just use the provided context to answer the question: {question}
-`,
-  templateVars: ["context_str", "question"],
-});
-
-const WRITE_REPORT_PROMPT = `
-You are a researcher writing a report based on a user request and the research context.
-You have researched various perspectives related to the user request.
-The report should provide a comprehensive outline covering all important points from the researched perspectives.
-Create a well-structured outline for the research report that covers all the answers.
-
-# IMPORTANT when writing in markdown format:
-+ Use tables or figures where appropriate to enhance presentation.
-+ Preserve all citation syntax (the \`[citation:id]()\` parts in the provided context). Keep these citations in the final report - no separate reference section is needed.
-+ Do not add links, a table of contents, or a references section to the report.
-`;
-
-// workflow events
-type ResearchQuestion = { questionId: string; question: string };
-type ResearchResult = ResearchQuestion & { answer: string };
-
-class PlanResearchEvent extends WorkflowEvent<{}> {}
-class ResearchEvent extends WorkflowEvent<ResearchQuestion[]> {}
-class ReportEvent extends WorkflowEvent<{}> {}
-class StopEvent extends StopEventBase<AsyncGenerator<ChatResponseChunk>> {}
-
-export const UIEventSchema = z
-  .object({
-    event: z
-      .enum(["retrieve", "analyze", "answer"])
-      .describe(
-        "The type of event. DeepResearch has 3 main stages:\n1. retrieve: Retrieve the context from the vector store\n2. analyze: Analyze the context and generate a research questions to answer\n3. answer: Answer the provided questions. Each question has a unique id, when the state is done, the event will have the answer for the question.",
-      ),
-    state: z
-      .enum(["pending", "inprogress", "done", "error"])
-      .describe("The state for each event"),
-    id: z.string().optional().describe("The id of the question"),
-    question: z
-      .string()
-      .optional()
-      .describe("The question generated by the LLM"),
-    answer: z.string().optional().describe("The answer generated by the LLM"),
-  })
-  .describe("DeepResearchEvent");
-
-type UIEventData = z.infer<typeof UIEventSchema>;
-
-class UIEvent extends WorkflowEvent<{
-  type: "ui_event";
-  data: UIEventData;
-}> {}
-
-// workflow definition
-class DeepResearchWorkflow extends Workflow<
-  AgentWorkflowContext,
-  AgentInputData,
-  string
-> {
-  #llm = Settings.llm as ToolCallLLM;
-  #index?: VectorStoreIndex | LlamaCloudIndex;
-
-  userRequest: string = "";
-  totalQuestions: number = 0;
-  contextNodes: NodeWithScore<Metadata>[] = [];
-  memory: ChatMemoryBuffer = new ChatMemoryBuffer({ llm: Settings.llm });
-
-  constructor(index: VectorStoreIndex | LlamaCloudIndex) {
-    super({ timeout: TIMEOUT });
-    this.#index = index;
-    this.addWorkflowSteps();
-  }
-
-  addWorkflowSteps() {
-    this.addStep(
-      {
-        inputs: [StartEvent<AgentInputData>],
-        outputs: [PlanResearchEvent],
-      },
-      this.handleStartWorkflow,
-    );
-    this.addStep(
-      {
-        inputs: [PlanResearchEvent],
-        outputs: [ResearchEvent, ReportEvent, StopEvent],
-      },
-      this.handlePlanResearch,
-    );
-    this.addStep(
-      {
-        inputs: [ResearchEvent],
-        outputs: [PlanResearchEvent],
-      },
-      this.handleResearch,
-    );
-    this.addStep(
-      {
-        inputs: [ReportEvent],
-        outputs: [StopEvent],
-      },
-      this.handleReport,
-    );
-  }
-
-  async initWorkflow(data: AgentInputData) {
-    const { userInput, chatHistory = [] } = data;
-    if (!userInput) throw new Error("Invalid input");
-
-    this.userRequest = userInput;
-
-    await this.memory.set(chatHistory);
-    await this.memory.put({ role: "user", content: userInput });
-  }
-
-  handleStartWorkflow = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: StartEvent<AgentInputData>,
-  ): Promise<PlanResearchEvent> => {
-    await this.initWorkflow(ev.data);
-
-    ctx.sendEvent(
-      new UIEvent({
-        type: "ui_event",
-        data: { event: "retrieve", state: "inprogress" },
-      }),
-    );
-
-    const retrievedNodes = await this.retriever.retrieve(this.userRequest);
-
-    ctx.sendEvent(toSourceEvent(retrievedNodes));
-
-    ctx.sendEvent(
-      new UIEvent({
-        type: "ui_event",
-        data: { event: "retrieve", state: "done" },
-      }),
-    );
-
-    this.contextNodes = retrievedNodes;
-
-    return new PlanResearchEvent({});
-  };
-
-  handlePlanResearch = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: PlanResearchEvent,
-  ): Promise<ResearchEvent | ReportEvent | StopEvent> => {
-    ctx.sendEvent(
-      new UIEvent({
-        type: "ui_event",
-        data: { event: "analyze", state: "inprogress" },
-      }),
-    );
-
-    const { decision, researchQuestions, cancelReason } =
-      await this.createResearchPlan();
-
-    // Stop workflow due to decision from LLM
-    if (decision === "cancel") {
-      ctx.sendEvent(
-        new UIEvent({
-          type: "ui_event",
-          data: { event: "analyze", state: "done" },
-        }),
-      );
-      return new StopEvent(
-        toStreamGenerator(
-          cancelReason ?? "Research cancelled without any reason.",
-        ),
-      );
-    }
-
-    // Trigger research from generated questions
-    if (decision === "research") {
-      this.memory.put({
-        role: "assistant",
-        content:
-          "We need to find answers to the following questions:\n" +
-          researchQuestions.join("\n"),
-      });
-
-      researchQuestions.forEach(({ questionId: id, question }) => {
-        ctx.sendEvent(
-          new UIEvent({
-            type: "ui_event",
-            data: { event: "answer", state: "pending", id, question },
-          }),
-        );
-      });
-
-      return new ResearchEvent(researchQuestions);
-    }
-
-    // Resarch done, start writing report
-    this.memory.put({
-      role: "assistant",
-      content: "No more idea to analyze. We should report the answers.",
-    });
-
-    ctx.sendEvent(
-      new UIEvent({
-        type: "ui_event",
-        data: { event: "analyze", state: "done" },
-      }),
-    );
-
-    return new ReportEvent({});
-  };
-
-  handleResearch = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: ResearchEvent,
-  ): Promise<PlanResearchEvent> => {
-    const researchQuestions = ev.data;
-
-    // Answer questions in parallel
-    const researchResults: ResearchResult[] = await Promise.all(
-      researchQuestions.map(async ({ questionId: id, question }) => {
-        ctx.sendEvent(
-          new UIEvent({
-            type: "ui_event",
-            data: { event: "answer", state: "inprogress", id, question },
-          }),
-        );
-
-        const answer = await this.answerQuestion(question);
-
-        ctx.sendEvent(
-          new UIEvent({
-            type: "ui_event",
-            data: { event: "answer", state: "done", id, question, answer },
-          }),
-        );
-
-        return { questionId: id, question, answer };
-      }),
-    );
-
-    // Save answers to memory
-    researchResults.forEach(({ question, answer }) => {
-      this.memory.put({
-        role: "assistant",
-        content: `<Question>${question}</Question>\n<Answer>${answer}</Answer>`,
-      });
-    });
-
-    this.memory.put({
-      role: "assistant",
-      content:
-        "Researched all the questions. Now, I need to analyze if it's ready to write a report or need to research more.",
-    });
-
-    this.totalQuestions += researchResults.length;
-
-    return new PlanResearchEvent({});
-  };
-
-  handleReport = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: ReportEvent,
-  ): Promise<StopEvent> => {
-    const chatHistory = await this.memory.getAllMessages();
-
-    const messages = chatHistory.concat([
-      {
-        role: "system",
-        content: WRITE_REPORT_PROMPT,
-      },
-      {
-        role: "user",
-        content:
-          "Write a report addressing the user request based on the research provided the context",
-      },
-    ]);
-
-    const stream = await this.llm.chat({ messages, stream: true });
-
-    return new StopEvent(toStreamGenerator(stream));
-  };
-
-  get llm() {
-    if (!this.#llm.supportToolCall) throw new Error("LLM is not a ToolCallLLM");
-    return this.#llm;
-  }
-
-  get retriever() {
-    if (!this.#index) throw new Error("Index is not initialized");
-    return this.#index.asRetriever({ similarityTopK: TOP_K });
-  }
-
-  get contextStr() {
-    return this.contextNodes
-      .map((node) => {
-        const nodeId = node.node.id_;
-        const nodeContent = node.node.getContent(MetadataMode.NONE);
-        return `<Citation id='${nodeId}'>\n${nodeContent}</Citation id='${nodeId}'>`;
-      })
-      .join("\n");
-  }
-
-  get enhancedPrompt() {
-    if (this.totalQuestions === 0) {
-      return "The student has no questions to research. Let start by asking some questions.";
-    }
-
-    if (this.totalQuestions > MAX_QUESTIONS) {
-      return `The student has researched ${this.totalQuestions} questions. Should cancel the research if the context is not enough to write a report.`;
-    }
-
-    return "";
-  }
-
-  async createResearchPlan() {
-    const chatHistory = await this.memory.getMessages();
-
-    const conversationContext = chatHistory
-      .map((message) => `${message.role}: ${message.content}`)
-      .join("\n");
-
-    const prompt = createPlanResearchPrompt.format({
-      context_str: this.contextStr,
-      conversation_context: conversationContext,
-      enhanced_prompt: this.enhancedPrompt,
-      user_request: this.userRequest,
-    });
-
-    const responseFormat = z.object({
-      decision: z.enum(["research", "write", "cancel"]),
-      researchQuestions: z.array(z.string()),
-      cancelReason: z.string().optional(),
-    });
-
-    const result = await this.llm.complete({ prompt, responseFormat });
-    const plan = JSON.parse(result.text) as z.infer<typeof responseFormat>;
-
-    return {
-      ...plan,
-      researchQuestions: plan.researchQuestions.map((question) => ({
-        questionId: randomUUID(),
-        question,
-      })),
-    };
-  }
-
-  async answerQuestion(question: string) {
-    const prompt = researchPrompt.format({
-      context_str: this.contextStr,
-      question,
-    });
-    const result = await this.llm.complete({ prompt });
-    return result.text;
-  }
-}
@@ -1,396 +0,0 @@
-import { toAgentRunEvent, toSourceEvent } from "@llamaindex/server";
-import {
-  callTools,
-  chatWithTools,
-  documentGenerator,
-  interpreter,
-} from "@llamaindex/tools";
-import {
-  AgentInputData,
-  AgentWorkflowContext,
-  BaseToolWithCall,
-  ChatMemoryBuffer,
-  ChatMessage,
-  ChatResponseChunk,
-  HandlerContext,
-  Metadata,
-  NodeWithScore,
-  Settings,
-  StartEvent,
-  StopEvent,
-  ToolCall,
-  ToolCallLLM,
-  Workflow,
-  WorkflowEvent,
-} from "llamaindex";
-import { getIndex } from "./data";
-
-const TIMEOUT = 360 * 1000;
-
-export async function workflowFactory(reqBody: any) {
-  const index = await getIndex(reqBody?.data);
-
-  const queryEngineTool = index.queryTool({
-    metadata: {
-      name: "query_document",
-      description: `This tool can retrieve information about Apple and Tesla financial data`,
-    },
-    includeSourceNodes: true,
-  });
-
-  if (!process.env.E2B_API_KEY) {
-    throw new Error("E2B_API_KEY is required to use the code interpreter tool");
-  }
-
-  const codeInterpreterTool = interpreter({
-    apiKey: process.env.E2B_API_KEY!,
-  });
-  const documentGeneratorTool = documentGenerator();
-
-  return new FinancialReportWorkflow({
-    queryEngineTool,
-    codeInterpreterTool,
-    documentGeneratorTool,
-    timeout: TIMEOUT,
-  });
-}
-
-// Create a custom event type
-class InputEvent extends WorkflowEvent<{ input: ChatMessage[] }> {}
-
-class ResearchEvent extends WorkflowEvent<{
-  toolCalls: ToolCall[];
-}> {}
-
-class AnalyzeEvent extends WorkflowEvent<{
-  input: ChatMessage | ToolCall[];
-}> {}
-
-class ReportGenerationEvent extends WorkflowEvent<{
-  toolCalls: ToolCall[];
-}> {}
-
-const DEFAULT_SYSTEM_PROMPT = `
-You are a financial analyst who are given a set of tools to help you.
-It's good to using appropriate tools for the user request and always use the information from the tools, don't make up anything yourself.
-For the query engine tool, you should break down the user request into a list of queries and call the tool with the queries.
-`;
-
-class FinancialReportWorkflow extends Workflow<
-  AgentWorkflowContext,
-  AgentInputData,
-  string
-> {
-  llm: ToolCallLLM;
-  memory: ChatMemoryBuffer;
-  queryEngineTool: BaseToolWithCall;
-  codeInterpreterTool: BaseToolWithCall;
-  documentGeneratorTool: BaseToolWithCall;
-  systemPrompt?: string;
-
-  constructor(options: {
-    queryEngineTool: BaseToolWithCall;
-    codeInterpreterTool: BaseToolWithCall;
-    documentGeneratorTool: BaseToolWithCall;
-    systemPrompt?: string;
-    verbose?: boolean;
-    timeout?: number;
-  }) {
-    super({
-      verbose: options?.verbose ?? false,
-      timeout: options?.timeout ?? 360,
-    });
-
-    this.llm = Settings.llm as ToolCallLLM;
-    if (!this.llm.supportToolCall) {
-      throw new Error("LLM is not a ToolCallLLM");
-    }
-    this.systemPrompt = options.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
-    this.queryEngineTool = options.queryEngineTool;
-    this.codeInterpreterTool = options.codeInterpreterTool;
-
-    this.documentGeneratorTool = options.documentGeneratorTool;
-    this.memory = new ChatMemoryBuffer({ llm: this.llm, chatHistory: [] });
-
-    // Add steps
-    this.addStep(
-      {
-        inputs: [StartEvent<AgentInputData>],
-        outputs: [InputEvent],
-      },
-      this.prepareChatHistory,
-    );
-
-    this.addStep(
-      {
-        inputs: [InputEvent],
-        outputs: [
-          InputEvent,
-          ResearchEvent,
-          AnalyzeEvent,
-          ReportGenerationEvent,
-          StopEvent,
-        ],
-      },
-      this.handleLLMInput,
-    );
-
-    this.addStep(
-      {
-        inputs: [ResearchEvent],
-        outputs: [AnalyzeEvent],
-      },
-      this.handleResearch,
-    );
-
-    this.addStep(
-      {
-        inputs: [AnalyzeEvent],
-        outputs: [InputEvent],
-      },
-      this.handleAnalyze,
-    );
-
-    this.addStep(
-      {
-        inputs: [ReportGenerationEvent],
-        outputs: [InputEvent],
-      },
-      this.handleReportGeneration,
-    );
-  }
-
-  prepareChatHistory = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: StartEvent<AgentInputData>,
-  ): Promise<InputEvent> => {
-    const { userInput, chatHistory = [] } = ev.data;
-    if (!userInput) throw new Error("Invalid input");
-
-    this.memory.set(chatHistory);
-
-    if (this.systemPrompt) {
-      this.memory.put({ role: "system", content: this.systemPrompt });
-    }
-
-    this.memory.put({ role: "user", content: userInput });
-
-    const messages = await this.memory.getMessages();
-    return new InputEvent({ input: messages });
-  };
-
-  handleLLMInput = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: InputEvent,
-  ): Promise<
-    | InputEvent
-    | ResearchEvent
-    | AnalyzeEvent
-    | ReportGenerationEvent
-    | StopEvent<AsyncGenerator<ChatResponseChunk, any, any> | undefined>
-  > => {
-    const chatHistory = ev.data.input;
-
-    const tools = [
-      this.codeInterpreterTool,
-      this.documentGeneratorTool,
-      this.queryEngineTool,
-    ];
-
-    const toolCallResponse = await chatWithTools(this.llm, tools, chatHistory);
-
-    if (!toolCallResponse.hasToolCall()) {
-      return new StopEvent(toolCallResponse.responseGenerator);
-    }
-
-    if (toolCallResponse.hasMultipleTools()) {
-      this.memory.put({
-        role: "assistant",
-        content:
-          "Calling different tools is not allowed. Please only use multiple calls of the same tool.",
-      });
-      const chatHistory = await this.memory.getMessages();
-      return new InputEvent({ input: chatHistory });
-    }
-
-    // Put the LLM tool call message into the memory
-    // And trigger the next step according to the tool call
-    if (toolCallResponse.toolCallMessage) {
-      this.memory.put(toolCallResponse.toolCallMessage);
-    }
-    const toolName = toolCallResponse.getToolNames()[0];
-    switch (toolName) {
-      case this.codeInterpreterTool.metadata.name:
-        return new AnalyzeEvent({
-          input: toolCallResponse.toolCalls,
-        });
-      case this.documentGeneratorTool.metadata.name:
-        return new ReportGenerationEvent({
-          toolCalls: toolCallResponse.toolCalls,
-        });
-      default:
-        if (this.queryEngineTool.metadata.name === toolName) {
-          return new ResearchEvent({
-            toolCalls: toolCallResponse.toolCalls,
-          });
-        }
-        throw new Error(`Unknown tool: ${toolName}`);
-    }
-  };
-
-  handleResearch = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: ResearchEvent,
-  ): Promise<AnalyzeEvent> => {
-    ctx.sendEvent(
-      toAgentRunEvent({
-        agent: "Researcher",
-        text: "Researching data",
-        type: "text",
-      }),
-    );
-
-    const { toolCalls } = ev.data;
-
-    const toolMsgs = await callTools({
-      tools: [this.queryEngineTool],
-      toolCalls,
-      writeEvent: (text, step) => {
-        ctx.sendEvent(
-          toAgentRunEvent({
-            agent: "Researcher",
-            text,
-            type: toolCalls.length > 1 ? "progress" : "text",
-            current: step,
-            total: toolCalls.length,
-          }),
-        );
-      },
-    });
-    for (const toolMsg of toolMsgs) {
-      this.memory.put(toolMsg);
-    }
-
-    const sourcesNodes: NodeWithScore<Metadata>[] = toolMsgs
-      .map((msg) => (msg.options as any)?.toolResult?.result?.sourceNodes)
-      .flat()
-      .filter(Boolean);
-
-    if (sourcesNodes.length > 0) {
-      ctx.sendEvent(toSourceEvent(sourcesNodes));
-    }
-
-    return new AnalyzeEvent({
-      input: {
-        role: "assistant",
-        content:
-          "I have finished researching the data, please analyze the data.",
-      },
-    });
-  };
-
-  /**
-   * Analyze a research result or a tool call for code interpreter from the LLM
-   */
-  handleAnalyze = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: AnalyzeEvent,
-  ): Promise<InputEvent> => {
-    ctx.sendEvent(
-      toAgentRunEvent({
-        agent: "Analyst",
-        text: "Analyzing data",
-        type: "text",
-      }),
-    );
-    // Request by workflow LLM, input is a list of tool calls
-    let toolCalls: ToolCall[] = [];
-    if (Array.isArray(ev.data.input)) {
-      toolCalls = ev.data.input;
-    } else {
-      // Requested by Researcher, input is a ChatMessage
-      // We start new LLM chat specifically for analyzing the data
-      const analysisPrompt = `
-      You are an expert in analyzing financial data.
-      You are given a set of financial data to analyze. Your task is to analyze the financial data and return a report.
-      Your response should include a detailed analysis of the financial data, including any trends, patterns, or insights that you find.
-      Construct the analysis in textual format; including tables would be great!
-      Don't need to synthesize the data, just analyze and provide your findings.
-      `;
-
-      // Clone the current chat history
-      // Add the analysis system prompt and the message from the researcher
-      const chatHistory = await this.memory.getMessages();
-      const newChatHistory = [
-        ...chatHistory,
-        { role: "system", content: analysisPrompt },
-        ev.data.input,
-      ];
-      const toolCallResponse = await chatWithTools(
-        this.llm,
-        [this.codeInterpreterTool],
-        newChatHistory as ChatMessage[],
-      );
-
-      if (!toolCallResponse.hasToolCall()) {
-        this.memory.put(await toolCallResponse.asFullResponse());
-        const chatHistory = await this.memory.getMessages();
-        return new InputEvent({ input: chatHistory });
-      } else {
-        this.memory.put(toolCallResponse.toolCallMessage!);
-        toolCalls = toolCallResponse.toolCalls;
-      }
-    }
-
-    // Call the tools
-    const toolMsgs = await callTools({
-      tools: [this.codeInterpreterTool],
-      toolCalls,
-      writeEvent: (text, step) => {
-        ctx.sendEvent(
-          toAgentRunEvent({
-            agent: "Analyst",
-            text,
-            type: toolCalls.length > 1 ? "progress" : "text",
-            current: step,
-            total: toolCalls.length,
-          }),
-        );
-      },
-    });
-    for (const toolMsg of toolMsgs) {
-      this.memory.put(toolMsg);
-    }
-
-    const chatHistory = await this.memory.getMessages();
-    return new InputEvent({ input: chatHistory });
-  };
-
-  handleReportGeneration = async (
-    ctx: HandlerContext<AgentWorkflowContext>,
-    ev: ReportGenerationEvent,
-  ): Promise<InputEvent> => {
-    const { toolCalls } = ev.data;
-
-    const toolMsgs = await callTools({
-      tools: [this.documentGeneratorTool],
-      toolCalls,
-      writeEvent: (text, step) => {
-        ctx.sendEvent(
-          toAgentRunEvent({
-            agent: "Reporter",
-            text,
-            type: toolCalls.length > 1 ? "progress" : "text",
-            current: step,
-            total: toolCalls.length,
-          }),
-        );
-      },
-    });
-    for (const toolMsg of toolMsgs) {
-      this.memory.put(toolMsg);
-    }
-    const chatHistory = await this.memory.getMessages();
-    return new InputEvent({ input: chatHistory });
-  };
-}
@@ -1,8 +1,12 @@
+import os
+
 from llama_index.core import Settings
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI


 def init_settings():
-    Settings.llm = OpenAI(model="gpt-4o-mini")
-    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
+    if os.getenv("OPENAI_API_KEY") is None:
+        raise RuntimeError("OPENAI_API_KEY is missing in environment variables")
+    Settings.llm = OpenAI(model="gpt-4.1")
+    Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
@@ -51,7 +51,7 @@ def generate_ui_for_workflow():
    # and run the generate_ui_for_workflow function with the imported model.
    # Make sure the output filename of the generated UI component matches the event type (here `ui_event`)
    try:
-        from app.workflow import UIEventData
+        from app.workflow import UIEventData  # type: ignore
    except ImportError:
        raise ImportError("Couldn't generate UI component for the current workflow.")
    from llama_index.server.gen_ui import generate_event_component
@@ -16,9 +16,11 @@ def create_app():
        workflow_factory=create_workflow,  # A factory function that creates a new workflow for each request
        ui_config=UIConfig(
            component_dir=COMPONENT_DIR,
-            app_title="Chat App",
+            dev_mode=True,  # Please disable this in production
+            layout_dir="layout",
        ),
        logger=logger,
+        env="dev",
    )
    # You can also add custom FastAPI routes to app
    app.add_api_route("/api/health", lambda: {"message": "OK"}, status_code=200)
@@ -12,7 +12,7 @@ dependencies = [
    "pydantic<2.10",
    "aiostream>=0.5.2,<0.6.0",
    "llama-index-core>=0.12.28,<0.13.0",
-    "llama-index-server>=0.1.14,<0.2.0",
+    "llama-index-server>=0.1.17,<0.2.0",
 ]

 [project.optional-dependencies]
@@ -46,6 +46,9 @@ disable_error_code = [ "return-value", "assignment" ]
 module = "app.*"
 ignore_missing_imports = false

+[tool.hatch.metadata]
+allow-direct-references = true
+
 [build-system]
 requires = [ "hatchling>=1.24" ]
-build-backend = "hatchling.build"
+build-backend = "hatchling.build"
@@ -0,0 +1,6 @@
+{
+  "watch": ["src/**/*.ts"],
+  "exec": "nodemon --exec tsx src/index.ts",
+  "ext": "js ts",
+  "ignore": ["src/app/workflow_*.ts"]
+}
@@ -5,21 +5,23 @@
    "generate": "tsx src/generate.ts datasource",
    "generate:datasource": "tsx src/generate.ts datasource",
    "generate:ui": "tsx src/generate.ts ui",
-    "dev": "tsx watch src/index.ts",
-    "start": "tsx src/index.ts"
+    "dev": "nodemon",
+    "start": "tsx src/index.ts",
+    "eject": "llamaindex-server eject"
  },
  "dependencies": {
-    "@llamaindex/openai": "0.2.0",
-    "@llamaindex/readers": "^2.0.0",
-    "@llamaindex/server": "0.1.5",
-    "@llamaindex/tools": "0.0.4",
+    "@llamaindex/openai": "~0.4.0",
+    "@llamaindex/server": "~0.2.1",
+    "@llamaindex/workflow": "~1.1.3",
+    "@llamaindex/tools": "~0.0.11",
+    "llamaindex": "~0.11.0",
    "dotenv": "^16.4.7",
-    "zod": "^3.23.8",
-    "llamaindex": "0.10.2"
+    "zod": "^3.23.8"
  },
  "devDependencies": {
    "@types/node": "^20.10.3",
    "tsx": "^4.7.2",
-    "typescript": "^5.3.2"
+    "typescript": "^5.3.2",
+    "nodemon": "^3.1.10"
  }
 }
@@ -3,7 +3,7 @@ import { Settings } from "llamaindex";

 export function initSettings() {
  Settings.llm = new OpenAI({
-    model: "gpt-4o-mini",
+    model: "gpt-4.1",
  });
  Settings.embedModel = new OpenAIEmbedding({
    model: "text-embedding-3-small",
@@ -1,14 +1,10 @@
 import "dotenv/config";
 import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
-import {
-  OpenAI,
-  storageContextFromDefaults,
-  VectorStoreIndex,
-} from "llamaindex";
+import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
 import { initSettings } from "./app/settings";
 import fs from "fs";
 import { generateEventComponent } from "@llamaindex/server";
-import { UIEventSchema } from "./app/workflow";
+import { OpenAI } from "@llamaindex/openai";

 async function generateDatasource() {
  console.log(`Generating storage context...`);
@@ -30,6 +26,14 @@ async function generateUi() {
  // Also works well with Claude 3.5 Sonnet and Google Gemini 2.5 Pro
  const llm = new OpenAI({ model: "gpt-4.1" });

+  const workflowModule = await import("./app/workflow");
+  const UIEventSchema = (workflowModule as any).UIEventSchema;
+  if (!UIEventSchema) {
+    throw new Error(
+      "To generate the UI, you must define a UIEventSchema in your workflow.",
+    );
+  }
+
  // You can also generate for other workflow events
  const generatedCode = await generateEventComponent(UIEventSchema, llm);
  // Write the generated code to components/ui_event.ts
--- a/Show More
+++ b/Show More