Release 0.11.19 (#2105 )

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
feat: add llm.exec (#2078 )
2026-07-01 22:14:03 -04:00 · 2025-07-17 15:44:22 +08:00 · 2025-07-17 15:36:56 +08:00 · 2025-07-17 10:30:39 +08:00 · 2025-07-16 13:49:49 +08:00 · 2025-07-15 10:06:17 -07:00
868 changed files with 40850 additions and 25685 deletions
@@ -8,6 +8,11 @@ on:
    branches:
      - main

+env:
+  TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
+  TURBO_TEAM: ${{ vars.TURBO_TEAM }}
+  TURBO_REMOTE_ONLY: true
+
 jobs:
  lint:
    runs-on: ubuntu-latest
@@ -1,6 +1,11 @@
 name: Publish Preview
 on: [pull_request]

+env:
+  TURBO_TOKEN: ${{ secrets.TURBO_TOKEN }}
+  TURBO_TEAM: ${{ vars.TURBO_TEAM }}
+  TURBO_REMOTE_ONLY: true
+
 jobs:
  pre_release:
    name: Pre Release
@@ -23,7 +23,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        node-version: [18.x, 20.x, 22.x, 23.x]
+        node-version: [20.x, 22.x, 23.x]
    name: E2E on Node.js ${{ matrix.node-version }}
    runs-on: ubuntu-latest
    steps:
@@ -53,7 +53,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        node-version: [18.x, 20.x, 22.x, 23.x]
+        node-version: [20.x, 22.x, 23.x]
    name: Test on Node.js ${{ matrix.node-version }}
    runs-on: ubuntu-latest
    steps:
@@ -87,6 +87,30 @@ jobs:
        run: pnpm run type-check
      - name: Run Circular Dependency Check
        run: pnpm run circular-check
+  e2e-npm:
+    runs-on: ubuntu-latest
+    name: Test using packages with npm
+    steps:
+      - uses: actions/checkout@v4
+      - uses: pnpm/action-setup@v4
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version-file: ".nvmrc"
+      - name: Install dependencies
+        run: pnpm install
+      - name: Build packages
+        run: pnpm run build
+      - name: Pack packages
+        run: |
+          pnpm pack --pack-destination ${{ runner.temp }} -C packages/llamaindex
+          pnpm pack --pack-destination ${{ runner.temp }} -C packages/workflow
+      - name: Install packed packages
+        run: npm add ${{ runner.temp }}/*.tgz
+        working-directory: e2e/npm
+      - name: Run tests
+        run: npm test
+        working-directory: e2e/npm
  e2e-llamaindex-examples:
    strategy:
      fail-fast: false
@@ -1 +1 @@
-20
+22
@@ -7,3 +7,4 @@ dist/
 .source/
 # prettier doesn't support the mdx3 we are using
 *.mdx
+packages/server/server/
@@ -0,0 +1,92 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Development Commands
+
+This project uses pnpm as the package manager and Turbo for build orchestration:
+
+- `pnpm install` - Install all dependencies
+- `pnpm build` - Build all packages using Turbo
+- `pnpm dev` - Start development mode for all packages
+- `pnpm test` - Run all unit tests
+- `pnpm e2e` - Run end-to-end tests
+- `pnpm lint` - Run ESLint across all packages
+- `pnpm type-check` - Run TypeScript type checking across workspace
+- `pnpm format` - Check code formatting with Prettier
+- `pnpm format:write` - Auto-fix formatting issues
+- `pnpm circular-check` - Check for circular dependencies using madge
+
+For individual package development:
+
+- `turbo run build --filter="@llamaindex/core"` - Build specific package
+- `turbo run test --filter="@llamaindex/core"` - Test specific package
+- Navigate to specific package directory and run `pnpm test` for focused testing
+- `pnpm clean` - Remove all build artifacts and node_modules across workspace
+
+## Architecture Overview
+
+LlamaIndex.TS is a TypeScript data framework for LLM applications organized as a pnpm monorepo with multiple runtime environment support (Node.js, Deno, Bun, Vercel Edge, Cloudflare Workers).
+
+### Package Structure
+
+**Core Packages:**
+
+- `packages/core/` - Abstract base classes and interfaces for all runtime environments
+- `packages/llamaindex/` - Main package that aggregates core functionality
+- `packages/env/` - Environment-specific compatibility layers for different JS runtimes
+
+**Provider Packages (`packages/providers/`):**
+
+- LLM providers: `openai/`, `anthropic/`, `ollama/`, `google/`, `groq/`, etc.
+- Vector stores: `storage/pinecone/`, `storage/chroma/`, `storage/qdrant/`, etc.
+- Embeddings: Various embedding providers integrated within LLM packages
+- Readers: `assemblyai/`, `discord/`, `notion/` for data ingestion
+
+**Specialized Packages:**
+
+- `packages/cloud/` - LlamaCloud integration for managed services
+- `packages/tools/` - Function calling tools and utilities
+- `packages/workflow/` - Agent workflow orchestration
+- `packages/readers/` - File format readers (PDF, DOCX, etc.)
+
+### Key Architectural Patterns
+
+**Runtime Abstraction:** Core functionality is runtime-agnostic, with environment-specific implementations in separate entry points (`index.ts`, `index.edge.ts`, `index.workerd.ts`).
+
+**Provider Pattern:** LLMs, embeddings, and vector stores implement common interfaces from `@llamaindex/core`, allowing easy swapping between providers.
+
+**Modular Design:** Each provider is a separate package to minimize bundle size - users install only what they need.
+
+**Data Flow:** Document → NodeParser → Embedding → VectorStore → Retriever → QueryEngine → Response
+
+### Core Components
+
+- **Agents and Workflows:** Abstractions for building agentic workflows and agents in `packages/workflow`
+- **Chat Engines:** Conversational interfaces in `core/chat-engine/`
+- **Query Engines:** Document querying with retrieval in `core/query-engine/`
+- **Indices:** VectorStoreIndex, SummaryIndex, KeywordTable in `llamaindex/indices/`
+- **Node Parsers:** Text splitting and chunking in `core/node-parser/`
+- **Ingestion Pipeline:** Document processing workflows in `llamaindex/ingestion/`
+- **Storage:** Chat stores, document stores, index stores, and KV stores in `core/storage/`
+
+### Deprecated Components
+
+- **Agents:** ReAct and function calling agents in `core/agent/` and `llamaindex/agent/`
+
+### Testing Structure
+
+- Unit tests in each package's `tests/` directory
+- E2E tests in `e2e/` directory with runtime-specific examples
+- Tests depend on build artifacts, so always run `pnpm build` before testing
+
+### Multi-Runtime Support
+
+The codebase supports multiple JavaScript runtimes through conditional exports and separate entry points. When making changes, consider compatibility across Node.js, Deno, Bun, and edge runtimes.
+
+### Development Notes
+
+- The project uses Husky for git hooks with lint-staged for pre-commit formatting and linting
+- All packages use bunchee for building with dual CJS/ESM support
+- Core package exports are organized as sub-modules (e.g., `@llamaindex/core/llms`, `@llamaindex/core/embeddings`)
+- Always run `pnpm build` before running tests, as tests depend on build artifacts
@@ -25,7 +25,7 @@ Make sure you have Node.js LTS (Long-term Support) installed. You can check your

 ```shell
 node -v
-# v20.x.x
+# v22.x.x
 ```

 ### Use pnpm
@@ -38,6 +38,7 @@ npm install -g pnpm

 ```shell
 pnpm install
+pnpm install -g tsx
 ```

 ### Build the packages
@@ -48,6 +49,56 @@ To build all packages, run:
 pnpm build
 ```

+### Start Developing
+
+You can launch the packages in dev mode by running:
+
+```shell
+pnpm dev
+```
+
+This will use turbo to run all packages in watch-mode. This means you can make changes and have them automatically built.
+
+If you want to customize what packages are built/watched, you can run turbo directly and adjust the filter:
+
+```shell
+pnpm turbo run dev --filter="./packages/core" --concurrency=100
+```
+
+In another terminal, you can write and run any script needed to quickly test your changes. For example:
+
+```typescript
+import { createMemory, staticBlock } from "@llamaindex/core/memory";
+
+// Create memory with predefined context
+const memory = createMemory({
+  memoryBlocks: [
+    staticBlock({
+      content:
+        "The user is a software engineer who loves TypeScript and LlamaIndex.",
+      messageRole: "system",
+    }),
+  ],
+});
+
+async function main() {
+  const result = await memory.getLLM();
+  console.log(result);
+}
+
+void main().catch(console.error);
+```
+
+And run it with:
+
+```shell
+pnpm exec tsx my_script.ts
+```
+
+This flow allows you to easily test your changes without having to build the entire project.
+
+Once you are happy with your changes, be sure to add tests (and confirm existing tests are passing!).
+
 ### Run tests

 #### Unit tests
@@ -92,7 +143,7 @@ Before sending a PR, make sure of the following:
 3. If you have a new feature, add a new example in the `examples` folder.
 4. You have a descriptive changeset for each PR:

-### Changesets
+### Bumping the versions of packages you've modified

 We use [changesets](https://github.com/changesets/changesets) for managing versions and changelogs. To create a new
 changeset, run in the root folder:
@@ -101,6 +152,8 @@ changeset, run in the root folder:
 pnpm changeset
 ```

+You will be prompted to choose which packages need their versions bumped, and what kind of bump (major, minor or patch) each one needs. Once you have done this, the version bump is applied automatically after the PR is merged.
+
 ## Publishing (maintainers only)

 The [Release Github Action](.github/workflows/release.yml) is automatically generating and updating a
@@ -7,9 +7,10 @@
 </h3>

 [![NPM Version](https://img.shields.io/npm/v/llamaindex)](https://www.npmjs.com/package/llamaindex)
-[![NPM License](https://img.shields.io/npm/l/llamaindex)](https://www.npmjs.com/package/llamaindex)
+[![NPM License](https://img.shields.io/npm/l/llamaindex)](https://github.com/run-llama/LlamaIndexTS/blob/main/LICENSE)
 [![NPM Downloads](https://img.shields.io/npm/dm/llamaindex)](https://www.npmjs.com/package/llamaindex)
 [![Discord](https://img.shields.io/discord/1059199217496772688)](https://discord.com/invite/eN6D2HQ4aX)
+[![Twitter](https://img.shields.io/twitter/follow/llama_index)](https://x.com/llama_index)

 Use your own data with large language models (LLMs, OpenAI ChatGPT and others) in JS runtime environments with TypeScript support.

@@ -63,7 +64,7 @@ yarn add llamaindex

 ### Setup in Node.js, Deno, Bun, TypeScript...?

-See our official document: <https://ts.llamaindex.ai/docs/llamaindex/getting_started/>
+See our official documentation: https://ts.llamaindex.ai/docs/llamaindex/getting_started

 ### Adding provider packages

@@ -83,19 +84,7 @@ Check out our NextJS playground at https://llama-playground.vercel.app/. The sou

 ## Core concepts for getting started:

- [Document](/packages/llamaindex/src/Node.ts): A document represents a text file, PDF file or other contiguous piece of data.
-
- [Node](/packages/llamaindex/src/Node.ts): The basic data building block. Most commonly, these are parts of the document split into manageable pieces that are small enough to be fed into an embedding model and LLM.
-
- [Embedding](/packages/llamaindex/src/embeddings/OpenAIEmbedding.ts): Embeddings are sets of floating point numbers which represent the data in a Node. By comparing the similarity of embeddings, we can derive an understanding of the similarity of two pieces of data. One use case is to compare the embedding of a question with the embeddings of our Nodes to see which Nodes may contain the data needed to answer that question. Because the default service context is OpenAI, the default embedding is `OpenAIEmbedding`. If using different models, say through Ollama, use this [Embedding](/packages/llamaindex/src/embeddings/OllamaEmbedding.ts) (see all [here](/packages/llamaindex/src/embeddings)).
-
- [Indices](/packages/llamaindex/src/indices/): Indices store the Nodes and the embeddings of those nodes. QueryEngines retrieve Nodes from these Indices using embedding similarity.
-
- [QueryEngine](/packages/llamaindex/src/engines/query/RetrieverQueryEngine.ts): Query engines are what generate the query you put in and give you back the result. Query engines generally combine a pre-built prompt with selected Nodes from your Index to give the LLM the context it needs to answer your query. To build a query engine from your Index (recommended), use the [`asQueryEngine`](/packages/llamaindex/src/indices/BaseIndex.ts) method on your Index. See all query engines [here](/packages/llamaindex/src/engines/query).
-
- [ChatEngine](/packages/llamaindex/src/engines/chat/SimpleChatEngine.ts): A ChatEngine helps you build a chatbot that will interact with your Indices. See all chat engines [here](/packages/llamaindex/src/engines/chat).
-
- [SimplePrompt](/packages/llamaindex/src/Prompt.ts): A simple standardized function call definition that takes in inputs and formats them in a template literal. SimplePrompts can be specialized using currying and combined using other SimplePrompt functions.
+See our documentation: https://ts.llamaindex.ai/docs/llamaindex/getting_started/concepts

 ## Contributing:

@@ -1,5 +1,318 @@
 # @llamaindex/doc

+## 0.2.40
+
+### Patch Changes
+
+- Updated dependencies [7ad3411]
+- Updated dependencies [5da5b3c]
+- Updated dependencies [a1fdb07]
+  - @llamaindex/core@0.6.15
+  - @llamaindex/workflow@1.1.15
+  - @llamaindex/openai@0.4.9
+  - @llamaindex/cloud@4.0.24
+  - llamaindex@0.11.19
+  - @llamaindex/node-parser@2.0.15
+  - @llamaindex/readers@3.1.14
+
+## 0.2.39
+
+### Patch Changes
+
+- Updated dependencies [a1b1598]
+  - @llamaindex/cloud@4.0.23
+  - llamaindex@0.11.18
+
+## 0.2.38
+
+### Patch Changes
+
+- Updated dependencies [d2be868]
+  - @llamaindex/cloud@4.0.22
+  - llamaindex@0.11.17
+
+## 0.2.37
+
+### Patch Changes
+
+- Updated dependencies [579ca0c]
+  - @llamaindex/cloud@4.0.21
+  - llamaindex@0.11.16
+
+## 0.2.36
+
+### Patch Changes
+
+- Updated dependencies [48b0d88]
+- Updated dependencies [f185772]
+  - @llamaindex/cloud@4.0.20
+  - llamaindex@0.11.15
+
+## 0.2.35
+
+### Patch Changes
+
+- Updated dependencies [5a0ed1f]
+- Updated dependencies [5a0ed1f]
+- Updated dependencies [8eeac33]
+  - @llamaindex/cloud@4.0.19
+  - @llamaindex/core@0.6.14
+  - llamaindex@0.11.14
+  - @llamaindex/node-parser@2.0.14
+  - @llamaindex/openai@0.4.8
+  - @llamaindex/readers@3.1.13
+  - @llamaindex/workflow@1.1.14
+
+## 0.2.34
+
+### Patch Changes
+
+- 39758ab: Add title to homepage header
+
+## 0.2.33
+
+### Patch Changes
+
+- Updated dependencies [47a7555]
+  - @llamaindex/cloud@4.0.18
+  - llamaindex@0.11.13
+
+## 0.2.32
+
+### Patch Changes
+
+- Updated dependencies [d578889]
+- Updated dependencies [0fcc92f]
+- Updated dependencies [515a8b9]
+  - @llamaindex/core@0.6.13
+  - llamaindex@0.11.12
+  - @llamaindex/cloud@4.0.17
+  - @llamaindex/node-parser@2.0.13
+  - @llamaindex/openai@0.4.7
+  - @llamaindex/readers@3.1.12
+  - @llamaindex/workflow@1.1.13
+
+## 0.2.31
+
+### Patch Changes
+
+- Updated dependencies [7039e1a]
+- Updated dependencies [7039e1a]
+  - llamaindex@0.11.11
+  - @llamaindex/core@0.6.12
+  - @llamaindex/cloud@4.0.16
+  - @llamaindex/node-parser@2.0.12
+  - @llamaindex/openai@0.4.6
+  - @llamaindex/readers@3.1.11
+  - @llamaindex/workflow@1.1.12
+
+## 0.2.30
+
+### Patch Changes
+
+- Updated dependencies [f7ec293]
+  - @llamaindex/workflow@1.1.11
+  - llamaindex@0.11.10
+
+## 0.2.29
+
+### Patch Changes
+
+- Updated dependencies [c5846bd]
+  - @llamaindex/readers@3.1.10
+
+## 0.2.28
+
+### Patch Changes
+
+- Updated dependencies [a89e187]
+- Updated dependencies [62699b7]
+- Updated dependencies [c5b2691]
+- Updated dependencies [d8ac8d3]
+  - @llamaindex/core@0.6.11
+  - @llamaindex/openai@0.4.5
+  - @llamaindex/cloud@4.0.15
+  - llamaindex@0.11.9
+  - @llamaindex/node-parser@2.0.11
+  - @llamaindex/readers@3.1.9
+  - @llamaindex/workflow@1.1.10
+
+## 0.2.27
+
+### Patch Changes
+
+- 8a51c16: Add natural language agent page
+- Updated dependencies [8a51c16]
+- Updated dependencies [1b5af14]
+  - @llamaindex/workflow@1.1.9
+  - @llamaindex/core@0.6.10
+  - llamaindex@0.11.8
+  - @llamaindex/cloud@4.0.14
+  - @llamaindex/node-parser@2.0.10
+  - @llamaindex/openai@0.4.4
+  - @llamaindex/readers@3.1.8
+
+## 0.2.26
+
+### Patch Changes
+
+- a4d394f: fix: correct SimpleDirectoryReader import path in documentation example
+- Updated dependencies [dbd857f]
+- Updated dependencies [3c857f4]
+  - @llamaindex/workflow@1.1.8
+  - llamaindex@0.11.7
+
+## 0.2.25
+
+### Patch Changes
+
+- Updated dependencies [40161fe]
+  - @llamaindex/workflow@1.1.7
+  - llamaindex@0.11.6
+
+## 0.2.24
+
+### Patch Changes
+
+- Updated dependencies [766054b]
+- Updated dependencies [71598f8]
+  - @llamaindex/workflow@1.1.6
+  - @llamaindex/core@0.6.9
+  - llamaindex@0.11.5
+  - @llamaindex/cloud@4.0.13
+  - @llamaindex/node-parser@2.0.9
+  - @llamaindex/openai@0.4.3
+  - @llamaindex/readers@3.1.7
+
+## 0.2.23
+
+### Patch Changes
+
+- Updated dependencies [c927457]
+  - @llamaindex/openai@0.4.2
+  - @llamaindex/core@0.6.8
+  - @llamaindex/cloud@4.0.12
+  - llamaindex@0.11.4
+  - @llamaindex/node-parser@2.0.8
+  - @llamaindex/readers@3.1.6
+  - @llamaindex/workflow@1.1.5
+
+## 0.2.22
+
+### Patch Changes
+
+- Updated dependencies [76ff23d]
+  - @llamaindex/cloud@4.0.11
+  - llamaindex@0.11.3
+
+## 0.2.21
+
+### Patch Changes
+
+- Updated dependencies [59601dd]
+  - @llamaindex/openai@0.4.1
+  - @llamaindex/core@0.6.7
+  - @llamaindex/cloud@4.0.10
+  - llamaindex@0.11.2
+  - @llamaindex/node-parser@2.0.7
+  - @llamaindex/readers@3.1.5
+  - @llamaindex/workflow@1.1.4
+
+## 0.2.20
+
+### Patch Changes
+
+- Updated dependencies [3703f90]
+  - @llamaindex/cloud@4.0.9
+  - llamaindex@0.11.1
+
+## 0.2.19
+
+### Patch Changes
+
+- Updated dependencies [680b529]
+- Updated dependencies [b0cd530]
+- Updated dependencies [361a685]
+- Updated dependencies [3e66ddc]
+  - @llamaindex/workflow@1.1.3
+  - @llamaindex/core@0.6.6
+  - llamaindex@0.11.0
+  - @llamaindex/openai@0.4.0
+  - @llamaindex/cloud@4.0.8
+  - @llamaindex/node-parser@2.0.6
+  - @llamaindex/readers@3.1.4
+
+## 0.2.18
+
+### Patch Changes
+
+- d671ed6: Add functionality for search params when querying Qdrant vector store.
+- Updated dependencies [76c9a80]
+- Updated dependencies [168d11f]
+- Updated dependencies [d671ed6]
+- Updated dependencies [40f5f41]
+  - @llamaindex/openai@0.3.7
+  - @llamaindex/workflow@1.1.2
+  - @llamaindex/core@0.6.5
+  - @llamaindex/cloud@4.0.7
+  - llamaindex@0.10.6
+  - @llamaindex/node-parser@2.0.5
+  - @llamaindex/readers@3.1.3
+
+## 0.2.17
+
+### Patch Changes
+
+- Updated dependencies [9b2e25a]
+  - @llamaindex/openai@0.3.6
+  - @llamaindex/core@0.6.4
+  - llamaindex@0.10.5
+  - @llamaindex/cloud@4.0.6
+  - @llamaindex/node-parser@2.0.4
+  - @llamaindex/readers@3.1.2
+  - @llamaindex/workflow@1.1.1
+
+## 0.2.16
+
+### Patch Changes
+
+- Updated dependencies [7e8e454]
+- Updated dependencies [2225ffd]
+- Updated dependencies [6ddf1c1]
+- Updated dependencies [bc53342]
+- Updated dependencies [41953a3]
+  - @llamaindex/workflow@1.1.0
+  - @llamaindex/cloud@4.0.5
+  - llamaindex@0.10.4
+
+## 0.2.15
+
+### Patch Changes
+
+- Updated dependencies [3ee8c83]
+  - @llamaindex/core@0.6.3
+  - llamaindex@0.10.3
+  - @llamaindex/openai@0.3.5
+  - @llamaindex/cloud@4.0.4
+  - @llamaindex/node-parser@2.0.3
+  - @llamaindex/readers@3.1.1
+  - @llamaindex/workflow@1.0.4
+
+## 0.2.14
+
+### Patch Changes
+
+- Updated dependencies [1e59695]
+  - @llamaindex/readers@3.1.0
+
+## 0.2.13
+
+### Patch Changes
+
+- Updated dependencies [e5c3f95]
+  - @llamaindex/openai@0.3.4
+  - llamaindex@0.10.2
+
 ## 0.2.12

 ### Patch Changes
@@ -0,0 +1,143 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with the LlamaIndex.TS documentation site.
+
+## Application Overview
+
+This is a Next.js documentation site (`@llamaindex/doc`) that serves as the official documentation for LlamaIndex.TS. It's built using Fumadocs, a modern documentation framework, and includes interactive features, API documentation generation, and AI-powered chat functionality.
+
+## Development Commands
+
+From this directory (`apps/next/`):
+
+- `pnpm dev` - Start development server with Turbo
+- `pnpm build` - Build the documentation site (includes `prebuild` step)
+- `pnpm start` - Start production server
+- `pnpm build:docs` - Generate API documentation from TypeScript source
+- `pnpm validate-links` - Validate all internal and external links
+
+Key build process:
+
+1. `prebuild` runs `build:docs` to generate API documentation using TypeDoc
+2. `build` runs Next.js build process
+3. `postbuild` runs post-processing scripts and link validation
+
+## Architecture
+
+### Framework Stack
+
+- **Next.js 15.3** - React framework with App Router
+- **Fumadocs** - Documentation framework with MDX support
+- **React Server Components** - AI chat functionality with server actions
+- **Tailwind CSS** - Styling with custom design system
+- **TypeScript** - Full type safety
+
+### Key Dependencies
+
+- **Fumadocs ecosystem**: `fumadocs-ui`, `fumadocs-mdx`, `fumadocs-core`, `fumadocs-openapi`
+- **AI features**: `ai` package for React Server Components chat
+- **Code features**: Monaco Editor, Shiki syntax highlighting, Twoslash TypeScript integration
+- **UI components**: Radix UI primitives, Framer Motion animations
+- **Content processing**: MDX, remark/rehype plugins, TypeDoc for API generation
+
+### Directory Structure
+
+**Content Management:**
+
+- `src/content/docs/` - MDX documentation files organized by topic
+- `src/content/docs/api/` - Auto-generated API documentation from TypeScript
+- `scripts/` - Build-time documentation generation and validation
+
+**Application Code:**
+
+- `src/app/` - Next.js App Router pages and API routes
+- `src/components/` - Reusable React components including UI library
+- `src/lib/` - Utilities, constants, and configuration
+
+**Configuration:**
+
+- `source.config.ts` - Fumadocs MDX configuration with plugins
+- `next.config.mjs` - Next.js configuration with MDX integration
+- `tailwind.config.mjs` - Tailwind CSS customization
+
+### Key Features
+
+**Documentation Features:**
+
+- MDX-based content with TypeScript code highlighting
+- Auto-generated API documentation from TypeScript source
+- Interactive code examples with Monaco Editor
+- Math equation support with KaTeX
+- Link validation and build-time checks
+
+**Interactive Features:**
+
+- AI-powered chat interface using React Server Components
+- Code demos with live TypeScript execution
+- Interactive UI components and animations
+- Search functionality across all documentation
+
+**Build Process:**
+
+- TypeDoc generates API documentation from workspace packages
+- Custom scripts transform and validate generated content
+- Link checking ensures all internal/external links work
+- Static site generation with 10-minute timeout for large documentation set
+
+### Configuration Files
+
+**source.config.ts**: Defines MDX processing pipeline with:
+
+- Code highlighting themes (Catppuccin)
+- Twoslash TypeScript integration
+- Remark/rehype plugins for enhanced Markdown
+- Content directories including external docs
+
+**next.config.mjs**: Next.js configuration with:
+
+- Extended static generation timeout (10 minutes)
+- Monaco Editor transpilation
+- Server external packages for build optimization
+- Webpack/Turbopack aliases for browser compatibility
+
+### Content Organization
+
+**Documentation Structure:**
+
+- `/docs/llamaindex/` - Core LlamaIndex.TS documentation
+- `/docs/cloud/` - LlamaCloud integration guides
+- `/docs/api/` - Auto-generated TypeScript API reference
+
+**Content Sources:**
+
+- Local MDX files in `src/content/docs/`
+- External docs from `@llamaindex/workflow-docs` package
+- Generated API docs from TypeScript source
+
+### Development Notes
+
+- Documentation content is sourced from multiple locations including external packages
+- API documentation is regenerated on each build from TypeScript source
+- The site uses advanced MDX features including custom transformers and plugins
+- Build process includes comprehensive link validation
+- Large memory allocation needed for TypeDoc generation (`--max-old-space-size=8192`)
+- Chat functionality uses React Server Components with streaming responses
+
+### AI Chat Integration
+
+The documentation includes an AI chat feature that:
+
+- Uses React Server Components for server-side AI processing
+- Integrates with LlamaIndex.TS packages for demonstrations
+- Provides interactive examples and code generation
+- Streams responses for better user experience
+
+### Content Authoring
+
+When adding new documentation:
+
+- Create MDX files in appropriate `src/content/docs/` subdirectories
+- Follow existing content structure and frontmatter conventions
+- Use Fumadocs MDX features like code blocks, callouts, and tabs
+- API documentation is auto-generated - edit TypeScript source comments instead
+- Run `pnpm validate-links` to check all links before publishing
@@ -3,6 +3,8 @@
 This is a Next.js application generated with
 [Create Fumadocs](https://github.com/fuma-nama/fumadocs).

+> Note: Before running the development server, make sure to build the whole project first. See [CONTRIBUTING.md](../../CONTRIBUTING.md) for more details.
+
 Run development server:

 ```bash
@@ -12,9 +12,9 @@
  },
  "aliases": {
    "components": "@/components",
-    "utils": "@/lib/utils",
+    "utils": "@/libs/utils",
    "ui": "@/components/ui",
-    "lib": "@/lib",
+    "lib": "@/libs",
    "hooks": "@/hooks"
  }
 }
@@ -15,6 +15,20 @@ const config = {
    "twoslash",
    "typescript",
  ],
+  async redirects() {
+    return [
+      {
+        source: "/docs/chat-ui/:path*.mdx",
+        destination: "/docs/chat-ui/:path*",
+        permanent: true,
+      },
+      {
+        source: "/docs/workflows/:path*.mdx",
+        destination: "/docs/workflows/:path*",
+        permanent: true,
+      },
+    ];
+  },
  turbopack: {
    resolveAlias: {
      fs: { browser: "./fallback.js" },
@@ -1,6 +1,6 @@
 {
  "name": "@llamaindex/doc",
-  "version": "0.2.12",
+  "version": "0.2.40",
  "private": true,
  "scripts": {
    "postinstall": "fumadocs-mdx",
@@ -15,16 +15,17 @@
  "dependencies": {
    "@huggingface/transformers": "^3.5.0",
    "@icons-pack/react-simple-icons": "^10.1.0",
-    "@llama-flow/docs": "0.0.3",
-    "@llamaindex/chat-ui": "0.2.0",
+    "@llamaindex/chat-ui-docs": "^0.0.5",
    "@llamaindex/cloud": "workspace:*",
    "@llamaindex/core": "workspace:*",
    "@llamaindex/node-parser": "workspace:*",
    "@llamaindex/openai": "workspace:*",
    "@llamaindex/readers": "workspace:*",
    "@llamaindex/workflow": "workspace:*",
+    "@llamaindex/workflow-docs": "0.1.1",
    "@mdx-js/mdx": "^3.1.0",
    "@monaco-editor/react": "^4.7.0",
+    "@next/third-parties": "^15.3.4",
    "@number-flow/react": "^0.3.4",
    "@radix-ui/react-dialog": "^1.1.2",
    "@radix-ui/react-icons": "^1.3.2",
@@ -34,22 +35,22 @@
    "@radix-ui/react-tooltip": "^1.1.4",
    "@scalar/api-client-react": "^1.1.25",
    "@vercel/functions": "^1.5.0",
-    "ai": "^3.4.33",
+    "ai": "^4.3.17",
    "class-variance-authority": "^0.7.0",
    "clsx": "2.1.1",
    "foxact": "^0.2.41",
    "framer-motion": "^11.11.17",
-    "fumadocs-core": "^15.2.7",
+    "fumadocs-core": "^15.5.0",
    "fumadocs-docgen": "^2.0.0",
-    "fumadocs-mdx": "^11.6.0",
-    "fumadocs-openapi": "^8.0.1",
-    "fumadocs-twoslash": "^3.1.1",
-    "fumadocs-typescript": "^4.0.2",
-    "fumadocs-ui": "^15.2.7",
+    "fumadocs-mdx": "^11.6.6",
+    "fumadocs-openapi": "^9.0.5",
+    "fumadocs-twoslash": "^3.1.3",
+    "fumadocs-typescript": "^4.0.5",
+    "fumadocs-ui": "^15.5.0",
    "hast-util-to-jsx-runtime": "^2.3.2",
    "llamaindex": "workspace:*",
    "lucide-react": "^0.460.0",
-    "next": "^15.3.0",
+    "next": "^15.3.3",
    "next-themes": "^0.4.3",
    "react": "^19.1.0",
    "react-dom": "^19.1.0",
@@ -69,30 +70,30 @@
    "twoslash": "^0.3.1",
    "use-stick-to-bottom": "^1.0.42",
    "web-tree-sitter": "^0.24.4",
-    "zod": "^3.23.8"
+    "zod": "^3.25.76"
  },
  "devDependencies": {
    "@next/env": "^15.3.0",
    "@tailwindcss/postcss": "^4.0.9",
    "@types/mdx": "^2.0.13",
-    "@types/node": "22.9.0",
-    "@types/react": "^19.0.10",
-    "@types/react-dom": "^19.0.4",
+    "@types/node": "24.0.13",
+    "@types/react": "^19.1.8",
+    "@types/react-dom": "^19.1.6",
    "autoprefixer": "^10.4.20",
    "cross-env": "^7.0.3",
    "fast-glob": "^3.3.2",
    "gray-matter": "^4.0.3",
-    "postcss": "^8.5.3",
+    "postcss": "^8.5.6",
    "raw-loader": "^4.0.2",
    "remark": "^15.0.1",
    "remark-gfm": "^4.0.0",
    "remark-mdx": "^3.1.0",
    "remark-stringify": "^11.0.0",
-    "tailwindcss": "^4.0.9",
-    "tsx": "^4.19.3",
-    "typedoc": "0.28.2",
+    "tailwindcss": "^4.1.11",
+    "tsx": "^4.20.3",
+    "typedoc": "0.28.3",
    "typedoc-plugin-markdown": "^4.6.2",
-    "typedoc-plugin-merge-modules": "^7.0.0",
-    "typescript": "^5.7.3"
+    "typedoc-plugin-merge-modules": "^7.0.0",
+    "typescript": "^5.8.3"
  }
 }
@@ -1,4 +1,3 @@
-import { generateFiles as openapiGenerateFiles } from "fumadocs-openapi";
 import {
  createGenerator,
  generateFiles as typescriptGenerateFiles,
@@ -14,18 +13,12 @@ const apiRefOut = "./src/content/docs/api";
 // clean generated files
 rimrafSync(out, {
  filter(v) {
-    return !v.endsWith("index.mdx") && !v.endsWith("meta.json");
+    return !v.endsWith("index.md") && !v.endsWith("meta.json");
  },
 });

-void openapiGenerateFiles({
-  input: ["../../packages/cloud/openapi.json"],
-  output: "./src/content/docs/cloud/api",
-  groupBy: "tag",
-});
-
 void typescriptGenerateFiles(generator, {
-  input: ["./src/content/docs/api/**/*.mdx"],
+  input: ["./src/content/docs/api/**/*.md"],
  output: (file) => path.resolve(path.dirname(file), path.basename(file)),
  transformOutput,
 });
@@ -34,19 +27,22 @@ function transformOutput(filePath: string, content: string) {
  const fileName = path.basename(filePath);
  let title = fileName.split(".")[0];
  if (title === "index") title = "LlamaIndex API Reference";
-  return `---\ntitle: ${title}\n---\n\n${transformAbsoluteUrl(content, filePath)}`;
+  return `---\ntitle: ${title}\n---\n\n${transformAbsoluteUrl(
+    content.replace(/(?<!\\)\{([^}]+)(?<!\\)}/g, "\\{$1\\}"),
+    filePath,
+  )}`;
 }

 /**
- * Transforms the content by converting relative MDX links to absolute docs API links
- * Example: [text](../type-aliases/TaskHandler.mdx) -> [text](/docs/api/type-aliases/TaskHandler)
- * [text](BaseChatEngine.mdx) -> [text](/docs/api/classes/BaseChatEngine)
- * [text](BaseVectorStore.mdx#constructors) -> [text](/docs/api/classes/BaseVectorStore#constructors)
- * [text](TaskStep.mdx) -> [text](/docs/api/type-aliases/TaskStep)
+ * Transforms the content by converting relative MD links to absolute docs API links
+ * Example: [text](../type-aliases/TaskHandler.md) -> [text](/docs/api/type-aliases/TaskHandler)
+ * [text](BaseChatEngine.md) -> [text](/docs/api/classes/BaseChatEngine)
+ * [text](BaseVectorStore.md#constructors) -> [text](/docs/api/classes/BaseVectorStore#constructors)
+ * [text](TaskStep.md) -> [text](/docs/api/type-aliases/TaskStep)
 */
 function transformAbsoluteUrl(content: string, filePath: string) {
  const group = path.dirname(filePath).split(path.sep).pop();
-  return content.replace(/\]\(([^)]+)\.mdx([^)]*)\)/g, (_, slug, anchor) => {
+  return content.replace(/\]\(([^)]+)\.md([^)]*)\)/g, (_, slug, anchor) => {
    const slugParts = slug.split("/");
    const fileName = slugParts[slugParts.length - 1];
    const fileGroup = slugParts[slugParts.length - 2] ?? group;
@@ -4,7 +4,6 @@ import matter from "gray-matter";
 import path from "path";

 const CONTENT_DIR = path.join(process.cwd(), "src/content/docs");
-const BUILD_DIR = path.join(process.cwd(), ".next");

 // Regular expression to find internal links
 // This captures Markdown links [text](/docs/path) and href attributes href="/docs/path"
@@ -14,6 +13,8 @@ const INTERNAL_LINK_REGEX = /(?:(?:\]\(|\bhref=["'])\/docs\/([^")]+))/g;
 // This captures relative links like [text](./path) or ![alt](../images/image.png)
 const RELATIVE_LINK_REGEX = /(?:\]\()(?:\s*)(?:\.\.?)\//g;

+const ALLOWED_LINKS = ["/docs/workflows", "/docs/chat-ui"];
+
 interface LinkValidationResult {
  file: string;
  invalidLinks: Array<{ link: string; line: number }>;
@@ -28,14 +29,14 @@ interface RelativeLinkResult {
 * Get all valid documentation routes from the content directory
 */
 async function getValidRoutes(): Promise<Set<string>> {
-  const mdxFiles = await glob("**/*.mdx", { cwd: CONTENT_DIR });
+  const mdxFiles = await glob("**/*.{md,mdx}", { cwd: CONTENT_DIR });

  const routes = new Set<string>();

  // Add each MDX file as a valid route
  for (const file of mdxFiles) {
    // Remove .mdx extension and normalize to route format
-    let route = file.replace(/\.mdx$/, "");
+    let route = file.replace(/\.mdx?$/, "");

    // Handle index files
    if (route.endsWith("/index")) {
@@ -124,9 +125,6 @@ function findRelativeLinksInFile(
  return relativeLinks;
 }

-/**
- * Validate internal links in all MDX files
- */
 /**
 * Find relative links in all MDX files
 */
@@ -160,6 +158,11 @@ async function validateLinks(): Promise<LinkValidationResult[]> {
    const links = extractLinksFromFile(filePath);

    const invalidLinks = links.filter(({ link }) => {
+      // Check if the link is in the allowed list
+      if (ALLOWED_LINKS.includes(`/docs/${link}`)) {
+        return false;
+      }
+
      // Check if the link exists in valid routes
      // First normalize the link (remove any query string or hash)
      const baseLink = link.split("?")[0].split("#")[0];
@@ -1,13 +1,24 @@
-import { rehypeCodeDefaultOptions } from "fumadocs-core/mdx-plugins";
+import {
+  rehypeCodeDefaultOptions,
+  remarkStructure,
+} from "fumadocs-core/mdx-plugins";
 import { fileGenerator, remarkDocGen, remarkInstall } from "fumadocs-docgen";
 import { defineConfig, defineDocs } from "fumadocs-mdx/config";
 import { transformerTwoslash } from "fumadocs-twoslash";
-import { createFileSystemTypesCache } from "fumadocs-twoslash/cache-fs";
 import rehypeKatex from "rehype-katex";
 import remarkMath from "remark-math";

 export const docs = defineDocs({
-  dir: ["./src/content/docs", "./node_modules/@llama-flow/docs"],
+  dir: [
+    "./src/content/docs",
+    "./node_modules/@llamaindex/workflow-docs",
+    "./node_modules/@llamaindex/chat-ui-docs",
+    // NOTE: When adding external docs (like chat-ui or workflow-docs above),
+    // make sure to also update:
+    // 1. scripts/validate-links.mts - add to ALLOWED_LINKS array
+    // 2. next.config.mjs - add redirect for .mdx files
+    // 3. src/content/docs/meta.json - add to pages array
+  ],
  docs: {
    async: true,
  },
@@ -24,11 +35,7 @@ export default defineConfig({
      },
      transformers: [
        ...(rehypeCodeDefaultOptions.transformers ?? []),
-        transformerTwoslash({
-          typesCache: createFileSystemTypesCache({
-            dir: ".next/cache/twoslash",
-          }),
-        }),
+        transformerTwoslash(),
        {
          name: "transformers:remove-notation-escape",
          code(hast) {
@@ -49,6 +56,7 @@ export default defineConfig({
      ],
    },
    remarkPlugins: [
+      remarkStructure,
      remarkMath,
      [remarkInstall, { persist: { id: "package-manager" } }],
      [remarkDocGen, { generators: [fileGenerator()] }],
@@ -10,7 +10,7 @@ import { MagicMove } from "@/components/magic-move";
 import { NpmInstall } from "@/components/npm-install";
 import { Supports } from "@/components/supports";
 import { Button } from "@/components/ui/button";
-import { DOCUMENT_URL } from "@/lib/const";
+import { DOCUMENT_URL } from "@/libs/const";
 import { SiStackblitz } from "@icons-pack/react-simple-icons";
 import { Blocks, Bot, Footprints, Terminal } from "lucide-react";
 import Link from "next/link";
@@ -26,7 +26,7 @@ const llm = openai();
 const response = await llm.chat({
  messages: [{ content: "Tell me a joke.", role: "user" }],
 });`,
-  `import { agent } from "llamaindex";
+  `import { agent } from "@llamaindex/workflow";
 import { openai } from "@llamaindex/openai";

 const analyseAgent = agent({
@@ -36,7 +36,7 @@ const analyseAgent = agent({
 });
 const response = await analyseAgent.run(\`Analyse the given data:
 \${data}\`);`,
-  `import { agent, multiAgent } from "llamaindex";
+  `import { agent, multiAgent } from "@llamaindex/workflow";
 import { openai } from "@llamaindex/openai";

 const analyseAgent = agent({
@@ -113,8 +113,10 @@ export default function HomePage() {
          description="Truly powerful retrieval-augmented generation applications use agentic techniques, and LlamaIndex.TS makes it easy to build them."
        >
          <CodeBlock
-            code={`import { agent, SimpleDirectoryReader, VectorStoreIndex } from "llamaindex";
+            code={`import { VectorStoreIndex } from "llamaindex";
+import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
 import { openai } from "@llamaindex/openai";
+import { agent } from "@llamaindex/workflow";

 // load documents from current directoy into an index
 const reader = new SimpleDirectoryReader();
@@ -1,4 +1,4 @@
-import { MockLLM } from "@llamaindex/core/utils";
+import { MockLLM } from "@llamaindex/core/llms/mock";
 import { LlamaIndexAdapter, type Message } from "ai";
 import { Settings, SimpleChatEngine, type ChatMessage } from "llamaindex";
 import { NextResponse, type NextRequest } from "next/server";
@@ -1,10 +1,10 @@
-import { source } from "@/lib/source";
+import { source } from "@/libs/source";
 import { structure } from "fumadocs-core/mdx-plugins";
 import { createFromSource } from "fumadocs-core/search/server";

 // TODO: migrate to another search service, I don't think Vercel can handle that many of documents.
 export const { GET } = createFromSource(source, (page) => ({
-  id: page.url,
+  id: page.file.path,
  title: page.data.title,
  description: page.data.description,
  url: page.url,
@@ -1,7 +1,6 @@
-import { ChatDemoRSC } from "@/components/demo/chat/rsc/demo";
 import * as demos from "@/components/demo/lazy";
-import { createMetadata, metadataImage } from "@/lib/metadata";
-import { openapi, source } from "@/lib/source";
+import { createMetadata, metadataImage } from "@/libs/metadata";
+import { openapi, source } from "@/libs/source";
 import * as Icons from "@icons-pack/react-simple-icons";
 import { APIPage } from "fumadocs-openapi/ui";
 import { Popup, PopupContent, PopupTrigger } from "fumadocs-twoslash/ui";
@@ -51,7 +50,6 @@ export default async function Page(props: {
            ...Icons,
            ...defaultMdxComponents,
            ...demos,
-            ChatDemoRSC,
            Accordion,
            Accordions,
            APIPage: (props) => <APIPage {...openapi.getAPIPageProps(props)} />,
@@ -1,11 +1,7 @@
 import { baseOptions } from "@/app/layout.config";
-import { AITrigger } from "@/components/ai-chat";
-import { buttonVariants } from "@/components/ui/button";
-import { source } from "@/lib/source";
-import { cn } from "@/lib/utils";
+import { source } from "@/libs/source";
 import "fumadocs-twoslash/twoslash.css";
 import { DocsLayout } from "fumadocs-ui/layouts/docs";
-import { MessageCircle } from "lucide-react";
 import type { ReactNode } from "react";

 export default function Layout({ children }: { children: ReactNode }) {
@@ -13,23 +9,9 @@ export default function Layout({ children }: { children: ReactNode }) {
    <DocsLayout
      tree={source.pageTree}
      {...baseOptions}
+      links={[]}
      nav={{
        ...baseOptions.nav,
-        children: (
-          <AITrigger
-            className={cn(
-              buttonVariants({
-                variant: "secondary",
-                size: "xs",
-                className:
-                  "text-fd-muted-foreground ms-2 gap-1.5 rounded-full px-2 md:flex-1",
-              }),
-            )}
-          >
-            <MessageCircle className="size-3" />
-            Ask LlamaCloud
-          </AITrigger>
-        ),
      }}
    >
      {children}
@@ -1,4 +1,4 @@
-import { DOCUMENT_URL } from "@/lib/const";
+import { DOCUMENT_URL } from "@/libs/const";
 import type { BaseLayoutProps } from "fumadocs-ui/layouts/shared";
 import Image from "next/image";

@@ -27,9 +27,19 @@ export const baseOptions: BaseLayoutProps = {
  githubUrl: "https://github.com/run-llama/LlamaIndexTS",
  links: [
    {
-      text: "Docs",
+      text: "TypeScript",
      url: DOCUMENT_URL,
      active: "nested-url",
    },
+    {
+      text: "Python",
+      url: "https://docs.llamaindex.ai",
+      active: "url",
+    },
+    {
+      text: "LlamaCloud",
+      url: "https://docs.cloud.llamaindex.ai/",
+      active: "url",
+    },
  ],
 };
@@ -1,5 +1,6 @@
 import { AIProvider } from "@/actions";
 import { TooltipProvider } from "@/components/ui/tooltip";
+import { GoogleAnalytics } from "@next/third-parties/google";
 import { RootProvider } from "fumadocs-ui/provider";
 import { Inter } from "next/font/google";
 import type { ReactNode } from "react";
@@ -31,6 +32,9 @@ export default function Layout({ children }: { children: ReactNode }) {
          sizes="16x16"
          href="/favicon-16x16.png"
        />
+        <title>
+          LlamaIndex.TS - Build LLM-powered document agents and workflows
+        </title>
      </head>
      <body className="flex min-h-screen flex-col">
        <TooltipProvider>
@@ -39,6 +43,7 @@ export default function Layout({ children }: { children: ReactNode }) {
          </AIProvider>
        </TooltipProvider>
      </body>
+      <GoogleAnalytics gaId="G-NB9B8LW9W5" />
    </html>
  );
 }
@@ -13,11 +13,7 @@ import remarkStringify from "remark-stringify";
 export const revalidate = false;

 export async function GET() {
-  const files = await fg([
-    "./src/content/docs/**/*.mdx",
-    // remove generated openapi files
-    "!./src/content/docs/cloud/api/**/*",
-  ]);
+  const files = await fg(["./src/content/docs/**/*.mdx"]);

  const scan = files.map(async (file) => {
    const fileContent = await fs.readFile(file);
@@ -1,5 +1,5 @@
 import { generateOGImage } from "@/app/og/[...slug]/og";
-import { metadataImage } from "@/lib/metadata";
+import { metadataImage } from "@/libs/metadata";
 import { type ImageResponse } from "next/og";
 import { readFileSync } from "node:fs";

@@ -1,6 +1,6 @@
 import ContributorCounter from "@/components/contributor-count";
 import { buttonVariants } from "@/components/ui/button";
-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";
 import { Heart } from "lucide-react";
 import { ReactElement } from "react";

@@ -1,5 +1,5 @@
-import { fetchContributors } from "@/lib/get-contributors";
-import { cn } from "@/lib/utils";
+import { fetchContributors } from "@/libs/get-contributors";
+import { cn } from "@/libs/utils";
 import Image from "next/image";
 import type { HTMLAttributes, ReactElement } from "react";

@@ -1,5 +1,5 @@
 "use client";
-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";
 import { TerminalIcon } from "lucide-react";
 import {
  Fragment,
@@ -1,21 +0,0 @@
-"use client";
-import {
-  ChatHandler,
-  ChatInput,
-  ChatMessages,
-  ChatSection,
-} from "@llamaindex/chat-ui";
-import { useChat } from "ai/react";
-
-export const ChatDemo = () => {
-  const handler = useChat();
-  return (
-    <ChatSection handler={handler as ChatHandler}>
-      <ChatMessages>
-        <ChatMessages.List className="h-auto max-h-[400px]" />
-        <ChatMessages.Actions />
-      </ChatMessages>
-      <ChatInput />
-    </ChatSection>
-  );
-};
@@ -1,57 +0,0 @@
-import { Markdown } from "@llamaindex/chat-ui/widgets";
-import { MockLLM } from "@llamaindex/core/utils";
-import { generateId, Message } from "ai";
-import { createAI, createStreamableUI, getMutableAIState } from "ai/rsc";
-import { type ChatMessage, Settings, SimpleChatEngine } from "llamaindex";
-import { ReactNode } from "react";
-
-type ServerState = Message[];
-type FrontendState = Array<Message & { display: ReactNode }>;
-type Actions = {
-  chat: (message: Message) => Promise<Message & { display: ReactNode }>;
-};
-
-Settings.llm = new MockLLM(); // config your LLM here
-
-export const AI = createAI<ServerState, FrontendState, Actions>({
-  initialAIState: [],
-  initialUIState: [],
-  actions: {
-    chat: async (message: Message) => {
-      "use server";
-
-      const aiState = getMutableAIState<typeof AI>();
-      aiState.update((prev) => [...prev, message]);
-
-      const uiStream = createStreamableUI();
-      const chatEngine = new SimpleChatEngine();
-      const assistantMessage: Message = {
-        id: generateId(),
-        role: "assistant",
-        content: "",
-      };
-
-      // run the async function without blocking
-      (async () => {
-        const chatResponse = await chatEngine.chat({
-          stream: true,
-          message: message.content,
-          chatHistory: aiState.get() as ChatMessage[],
-        });
-
-        for await (const chunk of chatResponse) {
-          assistantMessage.content += chunk.delta;
-          uiStream.update(<Markdown content={assistantMessage.content} />);
-        }
-
-        aiState.done([...aiState.get(), assistantMessage]);
-        uiStream.done();
-      })();
-
-      return {
-        ...assistantMessage,
-        display: uiStream.value,
-      };
-    },
-  },
-});
@@ -1,35 +0,0 @@
-"use client";
-
-import {
-  ChatHandler,
-  ChatInput,
-  ChatMessage,
-  ChatMessages,
-  ChatSection as ChatSectionUI,
-  Message,
-} from "@llamaindex/chat-ui";
-import { useChatRSC } from "./use-chat-rsc";
-
-export const ChatSectionRSC = () => {
-  const handler = useChatRSC();
-  return (
-    <ChatSectionUI handler={handler as ChatHandler}>
-      <ChatMessages>
-        <ChatMessages.List className="h-auto max-h-[400px]">
-          {handler.messages.map((message, index) => (
-            <ChatMessage
-              key={index}
-              message={message as Message}
-              isLast={index === handler.messages.length - 1}
-            >
-              <ChatMessage.Avatar />
-              <ChatMessage.Content>{message.display}</ChatMessage.Content>
-            </ChatMessage>
-          ))}
-          <ChatMessages.Loading />
-        </ChatMessages.List>
-      </ChatMessages>
-      <ChatInput />
-    </ChatSectionUI>
-  );
-};
@@ -1,8 +0,0 @@
-import { AI } from "./ai-action";
-import { ChatSectionRSC } from "./chat-section";
-
-export const ChatDemoRSC = () => (
-  <AI>
-    <ChatSectionRSC />
-  </AI>
-);
@@ -1,41 +0,0 @@
-"use client";
-
-import { useActions } from "ai/rsc";
-
-import { generateId, Message } from "ai";
-import { useUIState } from "ai/rsc";
-import { useState } from "react";
-import { AI } from "./ai-action";
-
-export function useChatRSC() {
-  const [input, setInput] = useState<string>("");
-  const [isLoading, setIsLoading] = useState<boolean>(false);
-  const [messages, setMessages] = useUIState<typeof AI>();
-  const { chat } = useActions<typeof AI>();
-
-  const append = async (message: Omit<Message, "id">) => {
-    const newMsg: Message = { ...message, id: generateId() };
-
-    setIsLoading(true);
-    try {
-      setMessages((prev) => [...prev, { ...newMsg, display: message.content }]);
-      const assistantMsg = await chat(newMsg);
-      setMessages((prev) => [...prev, assistantMsg]);
-    } catch (error) {
-      console.error(error);
-    }
-    setIsLoading(false);
-    setInput("");
-
-    return message.content;
-  };
-
-  return {
-    input,
-    setInput,
-    isLoading,
-    messages,
-    setMessages,
-    append,
-  };
-}
@@ -1,18 +1,8 @@
 "use client";
 import dynamic from "next/dynamic";

-// lazy load client components
-export const ChatDemo = dynamic(() =>
-  import("@/components/demo/chat/api/demo").then((mod) => mod.ChatDemo),
-);
-
 export const CodeNodeParserDemo = dynamic(() =>
  import("@/components/demo/code-node-parser").then(
    (mod) => mod.CodeNodeParserDemo,
  ),
 );
-export const WorkflowStreamingDemo = dynamic(() =>
-  import("@/components/demo/workflow-streaming-ui").then(
-    (mod) => mod.WorkflowStreamingDemo,
-  ),
-);
@@ -1,152 +0,0 @@
-"use client";
-import FlowInput from "@/components/flow-input";
-import { Button } from "@/components/ui/button";
-import {
-  StartEvent,
-  StopEvent,
-  Workflow,
-  WorkflowEvent,
-} from "@llamaindex/workflow";
-import { ReactNode, startTransition, useState } from "react";
-import { StickToBottom, useStickToBottomContext } from "use-stick-to-bottom";
-
-class ComputeEvent extends WorkflowEvent<number> {
-  constructor(data: number) {
-    super(data);
-  }
-}
-
-class ComputeResultEvent extends WorkflowEvent<number> {
-  constructor(data: number) {
-    super(data);
-  }
-}
-
-type ContextData = {
-  sum: number;
-};
-
-const workflow = new Workflow<ContextData, number, number>();
-
-const max = 1000;
-const min = 100;
-
-workflow.addStep(
-  {
-    inputs: [StartEvent<number>],
-    outputs: [StopEvent<number>],
-  },
-  async (context, event) => {
-    const total = event.data;
-    for (let i = 0; i < total; i++) {
-      context.sendEvent(new ComputeEvent(i));
-    }
-    console.log("waiting");
-    const computeResults = await Promise.all(
-      Array.from({ length: total }).map(() =>
-        context.requireEvent(ComputeResultEvent),
-      ),
-    );
-    context.data.sum = computeResults.reduce(
-      (acc, result) => acc + result.data,
-      0,
-    );
-    console.log("stop");
-    return new StopEvent(context.data.sum);
-  },
-);
-
-workflow.addStep(
-  {
-    inputs: [ComputeEvent],
-    outputs: [ComputeResultEvent],
-  },
-  async (context, event) => {
-    await new Promise((resolve) =>
-      setTimeout(resolve, Math.floor(Math.random() * (max - min + 1) + min)),
-    );
-    return new ComputeResultEvent(event.data);
-  },
-);
-
-function ScrollToBottom() {
-  const { isAtBottom, scrollToBottom } = useStickToBottomContext();
-
-  return (
-    !isAtBottom && (
-      <button
-        className="i-ph-arrow-circle-down-fill absolute bottom-0 left-[50%] translate-x-[-50%] rounded-lg text-4xl"
-        onClick={() => scrollToBottom()}
-      />
-    )
-  );
-}
-
-export function WorkflowStreamingDemo() {
-  const [ui, setUI] = useState<ReactNode[]>([
-    <div key={0} className="bg-gray-100 dark:bg-gray-800">
-      Waiting for workflow to start
-    </div>,
-  ]);
-  const [total, setTotal] = useState<number>(10);
-
-  return (
-    <div className="flex w-full flex-col items-start gap-2">
-      <div className="flex flex-row items-center justify-center">
-        <div className="mr-2 text-lg">Compute total</div>{" "}
-        <FlowInput value={total} onChange={(value) => setTotal(value)} />
-      </div>
-      <Button
-        onClick={async () => {
-          startTransition(() => {
-            setUI([]);
-          });
-          const context = workflow.run(total, {
-            sum: 0,
-          });
-          let i = 0;
-          for await (const event of context) {
-            console.log(event);
-            if (event instanceof ComputeEvent) {
-              setUI((ui) => [
-                ...ui,
-                <div key={i++} className="bg-yellow-100 dark:bg-yellow-800">
-                  Computing task id: {event.data}
-                </div>,
-              ]);
-            } else if (event instanceof ComputeResultEvent) {
-              setUI((ui) => [
-                ...ui,
-                <div key={i++} className="bg-green-100 dark:bg-green-800">
-                  Computed task id: {event.data}
-                </div>,
-              ]);
-            } else if (event instanceof StartEvent) {
-              setUI((ui) => [
-                ...ui,
-                <div key={i++} className="bg-blue-100 dark:bg-blue-800">
-                  Started workflow with total {event.data}
-                </div>,
-              ]);
-            } else if (event instanceof StopEvent) {
-              setUI((ui) => [
-                ...ui,
-                <div key={i++} className="bg-red-100 dark:bg-red-800">
-                  Workflow stopped
-                </div>,
-              ]);
-            }
-          }
-        }}
-      >
-        Start Workflow
-      </Button>
-      <StickToBottom className="flex max-h-96 w-full flex-col gap-2 overflow-y-auto rounded-lg border border-gray-200 p-2">
-        <StickToBottom.Content className="flex flex-col gap-2">
-          {ui}
-        </StickToBottom.Content>
-        <ScrollToBottom />
-      </StickToBottom>
-    </div>
-  );
-}
@@ -1,4 +1,4 @@
-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";
 import { LucideIcon } from "lucide-react";
 import { HTMLAttributes, ReactElement, ReactNode } from "react";

@@ -1,6 +1,6 @@
 "use client";
 import { Button } from "@/components/ui/button";
-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";
 import { CodeBlock } from "fumadocs-ui/components/codeblock";
 import { RotateCcw } from "lucide-react";
 import { useTheme } from "next-themes";
@@ -1,6 +1,6 @@
 "use client";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";
 import Image from "next/image";
 import { ReactNode } from "react";
 import { IconAI, IconUser } from "./ui/icons";
@@ -1,4 +1,4 @@
-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";
 import {
  AnimatePresence,
  motion,
@@ -1,7 +1,7 @@
 import { cva, type VariantProps } from "class-variance-authority";
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 const alertVariants = cva(
  "relative w-full rounded-lg border px-4 py-3 text-sm [&>svg+div]:translate-y-[-3px] [&>svg]:absolute [&>svg]:left-4 [&>svg]:top-4 [&>svg]:text-foreground [&>svg~*]:pl-7",
@@ -1,7 +1,7 @@
 import { cva, type VariantProps } from "class-variance-authority";
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 const badgeVariants = cva(
  "inline-flex items-center rounded-md border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2",
@@ -2,7 +2,7 @@ import { Slot } from "@radix-ui/react-slot";
 import { cva, type VariantProps } from "class-variance-authority";
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 const buttonVariants = cva(
  "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0",
@@ -4,7 +4,7 @@ import * as DialogPrimitive from "@radix-ui/react-dialog";
 import { Cross2Icon } from "@radix-ui/react-icons";
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 const Dialog = DialogPrimitive.Root;

@@ -1,4 +1,4 @@
-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 export function IconAI({ className, ...props }: React.ComponentProps<"svg">) {
  return (
@@ -1,5 +1,5 @@
 "use client";
-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";
 import { animate, motion, useMotionValue } from "framer-motion";
 import { useEffect, useState } from "react";
 import useMeasure from "react-use-measure";
@@ -1,6 +1,6 @@
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 export type InputProps = React.InputHTMLAttributes<HTMLInputElement>;

@@ -4,7 +4,7 @@ import * as LabelPrimitive from "@radix-ui/react-label";
 import { cva, type VariantProps } from "class-variance-authority";
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 const labelVariants = cva(
  "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70",
@@ -1,4 +1,4 @@
-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 function Skeleton({
  className,
@@ -3,7 +3,7 @@
 import * as SliderPrimitive from "@radix-ui/react-slider";
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 const Slider = React.forwardRef<
  React.ElementRef<typeof SliderPrimitive.Root>,
@@ -1,6 +1,6 @@
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 export type TextareaProps = React.TextareaHTMLAttributes<HTMLTextAreaElement>;

@@ -3,7 +3,7 @@
 import * as TooltipPrimitive from "@radix-ui/react-tooltip";
 import * as React from "react";

-import { cn } from "@/lib/utils";
+import { cn } from "@/libs/utils";

 const TooltipProvider = TooltipPrimitive.Provider;

@@ -1,8 +0,0 @@
---
-title: LlamaCloud
-description: LlamaCloud is a new generation of managed parsing, ingestion, and retrieval services, designed to bring production-grade context-augmentation to your LLM and RAG applications.
---
-
-This is TypeScript binding for LlamaCloud API. It provides a simple way to interact with LlamaCloud API.
-
-If you are looking for the official documentation, please visit the [Official Document](https://docs.cloud.llamaindex.ai/)
@@ -1,6 +0,0 @@
-{
-  "title": "LlamaCloud",
-  "description": "The Cloud framework for LLM",
-  "root": true,
-  "pages": ["---Guide---", "index", "..."]
-}
@@ -0,0 +1,60 @@
+---
+title: High-Level Concepts
+---
+
+This is a quick guide to the high-level concepts you'll encounter frequently when building LLM applications.
+
+## Large Language Models (LLMs)
+
+LLMs are the fundamental innovation that launched LlamaIndex. They are artificial intelligence (AI) computer systems that can understand, generate, and manipulate natural language, including answering questions based on their training data or data provided to them at query time.
+
+## Agentic Applications
+
+When an LLM is used within an application, it is often used to make decisions, take actions, and/or interact with the world. This is the core definition of an **agentic application**.
+
+While the definition of an agentic application is broad, there are several key characteristics that define an agentic application:
+
+- **LLM Augmentation**: The LLM is augmented with tools (i.e. arbitrary callable functions in code), memory, and/or dynamic prompts.
+- **Prompt Chaining**: Several LLM calls are used that build on each other, with the output of one LLM call being used as the input to the next.
+- **Routing**: The LLM is used to route the application to the next appropriate step or state in the application.
+- **Parallelism**: The application can perform multiple steps or actions in parallel.
+- **Orchestration**: A hierarchical structure of LLMs is used to orchestrate lower-level actions and LLMs.
+- **Reflection**: The LLM is used to reflect and validate outputs of previous steps or LLM calls, which can be used to guide the application to the next appropriate step or state.
+
+In LlamaIndex, you can build agentic applications by using workflows to orchestrate a sequence of steps and LLMs. You can [learn more about workflows](/docs/llamaindex/tutorials/workflows).
+
+## Agents
+
+We define an agent as a specific instance of an "agentic application". An agent is a piece of software that semi-autonomously performs tasks by combining LLMs with other tools and memory, orchestrated in a reasoning loop that decides which tool to use next (if any).
+
+In practice, this means something like:
+- An agent receives a user message
+- The agent uses an LLM to determine the next appropriate action to take using the previous chat history, tools, and the latest user message
+- The agent may invoke one or more tools to assist with the user's request
+- If tools are used, the agent will then interpret the tool outputs and use them to inform the next action
+- Once the agent stops taking actions, it returns the final output to the user
+
+You can [learn more about agents](/docs/llamaindex/tutorials/basic_agent).
+
+## Retrieval Augmented Generation (RAG)
+
+Retrieval-Augmented Generation (RAG) is a core technique for building data-backed LLM applications with LlamaIndex. It allows LLMs to answer questions about your private data by providing it to the LLM at query time, rather than training the LLM on your data. To avoid sending **all** of your data to the LLM every time, RAG indexes your data and selectively sends only the relevant parts along with your query. You can [learn more about RAG](/docs/llamaindex/tutorials/rag).
+
+## Use cases
+
+There are endless use cases for data-backed LLM applications, but they can be roughly grouped into five categories:
+
+[**Agents**](/docs/llamaindex/tutorials/basic_agent):
+An agent is an automated decision-maker powered by an LLM that interacts with the world via a set of [tools](/docs/llamaindex/modules/agents/tool). Agents can take an arbitrary number of steps to complete a given task, dynamically deciding on the best course of action rather than following pre-determined steps. This gives it additional flexibility to tackle more complex tasks.
+
+[**Workflows**](/docs/llamaindex/tutorials/workflows):
+A Workflow in LlamaIndex is a specific event-driven abstraction that allows you to orchestrate a sequence of steps and LLM calls. Workflows can be used to implement any agentic application, and are a core component of LlamaIndex.
+
+[**Structured Data Extraction**](/docs/llamaindex/tutorials/structured_data_extraction):
+Pydantic extractors allow you to specify a precise data structure to extract from your data and use LLMs to fill in the missing pieces in a type-safe way. This is useful for extracting structured data from unstructured sources like PDFs, websites, and more, and is key to automating workflows.
+
+[**Query Engines**](/docs/llamaindex/modules/rag/query_engines):
+A query engine is an end-to-end flow that allows you to ask questions over your data. It takes in a natural language query, and returns a response, along with reference context retrieved and passed to the LLM.
+
+[**Chat Engines**](/docs/llamaindex/modules/rag/chat_engine):
+A chat engine is an end-to-end flow for having a conversation with your data (multiple back-and-forth instead of a single question-and-answer).
@@ -9,10 +9,10 @@ To install llamaindex, run the following command:
 npm i llamaindex
 ```

-In most cases, you'll also need an LLM package to use LlamaIndex. For example, to use the OpenAI LLM, you would install the following:
+In most cases, you'll also need an LLM package and the Workflow package to use LlamaIndex. For example, to use the OpenAI LLM with agents, you would install the following:

 ```package-install
-npm i @llamaindex/openai
+npm i @llamaindex/openai @llamaindex/workflow
 ```

 Go to [LLM APIs](/docs/llamaindex/modules/models/llms) to find out how to use other LLMs.
@@ -40,19 +40,7 @@ Make sure to set [moduleResolution](https://www.typescriptlang.org/docs/handbook
 }
 ```

-We recommend using `bundler` or `nodenext`, but due to popularity of `node`, we still added support for it, but with import path limitations.
-
-So you may encounter type errors when importing sub paths from the `llamaindex` package like:
-
-```ts
-import { Settings } from "llamaindex";
-```
-
-The simplest way to fix this without changing `moduleResolution` is to import directly from `llamaindex`:
-
-```ts
-import { Settings } from "llamaindex";
-```
+We recommend using `bundler` or `nodenext`, but because `node` is still widely used, we also support it.

 ## Enable AsyncIterable for `Web Stream` API

@@ -68,7 +56,8 @@ Some modules uses `Web Stream` API like `ReadableStream` and `WritableStream`, y
 ```

 ```typescript
-import { agent, tool } from 'llamaindex'
+import { tool } from 'llamaindex'
+import { agent } from "@llamaindex/workflow";
 import { openai } from "@llamaindex/openai";

 Settings.llm = openai({
@@ -1,4 +1,4 @@
 {
  "title": "Getting Started",
-  "pages": ["installation", "create_llama", "examples"]
+  "pages": ["concepts", "installation", "create_llama", "examples"]
 }
@@ -12,7 +12,8 @@ Agent Workflows are a powerful system that enables you to create and orchestrate
 The simplest use case is creating a single agent with specific tools. Here's an example of creating an assistant that tells jokes:

 ```typescript
-import { agent, tool } from "llamaindex";
+import { tool } from "llamaindex";
+import { agent } from "@llamaindex/workflow";
 import { openai } from "@llamaindex/openai";

 // Define a joke-telling tool
@@ -32,7 +33,7 @@ const jokeAgent = agent({

 // Run the workflow
 const result = await jokeAgent.run("Tell me something funny");
-console.log(result); // Baby Llama is called cria
+console.log(result.data.result); // Baby Llama is called cria
 ```

 ### Event Streaming
@@ -40,17 +41,17 @@ console.log(result); // Baby Llama is called cria
 Agent Workflows provide a unified interface for event streaming, making it easy to track and respond to different events during execution:

 ```typescript
-import { AgentToolCall, AgentStream } from "llamaindex";
+import { agentToolCallEvent, agentStreamEvent } from "@llamaindex/workflow";

 // Get the workflow execution context
-const context = workflow.run("Tell me something funny");
+const events = jokeAgent.runStream("Tell me something funny");

 // Stream and handle events
-for await (const event of context) {
-  if (event instanceof AgentToolCall) {
+for await (const event of events) {
+  if (agentToolCallEvent.include(event)) {
    console.log(`Tool being called: ${event.data.toolName}`);
  }
-  if (event instanceof AgentStream) {
+  if (agentStreamEvent.include(event)) {
    process.stdout.write(event.data.delta);
  }
 }
@@ -68,7 +69,8 @@ An Agent Workflow can orchestrate multiple agents, enabling complex interactions
 Here's an example of a multi-agent system that combines joke-telling and weather information:

 ```typescript
-import { multiAgent, agent, tool } from "llamaindex";
+import { tool } from "llamaindex";
+import { multiAgent, agent } from "@llamaindex/workflow";
 import { openai } from "@llamaindex/openai";
 import { z } from "zod";

@@ -110,6 +112,7 @@ const agents = multiAgent({
 const result = await agents.run(
  "Give me a morning greeting with a joke and the weather in San Francisco"
 );
+console.log(result.data.result);
 ```

 The workflow will coordinate between agents, allowing them to handle different aspects of the request and hand off tasks when appropriate.
@@ -1,4 +1,4 @@
 {
  "title": "Agents",
-  "pages": ["tool", "agent_workflow", "workflows"]
+  "pages": ["tool", "agent_workflow", "workflows", "natural_language_workflow"]
 }
@@ -0,0 +1,103 @@
+---
+title: Define workflows using natural language  
+---
+
+When working with Workflows, you have to write code to handle an event in the workflow. 
+Often, the logic of the handler is simple enough that it can be expressed in natural language and executed by an LLM.  
+Besides the instructions, we just need the expected result event of the step, possible tool calls and optionally other events that can be emitted.  
+
+## Usage
+
+Let's take an example of a workflow that generates a joke, gets a critique for it, and then improves it.
+
+### Define the events
+
+First, we define the events for our workflow. We need one for writing the joke, one for critiquing it, and one for the final result:
+
+```typescript
+import { z } from "zod";
+import { zodEvent } from "@llamaindex/workflow";
+
+const writeJokeSchema = z.object({
+  description: z
+    .string()
+    .describe("The topic to write a joke or describe the joke to improve."),
+  writtenJoke: z.optional(z.string()).describe("The written joke."),
+  retriedTimes: z
+    .number()
+    .default(0)
+    .describe(
+      "The retried times for writing the joke. Always increase this from the input retriedTimes.",
+    ),
+});
+
+const critiqueSchema = z.object({
+  joke: z.string().describe("The joke to critique"),
+  retriedTimes: z.number().describe("The retried times for writing the joke."),
+});
+
+const finalResultSchema = z.object({
+  joke: z.string().describe("The joke to critique"),
+  critique: z.string().describe("The critique of the joke"),
+});
+
+const writeJokeEvent = zodEvent(writeJokeSchema, {
+  debugLabel: "writeJokeEvent",
+}); 
+const critiqueEvent = zodEvent(critiqueSchema, {
+  debugLabel: "critiqueEvent",
+}); 
+const finalResultEvent = zodEvent(finalResultSchema, {
+  debugLabel: "finalResultEvent",
+}); 
+```
+
+Note that for natural language workflows, the events need to be created with the `zodEvent` function, passing the zod schema as an argument. The agent needs the schema of the event data to correctly generate events.  
+Also, we need a `debugLabel` so the LLM can identify the event to emit in the workflow.
+
+### Define the workflow
+
+As usual you first create the workflow:
+
+```typescript
+import { agentHandler, createWorkflow } from "@llamaindex/workflow";
+
+const jokeFlow = createWorkflow();
+```
+
+Then you need to handle the events. For the handlers, instead of code, you're now going to use natural language by calling the `agentHandler` function.
+
+It only requires two parameters:
+- `instructions`: A prompt to guide the agent how to handle the steps.
+- `results`: The output events that the agent should return after handling the step.
+
+With that, only a small amount of code is needed to handle each step:
+
+```typescript
+jokeFlow.handle(
+  [writeJokeEvent],
+  agentHandler({
+    instructions: `You are a joke writer. You are given a topic and you need to write a joke about it.`,
+    results: [critiqueEvent],
+  }),
+);
+
+jokeFlow.handle(
+  [critiqueEvent],
+  agentHandler({
+    instructions: `
+You are given a joke and you need to critique it. Follow the following guidelines:
+1. You have maximum 3 times to improve the joke.
+2. If the joke is not good, increase the retriedTimes, describe how to improve the joke and send a writeJokeEvent.
+3. If the joke is good, trigger the finalResultEvent event.
+`,
+    results: [writeJokeEvent, finalResultEvent],
+  }),
+);
+```
+
+For advanced usage, you can add more functionality to `agentHandler` by using these parameters:
+- `events`: A list of additional events that the agent can emit to the workflow. E.g., your agent can emit a `uiEvent` to update the UI during the execution.
+- `tools`: A list of tools that the agent can use to handle the step. E.g., your agent can use a `search` tool to search the web.
+
+You can find more code examples in the [examples](https://github.com/run-llama/LlamaIndexTS/tree/main/examples/agents/natural) folder.
@@ -17,7 +17,8 @@ The `parameters` field in the tool configuration is defined using `zod`, a TypeS

 Example:
 ```ts
-import { agent, tool } from "llamaindex";
+import { tool } from "llamaindex";
+import { agent } from "@llamaindex/workflow";
 import { z } from "zod";

 // first arg is LLM input, second is bound arg
@@ -46,7 +47,7 @@ In this example, `z.object` is used to define a schema for the `parameters` wher
 You can import built-in tools from the `@llamaindex/tools` package.

 ```ts
-import { agent } from "llamaindex";
+import { agent } from "@llamaindex/workflow";
 import { wiki } from "@llamaindex/tools";

 const researchAgent = agent({
@@ -57,6 +58,50 @@ const researchAgent = agent({
 });
 ```

+## MCP tools
+
+If you have an MCP server running, you can fetch tools from the server and use them in your agents.
+
+```ts
+// 1. Import MCP tools adapter
+import { mcp } from "@llamaindex/tools";
+import { agent } from "@llamaindex/workflow";
+
+// 2. Initialize a MCP client
+// by npx
+const server = mcp({
+  command: "npx",
+  args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
+  verbose: true,
+});
+// or by StreamableHTTP transport
+const server = mcp({
+  url: "http://localhost:8000/mcp",
+  verbose: true,
+});
+
+// if your MCP server is not using StreamableHTTP transport, you can also use SSE transport
+// by setting useSSETransport to true.
+// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
+const server = mcp({
+  url: "http://localhost:8000/mcp",
+  useSSETransport: true,
+  verbose: true,
+});
+
+// 3. Get tools from MCP server
+const tools = await server.tools();
+
+// Now you can create an agent with the tools
+const myAgent = agent({
+  name: "My Agent",
+  systemPrompt: "You are a helpful assistant that can use the provided tools to answer questions.",
+  llm: openai({ model: "gpt-4o" }),
+  tools: tools,
+});
+```
+
+
 ## Function tool

 You can still use the `FunctionTool` class to define a tool.
@@ -79,7 +124,8 @@ Note: calling the `bind` method will return a new `FunctionTool` instance, witho

 Example to pass a `userToken` as additional argument:
 ```ts
-import { agent, tool } from "llamaindex";
+import { tool } from "llamaindex";
+import { agent } from "@llamaindex/workflow";

 // first arg is LLM input, second is bound arg
 const queryKnowledgeBase = async ({ question }, { userToken }) => {
@@ -2,149 +2,20 @@
 title: Workflows
 ---

-A `Workflow` in LlamaIndexTS is an event-driven abstraction used to chain together several events. Workflows are made up of `steps`, with each step responsible for handling certain event types and emitting new events.
+A `Workflow` in LlamaIndex is a lightweight, event-driven abstraction used to chain together several events. Workflows are made up of `handlers`, with each one responsible for processing specific event types and emitting new events.

-Workflows in LlamaIndexTS work by defining step functions that handle specific event types and emit new events.
+Workflows are designed to be flexible and can be used to build agents, RAG flows, extraction flows, or anything else you want to implement.

-When a step function is added to a workflow, you need to specify the input and optionally the output event types (used for validation). The specification of the input events ensures each step only runs when an accepted event is ready.
+To use workflows install this package:

-You can create a `Workflow` to do anything! Build an agent, a RAG flow, an extraction flow, or anything else you want.
+```package-install
+npm i @llamaindex/workflow-core
+```

+This contains the core functionality for the workflow system. You can read more about the core concepts in the [workflow-core](/docs/workflows) section.
+
+In contrast, the `@llamaindex/workflow` package contains more utilities, such as prebuilt agents.

 ```package-install
 npm i @llamaindex/workflow
 ```
-
-## Getting Started
-
-As an illustrative example, let's consider a naive workflow where a joke is generated and then critiqued.
-
-<include cwd>../../examples/workflow/joke.ts</include>
-
-There's a few moving pieces here, so let's go through this piece by piece.
-
-### Defining Workflow Events
-
-```typescript
-export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
-```
-
-Events are user-defined classes that extend `WorkflowEvent` and contain arbitrary data provided as template argument. In this case, our workflow relies on a single user-defined event, the `JokeEvent` with a `joke` attribute of type `string`.
-
-### Setting up the Workflow Class
-
-```typescript
-const llm = new OpenAI();
-...
-const jokeFlow = new Workflow<unknown, string, string>();
-```
-
-Our workflow is implemented by initiating the `Workflow` class with three generic types: the context type (unknown), input type (string), and output type (string). The context type is `unknown`, as we're not using a shared context in this example.
-
-For simplicity, we created an `OpenAI` llm instance that we're using for inference in our workflow.
-
-### Workflow Entry Points
-
-```typescript
-const generateJoke = async (_: unknown, ev: StartEvent<string>) => {
-  const prompt = `Write your best joke about ${ev.data}.`;
-  const response = await llm.complete({ prompt });
-  return new JokeEvent({ joke: response.text });
-};
-```
-
-Here, we come to the entry-point of our workflow. While events are user-defined, there are two special-case events, the `StartEvent` and the `StopEvent`. These events are predefined, but we can specify the payload type using generic types. We're using `StartEvent<string>` to indicate that we're going to send an input of type string.
-
-To add this step to the workflow, we use the `addStep` method with an object specifying the input and output event types:
-
-```typescript
-jokeFlow.addStep(
-  {
-    inputs: [StartEvent<string>],
-    outputs: [JokeEvent],
-  },
-  generateJoke
-);
-```
-
-### Workflow Exit Points
-
-```typescript
-const critiqueJoke = async (_: unknown, ev: JokeEvent) => {
-  const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
-  const response = await llm.complete({ prompt });
-  return new StopEvent(response.text);
-};
-```
-
-Here, we have our second and last step in the workflow. We know it's the last step because the special `StopEvent` is returned. When the workflow encounters a returned `StopEvent`, it immediately stops the workflow and returns the result. Note that we're using the generic type `StopEvent<string>` to indicate that we're returning a string.
-
-Add this step to the workflow:
-
-```typescript
-jokeFlow.addStep(
-  {
-    inputs: [JokeEvent],
-    outputs: [StopEvent<string>],
-  },
-  critiqueJoke
-);
-```
-
-### Running the Workflow
-
-```typescript
-const result = await jokeFlow.run("pirates");
-console.log(result.data.result);
-```
-
-Lastly, we run the workflow. The `.run()` method is async, so we use await here to wait for the result.
-
-## Working with Shared Context/State
-
-Optionally, you can choose to use a shared context between steps by specifying a context type when creating the workflow. Here's an example where multiple steps access a shared state:
-
-```typescript
-import { HandlerContext } from "llamaindex";
-
-type MyContextData = {
-  query: string;
-  intermediateResults: any[];
-}
-
-const query = async (context: HandlerContext<MyContextData>, ev: MyEvent) => {
-  // get the query from the context
-  const query = context.data.query;
-  // do something with context and event
-  const val = ...
-  // store in context
-  context.data.intermediateResults.push(val);
-
-  return new StopEvent({ result });
-};
-```
-
-## Waiting for Multiple Events
-
-The context does more than just hold data, it also provides utilities to buffer and wait for multiple events.
-
-For example, you might have a step that waits for a query and retrieved nodes before synthesizing a response:
-
-```typescript
-const synthesize = async (context: Context, ev1: QueryEvent, ev2: RetrieveEvent) => {
-  const subPrompts = [`Answer this query using the context provided: ${ev1.data.query}`, `Context: ${ev2.data.context}`];
-  const prompt = subPrompts.join("\n");
-  const response = await llm.complete({ prompt });
-  return new StopEvent({ result: response.text });
-};
-```
-
-Passing multiple events, we can buffer and wait for ALL expected events to arrive. The receiving step function will only be called once all events have arrived.
-
-## Manually Triggering Events
-
-Normally, events are triggered by returning another event during a step. However, events can also be manually dispatched using the `ctx.sendEvent(event)` method within a workflow.
-
-## Examples
-
-You can find many useful examples of using workflows in the [examples folder](https://github.com/run-llama/LlamaIndexTS/blob/main/examples/workflow).
@@ -0,0 +1,182 @@
+---
+title: Memory
+description: Manage conversation history and context with agents
+---
+
+## Concept
+
+Memory is a core component of agentic systems. It allows you to store and retrieve information from the past.
+
+In LlamaIndexTS, you can create memory by using the `createMemory` function. This function will return a `Memory` object, which you can then use to store and retrieve information.
+
+As the agent runs, it will make calls to `add()` to store information, and `get()` to retrieve information. 
+
+## Usage
+
+A `Memory` object has both short-term memory (i.e. a FIFO queue of messages) and optionally long-term memory (i.e. extracting information over time).
+
+`get()` always returns all messages stored in the memory. The longer the agent runs, the more likely it is that these messages will exceed the agent's context window. To avoid this, the agent uses the `getLLM` method to get the last X messages that fit into the context window.
+
+### Configuring Memory for an Agent
+
+Here we're creating a memory with a static block (read more about [memory blocks](#long-term-memory)) that contains some information about the user.
+
+```ts twoslash
+import { openai } from "@llamaindex/openai";
+import { agent } from "@llamaindex/workflow";
+import { createMemory, staticBlock } from "llamaindex";
+
+const llm = openai({ model: "gpt-4.1-mini" });
+
+// Create memory with predefined context
+const memory = createMemory({
+  memoryBlocks: [
+    staticBlock({
+      content:
+        "The user is a software engineer who loves TypeScript and LlamaIndex.",
+    }),
+  ],
+});
+
+// Create an agent with the memory
+const workflow = agent({
+  name: "assistant",
+  llm,
+  memory,
+});
+
+const result = await workflow.run("What is my name?");
+console.log("Response:", result.data.result);
+```
+
+### Using Vercel format
+
+You can also put messages in Vercel format directly to the memory:
+
+```ts
+await memory.add({
+  id: "1",
+  createdAt: new Date(),
+  role: "user",
+  content: "Hello!",
+  options: {
+    parts: [
+      {
+        type: "file",
+        data: "base64...",
+        mimeType: "image/png",
+      },
+    ],
+  },
+});
+```
+
+If you call `get`, messages are usually retrieved in the LlamaIndexTS format (type `ChatMessage`). If you specify the `type` parameter using `get`, you can return the messages in different formats. E.g.: using `type: "vercel"`, you can return the messages in Vercel format:
+
+```ts
+const messages = await memory.get({ type: "vercel" });
+console.log(messages);
+```
+
+## Customizing Memory
+
+### Short-Term Memory
+
+The `Memory` object will store all the messages that are added to the `Memory` object. Unless you call `clear()`, no messages are removed from the memory. This is the short-term memory (usually you will store the memory of one user session there) which is augmented by the long-term memory.
+
+Calling `getLLM` will retrieve messages from long-term memory and ensure that the given `tokenLimit` is not reached. These are the messages that you will send to the LLM.
+
+For initialization, you call `createMemory` with the following options:
+
+- `tokenLimit`: Maximum tokens for memory retrieval using `getLLM` (default: 30000).
+- `shortTermTokenLimitRatio`: Ratio of tokens for short-term vs long-term memory (default: 0.7)
+- `customAdapters`: Custom message adapters for different message formats. LlamaIndex (`ChatMessageAdapter`) and Vercel (`VercelMessageAdapter`) are built-in adapters.
+- `memoryBlocks`: Memory blocks for long-term storage, see [Long-Term Memory](#long-term-memory)
+
+Example:
+
+```ts
+const memory = createMemory({
+    tokenLimit: 40000,
+    shortTermTokenLimitRatio: 0.5,
+});
+```
+
+### Long-Term Memory
+
+Long-term memory is represented as `Memory Block` objects. These objects contain information from previous user sessions or from the beginning of the current conversation. When memory is retrieved (by calling `getLLM`), the short-term and long-term memories are merged together within the given `tokenLimit`. 
+
+Currently, there are two predefined memory blocks:
+
+- `staticBlock`: A memory block that stores a static piece of information.
+- `factExtractionBlock`: A memory block that extracts facts from the chat history.
+
+This sounds a bit complicated, but it's actually quite simple. Let's look at an example:
+
+```ts
+import { createMemory, factExtractionBlock, staticBlock } from "llamaindex";
+
+const memoryBlocks= [
+  staticBlock({
+    id: "core_info",
+    content: "My name is Logan, and I live in Saskatoon. I work at LlamaIndex.",
+  }),
+  factExtractionBlock({
+    id: "user-extracted_info",
+    priority: 1,
+    llm: llm,
+    maxFacts: 50,
+  }),
+];
+```
+
+Here, we've set up two memory blocks:
+
+- `core_info`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
+- `user-extracted_info`: A fact extraction memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
+
+You'll also notice that we've set the `priority` for the `factExtractionBlock` block. This is used to determine the handling when the memory blocks content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.
+
+- `priority=0`: This block will always be kept in memory (`staticBlocks` always have priority 0.)
+- `priority=1, 2, 3, etc`: This determines the order in which memory blocks are truncated when the memory exceeds the token limit, to help the overall short-term memory + long-term memory content be less than or equal to the `tokenLimit`.
+
+Now, let's pass these blocks into the `createMemory` function:
+
+```ts
+const memory = createMemory({
+  tokenLimit: 40000,
+  memoryBlocks: memoryBlocks,
+});
+```
+
+When memory is retrieved (using `getLLM`), the short-term and long-term memories are merged together. The `Memory` object will ensure that the short-term memory + long-term memory content is less than or equal to the `tokenLimit`. If it is longer, messages are retrieved in the following order:
+
+1. StaticMemoryBlock (information always included)
+2. LongTermMemoryBlock (depending on priority)
+3. ShortTermMemoryBlock 
+4. Transient messages
+
+The amount of short-term memory included is specified by the `shortTermTokenLimitRatio`. If it's set to `0.7`, 70% of the `tokenLimit` is used for short-term memory (not including the static memory block).
+
+## Persistence with Snapshots
+
+Save and restore memory state:
+
+```ts twoslash
+import { createMemory, loadMemory } from "llamaindex";
+
+const memory = createMemory();
+
+// Add some messages
+await memory.add({ role: "user", content: "Hello!" });
+
+// Create snapshot
+const snapshot = memory.snapshot();
+
+// Later, restore from the snapshot
+const restoredMemory = loadMemory(snapshot);
+```
+
+## Examples
+
+Want to learn more about the Memory class? Check out our examples on [GitHub](https://github.com/run-llama/LlamaIndexTS/tree/main/examples/agents/memory).
@@ -1,4 +1,11 @@
 {
  "title": "Data",
-  "pages": ["index", "readers", "data_index", "ingestion_pipeline", "stores"]
+  "pages": [
+    "index",
+    "memory",
+    "readers",
+    "data_index",
+    "ingestion_pipeline",
+    "stores"
+  ]
 }
@@ -5,6 +5,12 @@ title: DiscordReader
 DiscordReader is a simple data loader that reads all messages in a given Discord channel and returns them as Document objects.
 It uses the [@discordjs/rest](https://github.com/discordjs/discord.js/tree/main/packages/rest) library to fetch the messages.

+## Installation
+
+```package-install
+npm install @llamaindex/discord
+```
+
 ## Usage

 First step is to create a Discord Application and generating a bot token [here](https://discord.com/developers/applications).
@@ -12,7 +18,7 @@ In your Discord Application, go to the `OAuth2` tab and generate an invite URL b
 This will invite the bot with the necessary permissions to read messages.
 Copy the URL in your browser and select the server you want your bot to join.

-<include cwd>../../examples/readers/src/discord.ts</include>
+<include cwd>../../examples/readers/discord/reader.ts</include>

 ### Params

@@ -21,27 +21,18 @@ To install readers call:

 We offer readers for different file formats.

-```ts twoslash 
-import { CSVReader } from '@llamaindex/readers/csv'
-import { PDFReader } from '@llamaindex/readers/pdf'
-import { JSONReader } from '@llamaindex/readers/json'
-import { MarkdownReader } from '@llamaindex/readers/markdown'
-import { HTMLReader } from '@llamaindex/readers/html'
-// you can find more readers in the documentation
+```ts twoslash
+import { CSVReader } from '@llamaindex/readers/csv';
+import { DocxReader } from '@llamaindex/readers/docx';
+import { HTMLReader } from '@llamaindex/readers/html';
+import { ImageReader } from '@llamaindex/readers/image';
+import { JSONReader } from '@llamaindex/readers/json';
+import { MarkdownReader } from '@llamaindex/readers/markdown';
+import { ObsidianReader } from '@llamaindex/readers/obsidian';
+import { PDFReader } from '@llamaindex/readers/pdf';
+import { TextFileReader } from '@llamaindex/readers/text';
 ```

-Additionally the following loaders exist without separate documentation:
-
- `AssemblyAIReader` transcribes audio using [AssemblyAI](https://www.assemblyai.com/).
-  - [AudioTranscriptReader](/docs/api/classes/AudioTranscriptReader): loads entire transcript as a single document.
-  - [AudioTranscriptParagraphsReader](/docs/api/classes/AudioTranscriptParagraphsReader): creates a document per paragraph.
-  - [AudioTranscriptSentencesReader](/docs/api/classes/AudioTranscriptSentencesReader): creates a document per sentence.
-  - [AudioSubtitlesReader](/docs/api/classes/AudioTranscriptParagraphsReader): creates a document containing the subtitles of a transcript.
- [NotionReader](/docs/api/classes/NotionReader) loads [Notion](https://www.notion.so/) pages.
- [SimpleMongoReader](/docs/api/classes/SimpleMongoReader) loads data from a [MongoDB](https://www.mongodb.com/).
-
-Check the [LlamaIndexTS Github](https://github.com/run-llama/LlamaIndexTS) for the most up to date overview of integrations.
-
 ## SimpleDirectoryReader

 [Open in StackBlitz](https://stackblitz.com/github/run-llama/LlamaIndexTS/tree/main/examples/readers?file=src/simple-directory-reader.ts&title=Simple%20Directory%20Reader)
@@ -112,6 +112,3 @@ The returned `imageDocs` have the alt text assigned as text and the image path a

 You can see the full example file [here](https://github.com/run-llama/LlamaIndexTS/blob/main/examples/readers/src/llamaparse-json.ts).

-## API Reference
-
- [LlamaParseReader](/docs/api/classes/LlamaParseReader)
@@ -32,7 +32,7 @@ They can be divided into two groups.
 #### Advanced params:

 - `resultType` can be set to `markdown`, `text` or `json`. Defaults to `text`. More information about `json` mode on the next pages.
- `language` primarily helps with OCR recognition. Defaults to `en`. Click [here](/docs/api/type-aliases/Language) for a list of supported languages.
+- `language` primarily helps with OCR recognition. Defaults to `en`.
 - `parsingInstructions?` Optional. Can help with complicated document structures. See this [LlamaIndex Blog Post](https://www.llamaindex.ai/blog/launching-the-first-genai-native-document-parsing-platform) for an example.
 - `skipDiagonalText?` Optional. Set to true to ignore diagonal text. (Text that is not rotated 0, 90, 180 or 270 degrees)
 - `invalidateCache?` Optional. Set to true to ignore the LlamaCloud cache. All document are kept in cache for 48hours after the job was completed to avoid processing the same document twice. Can be useful for testing when trying to re-parse the same document with, e.g. different `parsingInstructions`.
@@ -61,4 +61,3 @@ Below a full example of `LlamaParse` integrated in `SimpleDirectoryReader` with
 ## API Reference

 - [SimpleDirectoryReader](/docs/api/classes/SimpleDirectoryReader)
- [LlamaParseReader](/docs/api/classes/LlamaParseReader)
@@ -98,5 +98,4 @@ You can assign any other values of the JSON response to the Document as needed.

 ## API Reference

- [LlamaParseReader](/docs/api/classes/LlamaParseReader)
 - [SimpleDirectoryReader](/docs/api/classes/SimpleDirectoryReader)
@@ -88,7 +88,7 @@ async function main() {

  const response = await queryEngine.query({
    query: "What did the author do in college?",
-  });
+  }); // Additional filters and params can be passed as options

  // Output response
  console.log(response.toString());
@@ -28,11 +28,12 @@ embedding vector(1536)
 );
 ```

-- Create a function for similarity search
+-- Create a function for similarity search with filtering support
 ```sql
 create function match_documents (
 query_embedding vector(1536),
-match_count int
+match_count int,
+filter jsonb DEFAULT '{}'
 ) returns table (
 id uuid,
 content text,
@@ -42,6 +43,7 @@ similarity float
 )
 language plpgsql
 as $$
+#variable_conflict use_column
 begin
 return query
 select
@@ -51,6 +53,7 @@ metadata,
 embedding,
 1 - (embedding <=> query_embedding) as similarity
 from documents
+where metadata @> filter
 order by embedding <=> query_embedding
 limit match_count;
 end;
@@ -95,6 +98,7 @@ const index = await VectorStoreIndex.fromDocuments(documents, {
 ```ts
 const queryEngine = index.asQueryEngine();

+// Basic query without filters
 const response = await queryEngine.query({
  query: "What is in the document?",
 });
@@ -103,6 +107,32 @@ const response = await queryEngine.query({
 console.log(response.toString());
 ```

+## Query with filters
+
+You can filter documents based on metadata when querying:
+
+```ts
+import { FilterOperator, MetadataFilters } from "llamaindex";
+
+// Create a filter for documents with author = "Jane Smith"
+const filters: MetadataFilters = {
+  filters: [
+    {
+      key: "author",
+      value: "Jane Smith",
+      operator: FilterOperator.EQ,
+    },
+  ],
+};
+
+// Query with filters
+const filteredResponse = await vectorStore.query({
+  queryEmbedding: embedModel.getQueryEmbedding("What is vector search?"),
+  similarityTopK: 5,
+  filters,
+});
+```
+
 ## Full code

 ```ts
@@ -2,89 +2,43 @@
 title: Azure OpenAI
 ---

-To use Azure OpenAI, you only need to set a few environment variables together with the `OpenAI` class.
-
-For example:
-
-## Environment Variables
-
-```
-export AZURE_OPENAI_KEY="<YOUR KEY HERE>"
-export AZURE_OPENAI_ENDPOINT="<YOUR ENDPOINT, see https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api>"
-export AZURE_OPENAI_DEPLOYMENT="gpt-4" # or some other deployment name
-```
+To use Azure OpenAI, you only need to install the `@llamaindex/azure` package:

 ## Installation

 ```package-install
-npm i llamaindex @llamaindex/openai
+npm i llamaindex @llamaindex/azure
 ```

 ## Usage

+The class `AzureOpenAI` is used for setting the LLM and `AzureOpenAIEmbedding` is used for setting the embedding model, e.g.:
+
 ```ts
 import { Settings } from "llamaindex";
-import { OpenAI } from "@llamaindex/openai";
+import { AzureOpenAI, AzureOpenAIEmbedding } from "@llamaindex/azure";

-Settings.llm = new OpenAI({ model: "gpt-4", temperature: 0 });
-```
-
-## Load and index documents
-
-For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
-
-```ts
-const document = new Document({ text: essay, id_: "essay" });
-
-const index = await VectorStoreIndex.fromDocuments([document]);
-```
-
-## Query
-
-```ts
-const queryEngine = index.asQueryEngine();
-
-const query = "What is the meaning of life?";
-
-const results = await queryEngine.query({
-  query,
+Settings.llm = new AzureOpenAI({ 
+  apiKey: '[key]',
+  deployment: '[model]',
+  apiVersion: '[version]',
+  endpoint: `https://[deployment].openai.azure.com/`, 
+});
+Settings.embedModel = new AzureOpenAIEmbedding({
+  apiKey: '[key]',
+  deployment: '[embedding-model]',
+  apiVersion: '[version]',
+  endpoint: `https://[deployment].openai.azure.com/`, 
 });
 ```

-## Full Example
+Instead of explicitly setting the API key, deployment, version, and endpoint in the constructor, you can use the following environment variables: `AZURE_OPENAI_DEPLOYMENT` for the model deployment name, `AZURE_OPENAI_KEY` for your API key, `AZURE_OPENAI_ENDPOINT` for your Azure endpoint URL, and `AZURE_OPENAI_API_VERSION` for the API version.

-```ts
-import { Document, VectorStoreIndex, Settings } from "llamaindex";
-import { OpenAI } from "@llamaindex/openai";
+## Examples

-Settings.llm = new OpenAI({ model: "gpt-4", temperature: 0 });
-
-async function main() {
-  const document = new Document({ text: essay, id_: "essay" });
-
-  // Load and index documents
-  const index = await VectorStoreIndex.fromDocuments([document]);
-
-  // get retriever
-  const retriever = index.asRetriever();
-
-  // Create a query engine
-  const queryEngine = index.asQueryEngine({
-    retriever,
-  });
-
-  const query = "What is the meaning of life?";
-
-  // Query
-  const response = await queryEngine.query({
-    query,
-  });
-
-  // Log the response
-  console.log(response.response);
-}
-```
+See the [Azure examples](https://github.com/run-llama/LlamaIndexTS/tree/main/examples/storage/azure) for more examples of how to use Azure OpenAI.

 ## API Reference

- [OpenAI](/docs/api/classes/OpenAI)
+- [AzureOpenAI](/docs/api/classes/AzureOpenAI)
+- [AzureOpenAIEmbedding](/docs/api/classes/AzureOpenAIEmbedding)
@@ -120,11 +120,11 @@ async function main() {

 ```ts
 import { BEDROCK_MODELS, Bedrock } from "@llamaindex/community";
-import { FunctionTool, LLMAgent } from "llamaindex";
+import { tool } from "llamaindex";
+import { agent } from "@llamaindex/workflow";
 import { z } from "zod";

-const sumNumbers = FunctionTool.from(
-  ({ a, b }: { a: number; b: number }) => `${a + b}`,
+const sumNumbers = tool(
  {
    name: "sumNumbers",
    description: "Use this function to sum two numbers",
@@ -136,11 +136,11 @@ const sumNumbers = FunctionTool.from(
        description: "The second number",
      }),
    }),
+    execute: ({ a, b }: { a: number; b: number }) => `${a + b}`,
  },
 );

-const divideNumbers = FunctionTool.from(
-  ({ a, b }: { a: number; b: number }) => `${a / b}`,
+const divideNumbers = tool(
  {
    name: "divideNumbers",
    description: "Use this function to divide two numbers",
@@ -152,6 +152,7 @@ const divideNumbers = FunctionTool.from(
        description: "The divisor b to divide by",
      }),
    }),
+    execute: ({ a, b }: { a: number; b: number }) => `${a / b}`,
  },
 );

@@ -161,15 +162,15 @@ const bedrock = new Bedrock({
 });

 async function main() {
-  const agent = new LLMAgent({
+  const myAgent = agent({
    llm: bedrock,
    tools: [sumNumbers, divideNumbers],
  });

-  const response = await agent.chat({
-    message: "How much is 5 + 5? then divide by 2",
-  });
+  const response = await myAgent.run(
+    "How much is 5 + 5? then divide by 2",
+  );

-  console.log(response.message);
+  console.log(response);
 }
 ```
@@ -11,58 +11,130 @@ npm i llamaindex @llamaindex/google
 ## Usage

 ```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
 import { Settings } from "llamaindex";

-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-});
-```
-
-## Usage with Proxy
-
-```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
-import { Settings } from "llamaindex";
-
-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-  requestOptions: {
-    baseUrl: <YOUR_PROXY_URL>   // optional, but useful for custom endpoints
-  }
+Settings.llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
 });
 ```

 ### Usage with Vertex AI

-To use Gemini via Vertex AI you can use `GeminiVertexSession`.
-
-GeminiVertexSession accepts the env variables: `GOOGLE_VERTEX_LOCATION` and `GOOGLE_VERTEX_PROJECT`
+To use Gemini via Vertex AI, you can specify the vertex configuration:

 ```ts
-import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";

-const gemini = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
-  session: new GeminiVertexSession({
-    location: "us-central1",      // optional if provided by GOOGLE_VERTEX_LOCATION env variable
-    project: "project1",          // optional if provided by GOOGLE_VERTEX_PROJECT env variable
-    googleAuthOptions: {...},     // optional, but useful for production. It accepts all values from `GoogleAuthOptions`
-  }),
+const llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
+  vertex: {
+    project: "your-cloud-project",    // required for Vertex AI
+    location: "us-central1",          // required for Vertex AI
+  },
 });
 ```

-[GoogleAuthOptions](https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts)
-
 To authenticate for local development:

 ```bash
-npm i @google-cloud/vertexai
 gcloud auth application-default login
 ```

 To authenticate for production you'll have to use a [service account](https://cloud.google.com/docs/authentication/). `googleAuthOptions` has `credentials` which might be useful for you.

+## Multimodal Usage
+
+Gemini supports multimodal inputs including text, images, audio, and video:
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+import fs from "fs";
+
+const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
+
+const result = await llm.chat({
+  messages: [
+    {
+      role: "user",
+      content: [
+        {
+          type: "text",
+          text: "What's in this image?",
+        },
+        {
+          type: "image",
+          data: fs.readFileSync("./image.jpg").toString("base64"),
+          mimeType: "image/jpeg",
+        },
+      ],
+    },
+  ],
+});
+```
+
+## Tool Calling
+
+Gemini supports function calling with tools:
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { tool } from "llamaindex";
+import { z } from "zod";
+
+const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
+
+const result = await llm.chat({
+  messages: [
+    {
+      content: "What's the weather in Tokyo?",
+      role: "user",
+    },
+  ],
+  tools: [
+    tool({
+      name: "weather",
+      description: "Get the weather",
+      parameters: z.object({
+        location: z.string().describe("The location to get the weather for"),
+      }),
+      execute: ({ location }) => {
+        return `The weather in ${location} is sunny and hot`;
+      },
+    }),
+  ],
+});
+```
+
+## Live API (Real-time Conversations)
+
+Use the [Gemini Live API](https://ai.google.dev/gemini-api/docs/live) for real-time audio/video conversations.
+
+The Live API runs directly in the frontend, so you first have to generate an ephemeral key on the server side and then pass it to the frontend.
+
+To use the Live API, make sure to pass `apiVersion: "v1alpha"` to the `httpOptions`.
+
+```ts
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
+
+// Server-side: Generate ephemeral key
+const serverLlm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+  httpOptions: { apiVersion: "v1alpha" },
+});
+const ephemeralKey = await serverLlm.live.getEphemeralKey();
+
+// Client-side: Use ephemeral key for Live API
+const llm = gemini({
+  apiKey: ephemeralKey,
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
+  voiceName: "Zephyr",
+  httpOptions: { apiVersion: "v1alpha" },
+});
+
+const session = await llm.live.connect();
+```
+
 ## Load and index documents

 For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
@@ -90,11 +162,11 @@ const results = await queryEngine.query({
 ## Full Example

 ```ts
-import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
+import { gemini, GEMINI_MODEL } from "@llamaindex/google";
 import { Document, VectorStoreIndex, Settings } from "llamaindex";

-Settings.llm = new Gemini({
-  model: GEMINI_MODEL.GEMINI_PRO,
+Settings.llm = gemini({
+  model: GEMINI_MODEL.GEMINI_2_0_FLASH,
 });

 async function main() {
@@ -104,9 +176,7 @@ async function main() {
  const index = await VectorStoreIndex.fromDocuments([document]);

  // Create a query engine
-  const queryEngine = index.asQueryEngine({
-    retriever,
-  });
+  const queryEngine = index.asQueryEngine();

  const query = "What is the meaning of life?";

@@ -55,7 +55,7 @@ const results = await queryEngine.query({

 ## Full Example

-<include cwd>../../examples/groq.ts</include>
+<include cwd>../../examples/models/groq.ts</include>

 ## API Reference

@@ -378,3 +378,186 @@ async function main() {
 ## API Reference

 - [OpenAI](/docs/api/classes/OpenAI)
+
+
+# OpenAI Live LLM
+
+The OpenAI Live LLM integration in LlamaIndex provides real-time chat capabilities with support for audio streaming and tool calling.
+
+## Basic Usage
+
+```typescript
+import { openai } from "@llamaindex/openai";
+import { tool, ModalityType } from "llamaindex";
+
+// Create the server-side LLM instance used to request the ephemeral key
+const serverllm = openai({
+  apiKey: "your-api-key", 
+  model: "gpt-4o-realtime-preview-2025-06-03",
+});
+
+// Get an ephemeral key 
+// Usually this code is run on the server and the ephemeral key is passed to the
+// client - the ephemeral key can be securely used on the client side
+const ephemeralKey = await serverllm.live.getEphemeralKey();
+
+// Create a client-side LLM instance with the ephemeral key
+const llm = openai({
+  apiKey: ephemeralKey,
+  model: "gpt-4o-realtime-preview-2025-06-03"
+});
+
+// Create a live session
+const session = await llm.live.connect({
+  systemInstruction: "You are a helpful assistant.",
+});
+
+// Send a message
+session.sendMessage({
+  content: "Hello!",
+  role: "user",
+});
+```
+
+## Tool Integration
+
+Tools are handled server-side, making it simple to pass them to the live session:
+
+```typescript
+// Define your tools
+const weatherTool = tool({
+  name: "weather",
+  description: "Get the weather for a location",
+  parameters: z.object({
+    location: z.string().describe("The location to get weather for"),
+  }),
+  execute: async ({ location }) => {
+    return `The weather in ${location} is sunny`;
+  },
+});
+
+// Create session with tools
+const session = await llm.live.connect({
+  systemInstruction: "You are a helpful assistant.",
+  tools: [weatherTool],
+});
+```
+
+## Audio Support
+
+For audio capabilities:
+
+```typescript
+// Get microphone access
+const userStream = await navigator.mediaDevices.getUserMedia({
+  audio: true,
+});
+
+// Create session with audio
+const session = await llm.live.connect({
+  audioConfig: {
+    stream: userStream,
+    onTrack: (remoteStream) => {
+      // Handle incoming audio
+      audioElement.srcObject = remoteStream;
+    },
+  },
+});
+```
+
+## Event Handling
+
+Listen to events from the session:
+
+```typescript
+for await (const event of session.streamEvents()) {
+  if (liveEvents.open.include(event)) {
+    // Connection established
+    console.log("Connected!");
+  } else if (liveEvents.text.include(event)) {
+    // Received text response
+    console.log("Assistant:", event.text);
+  }
+}
+```
+
+## Capabilities
+
+The OpenAI Live LLM supports:
+
+- Real-time text chat
+- Audio streaming (if configured)
+- Tool calling (server-side execution)
+- Ephemeral key generation for secure sessions
+
+## API Reference
+
+### LiveLLM Methods
+
+Usually `getEphemeralKey()` is run on the server and the ephemeral key is passed to the
+client - the ephemeral key can be securely used on the client side.
+
+#### `connect(config?: LiveConnectConfig)`
+
+Creates a new live session.
+
+```typescript
+interface LiveConnectConfig {
+  systemInstruction?: string;
+  tools?: BaseTool[];
+  audioConfig?: AudioConfig;
+  responseModality?: ModalityType[];
+}
+```
+
+#### `getEphemeralKey()`
+
+Gets a temporary key for the session.
+
+### LiveLLMSession Methods
+
+#### `sendMessage(message: ChatMessage)`
+
+Sends a message to the assistant.
+
+```typescript
+interface ChatMessage {
+  content: string | MessageContentDetail[];
+  role: "user" | "assistant";
+}
+```
+
+#### `disconnect()`
+
+Closes the session and cleans up resources.
+
+## Error Handling
+
+```typescript
+try {
+  const session = await llm.live.connect();
+} catch (error) {
+  if (error instanceof Error) {
+    console.error("Connection failed:", error.message);
+  }
+}
+```
+
+## Best Practices
+
+1. **Tool Definition**
+
+   - Keep tool implementations server-side
+   - Use clear descriptions for tools
+   - Handle tool errors gracefully
+
+2. **Session Management**
+
+   - Always disconnect sessions when done
+   - Clean up audio resources
+   - Handle reconnection scenarios
+
+3. **Security**
+   - Use ephemeral keys for sessions
+   - Validate tool inputs
+   - Secure API key handling
@@ -11,6 +11,7 @@ A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a
 - [KeywordTableLLMRetriever](/docs/api/classes/KeywordTableLLMRetriever) uses an LLM to extract keywords from the query and retrieve relevant nodes based on keyword matches.
 - [KeywordTableSimpleRetriever](/docs/api/classes/KeywordTableSimpleRetriever) uses a basic frequency-based approach to extract keywords and retrieve nodes.
 - [KeywordTableRAKERetriever](/docs/api/classes/KeywordTableRAKERetriever) uses the RAKE (Rapid Automatic Keyword Extraction) algorithm to extract keywords from the query, focusing on co-occurrence and context for keyword-based retrieval.
+- [Bm25Retriever](/docs/api/classes/Bm25Retriever) uses the BM25 ranking algorithm to score nodes against the terms of the query and retrieve the most relevant nodes based on keyword matches.

 ```typescript
 const retriever = vectorIndex.asRetriever({
@@ -1,44 +0,0 @@
---
-title: Using API Route
-description: Chat interface for your LlamaIndexTS application using API Route
---
-
-Using [chat-ui](https://github.com/run-llama/chat-ui), it's easy to add a chat interface to your LlamaIndexTS application.
-You just need to create an API route that provides an `api/chat` endpoint and a chat component to consume the API.
-
-## API route
-
-As an example, this is an API route for the Next.js App Router. Copy the following code into your `app/api/chat/route.ts` file to get started:
-
-```json doc-gen:file
-{
-  "file": "./src/app/api/chat/route.ts",
-	"codeblock": true
-}
-```
-
-## Chat UI
-
-This is the simplest way to add a chat interface to your application. Copy the following code into your application to consume the API:
-
-```json doc-gen:file
-{
-  "file": "./src/components/demo/chat/api/demo.tsx",
-	"codeblock": true
-}
-```
-
-## Try it out ⬇️
-
-Combining both, you're getting a fully functional chat interface:
-
-<ChatDemo />
-
-
-## Next Steps
-
-The steps above are the bare minimum to get a chat interface working. From here, you can go two ways:
-
-1. Use [create-llama](https://github.com/run-llama/create-llama) to scaffold a new LlamaIndexTS project including complex API routes and chat interfaces or
-2. Learn more about [chat-ui](https://github.com/run-llama/chat-ui) and [LlamaIndexTS](https://github.com/run-llama/llamaindex-ts) to customize the chat interface and API routes to your needs.
-
@@ -0,0 +1,8 @@
+---
+title: Using @llamaindex/chat-ui
+description: Chat UI components for your LlamaIndexTS application
+---
+
+@llamaindex/chat-ui is a library that provides a set of components for building chat user interfaces. It is built on top of [Shadcn UI](https://ui.shadcn.com).
+
+Check out our [chat-ui](/docs/chat-ui) documentation or try running examples on the [ui.llamaindex.ai](https://ui.llamaindex.ai) website.
@@ -1,22 +0,0 @@
---
-title: Install @llamaindex/chat
-description: Chat interface for your LlamaIndexTS application
---
-
-## Quick Start
-
-You can quickly add a chatbot to your project by using Shadcn CLI command:
-
-```sh
-npx shadcn@latest add https://ui.llamaindex.ai/r/chat.json
-```
-
-## Manual Installation
-
-To install the package, run the following command in your project directory:
-
-```sh
-npm i @llamaindex/chat-ui
-```
-
-For more information, check out the [github.comrun-llama/chat-ui](https://github.com/run-llama/chat-ui)
@@ -9,145 +9,11 @@ LlamaIndexServer is a Next.js-based application that allows you to quickly launc

 ## Features

- Serving a workflow as a chatbot
+- Add a sophisticated chatbot UI to your LlamaIndex workflow
+- Edit code and document artifacts in an OpenAI Canvas-style UI
+- Extendable UI components for events and headers
 - Built on Next.js for high performance and easy API development
- Optional built-in chat UI with extendable UI components
- Prebuilt development code
-
-## Installation
-
-```package-install
-npm i @llamaindex/server
-```

 ## Quick Start

-Create index.ts file and add the following code:
-
-```ts
-import { LlamaIndexServer } from "@llamaindex/server";
-import { wiki } from "@llamaindex/tools"; // or any other tool
-
-const createWorkflow = () => agent({ tools: [wiki()] })
-
-new LlamaIndexServer({
-  workflow: createWorkflow,
-  uiConfig: {
-    appTitle: "LlamaIndex App",
-    starterQuestions: ["Who is the first president of the United States?"],
-  },
-}).start();
-```
-
-## Running the Server
-
-In the same directory as `index.ts`, run the following command to start the server:
-
-  ```bash
-  tsx index.ts
-  ```
-The server will start at `http://localhost:3000`
-
-You can also make a request to the server:
-
-  ```bash
-  curl -X POST "http://localhost:3000/api/chat" -H "Content-Type: application/json" -d '{"message": "Who is the first president of the United States?"}'
-  ```
-
-## Configuration Options
-
-The LlamaIndexServer accepts the following configuration
-
- `workflow`: A callable function that creates a workflow instance for each request
- `uiConfig`: An object to configure the chat UI containing the following properties:
-  - `appTitle`: The title of the application (default: `"LlamaIndex App"`)
-  - `starterQuestions`: List of starter questions for the chat UI (default: `[]`)
-  - `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
-  - `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
-
-LlamaIndexServer accepts all the configuration options from Nextjs Custom Server such as `port`, `hostname`, `dev`, etc.
-See all Nextjs Custom Server options [here](https://nextjs.org/docs/app/building-your-application/configuring/custom-server).
-
-## Default Endpoints and Features
-
-### Chat Endpoint
-
-The server includes a default chat endpoint at `/api/chat` for handling chat interactions.
-
-### Chat UI
-
-The server always provides a chat interface at the root path (`/`) with:
-
- Configurable starter questions
- Real-time chat interface
- API endpoint integration
-
-### Static File Serving
-
- The server automatically mounts the `data` and `output` folders at `{server_url}{api_prefix}/files/data` (default: `/api/files/data`) and `{server_url}{api_prefix}/files/output` (default: `/api/files/output`) respectively.
- Your workflows can use both folders to store and access files. As a convention, the `data` folder is used for documents that are ingested and the `output` folder is used for documents that are generated by the workflow.
-
-
-## Custom UI Components
-
-The LlamaIndex server provides support for rendering workflow events using custom UI components, allowing you to extend and customize the chat interface.
-
-### Overview
-
-Custom UI components are a powerful feature that enables you to:
-
- Add custom interface elements to the chat UI using React JSX or TSX files
- Extend the default chat interface functionality
- Create specialized visualizations or interactions
-
-### Configuration
-
-Your workflow must emit events that fit this structure, allowing the LlamaIndex server to display the right UI components based on the event type.
-
-```json
-{
-    "type": "<event_name>",
-    "data": <data model>
-}
-```
-
-### Server Setup
-
-1. Initialize the LlamaIndex server with a component directory:
-
-```ts
-new LlamaIndexServer({
-  workflow: createWorkflow,
-  uiConfig: {
-    appTitle: "LlamaIndex App",
-    componentsDir: "components",
-  },
-}).start();
-```
-
-2. Add the custom component code to the directory following the naming pattern:
-
-   - File Extension: `.jsx` and `.tsx` for React components
-   - File Name: Should match the event type from your workflow (e.g., `deep_research_event.jsx` for handling `deep_research_event` type that you defined in your workflow). If there are TSX and JSX files with the same name, the TSX file will be used.
-   - Component Name: Export a default React component named `Component` that receives props from the event data
-
-   Example component structure:
-
-   ```jsx
-   function Component({ events }) {
-       // Your component logic here
-       return (
-           // Your UI code here
-       );
-   }
-   ```
-
-## Best Practices
-
-1. Always provide a workflow factory that creates fresh workflow instances
-2. Use environment variables for sensitive configuration
-3. Use starter questions to guide users in the chat UI
-
-## Getting Started with a New Project
-
-Want to start a new project with LlamaIndexServer? Check out our [create-llama](https://github.com/run-llama/create-llama) tool to quickly generate a new project with LlamaIndexServer.
+Check the latest information on the NPM package page: https://www.npmjs.com/package/@llamaindex/server
@@ -2,5 +2,5 @@
  "title": "Chat UI",
  "description": "Use chat-ui to add a chat interface to your LlamaIndexTS application.",
  "defaultOpen": false,
-  "pages": ["install", "chat", "rsc", "llamaindex-server"]
+  "pages": ["index", "llamaindex-server"]
 }
@@ -1,65 +0,0 @@
---
-title: Using Next.js RSC
-description: Chat interface for your LlamaIndexTS application using Next.js RSC
---
-
-Using [chat-ui](https://github.com/run-llama/chat-ui), it's easy to add a chat interface to your LlamaIndexTS application using [Next.js RSC](https://nextjs.org/docs/app/building-your-application/rendering/server-components) and [Vercel AI RSC](https://sdk.vercel.ai/docs/ai-sdk-rsc/overview).
-
-With RSC, the chat messages are not returned as JSON from the server (like when using an [API route](/docs/llamaindex/modules/ui/chat)), instead the chat message components are rendered on the server side.
-This is for example useful for rendering a whole chat history on the server before sending it to the client. [Check here](https://sdk.vercel.ai/docs/getting-started/navigating-the-library#when-to-use-ai-sdk-rsc), for a discussion of when to use use RSC.
-
-For implementing a chat interface with RSC, you need to create an AI action and then connect the chat interface to use it.
-
-## Create an AI action
-
-First, define an [AI context provider](https://sdk.vercel.ai/examples/rsc/state-management/ai-ui-states) with a chat server action:
-
-```json doc-gen:file
-{
-  "file": "./src/components/demo/chat/rsc/ai-action.tsx",
-	"codeblock": true
-}
-```
-
-The chat server action is using LlamaIndexTS to generate a response based on the chat history and the user input.
-
-## Create the chat UI
-
-The entrypoint of our application initializes the AI provider for the application and adds a `ChatSection` component:
-
-```json doc-gen:file
-{
-  "file": "./src/components/demo/chat/rsc/demo.tsx",
-	"codeblock": true
-}
-```
-
-The `ChatSection` component is created by using chat components from @llamaindex/chat-ui:
-
-```json doc-gen:file
-{
-  "file": "./src/components/demo/chat/rsc/chat-section.tsx",
-	"codeblock": true
-}
-```
-
-It is using a `useChatRSC` hook to conntect the chat interface to the `chat` AI action that we defined earlier:
-
-```json doc-gen:file
-{
-  "file": "./src/components/demo/chat/rsc/use-chat-rsc.tsx",
-	"codeblock": true
-}
-```
-
-## Try RSC Chat ⬇️
-
-<ChatDemoRSC />
-
-## Next Steps
-
-The steps above are the bare minimum to get a chat interface working with RSC. From here, you can go two ways:
-
-1. Use our [full-stack RSC example](https://github.com/run-llama/nextjs-rsc) based on [create-llama](https://github.com/run-llama/create-llama) to get started quickly with a fully working chat interface or
-2. Learn more about [AI RSC](https://sdk.vercel.ai/examples/rsc), [chat-ui](https://github.com/run-llama/chat-ui) and [LlamaIndexTS](https://github.com/run-llama/llamaindex-ts) to customize the chat interface and AI actions to your needs.
-
@@ -15,7 +15,7 @@ In LlamaIndex, an agent is a semi-autonomous piece of software powered by an LLM
 You'll need to have a recent version of [Node.js](https://nodejs.org/en) installed. Then you can install LlamaIndex.TS by running

 ```package-install
-npm i llamaindex @llamaindex/openai @llamaindex/readers @llamaindex/huggingface
+npm i llamaindex @llamaindex/openai @llamaindex/readers @llamaindex/huggingface @llamaindex/workflow
 ```

 ## Choose your model
@@ -35,11 +35,15 @@ First we'll need to pull in our dependencies. These are:
 import "dotenv/config";
 import {
   agent,
-  AgentStream,
-  tool,
+  agentStreamEvent,
-  openai,
+} from "@llamaindex/workflow";
+import {
+  tool,
  Settings,
 } from "llamaindex";
+import {
+  openai,
+} from "@llamaindex/openai";
 import { z } from "zod";
 ```

@@ -108,11 +113,10 @@ const myAgent = agent({ tools });

 ### Ask the agent a question

-We can use the `chat` interface to ask our agent a question, and it will use the tools we've defined to find an answer.
+We can use the `run` method to ask our agent a question, and it will use the tools we've defined to find an answer.

 ```javascript
-const context = myAgent.run("Sum 101 and 303");
-const result = await context;
+const result = await myAgent.run("Sum 101 and 303");
 console.log(result.data);
 ```
 You will see the following output:
@@ -123,12 +127,13 @@ You will see the following output:
 { result: 'The sum of 101 and 303 is 404.' }
 ```

-To stream the response, you can use the `AgentStream` event which provides chunks of the response as they become available. This allows you to display the response incrementally rather than waiting for the full response:
+To stream the response, you need to call `runStream`, which returns a stream of events. 
+The `agentStreamEvent` provides chunks of the response as they become available. This allows you to display the response incrementally rather than waiting for the full response:

 ```javascript
-const context = myAgent.run("Add 101 and 303");
-for await (const event of context) {
-  if (event instanceof AgentStream) {
+const events = myAgent.runStream("Add 101 and 303");
+for await (const event of events) {
+  if (agentStreamEvent.include(event)) {
    process.stdout.write(event.data.delta);
  }
 }
@@ -140,18 +145,18 @@ for await (const event of context) {
 The sum of 101 and 303 is 404.
 ```

+Note that we're filtering for `agentStreamEvent` as an agent might return other events - more about that in the following section.
+
 ### Logging workflow events

 To log the workflow events, you can check the event type and log the event data.

 ```javascript
-const context = myAgent.run("Sum 202 and 404");
-for await (const event of context) {
-  if (event instanceof AgentStream) {
+const events = myAgent.runStream("Sum 202 and 404");
+for await (const event of events) {
+  if (agentStreamEvent.include(event)) {
    // Stream the response
-    for (const chunk of event.data.delta) {
-      process.stdout.write(chunk);
-    }
+    process.stdout.write(event.data.delta);
  } else {
    // Log other events
    console.log("\nWorkflow event:", JSON.stringify(event, null, 2));
@@ -30,16 +30,16 @@ Settings.llm = ollama({

 ### Run local agent

-You can also create local agent by importing `agent` from `llamaindex`.
+You can also create a local agent by importing `agent` from `@llamaindex/workflow`.

 ```javascript
-import { agent } from "llamaindex";
+import { agent } from "@llamaindex/workflow";

 const workflow = agent({
  tools: [getWeatherTool],
 });

-const workflowContext = workflow.run(
+const result = await workflow.run(
  "What's the weather like in San Francisco?",
 );
 ```
@@ -25,7 +25,8 @@ We'll be bringing in `SimpleDirectoryReader`, `HuggingFaceEmbedding`, `VectorSto

 ```javascript
 import { QueryEngineTool, Settings, VectorStoreIndex } from "llamaindex";
-import { OpenAI, OpenAIAgent } from "@llamaindex/openai";
+import { agent } from "@llamaindex/workflow";
+import { openai } from "@llamaindex/openai";
 import { HuggingFaceEmbedding } from "@llamaindex/huggingface";
 import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
 ```
@@ -58,25 +59,9 @@ We will convert our text into embeddings using the `VectorStoreIndex` class thro
 const index = await VectorStoreIndex.fromDocuments(documents);
 ```

-### Configure a retriever
-
-Before LlamaIndex can send a query to the LLM, it needs to find the most relevant chunks to send. That's the purpose of a `Retriever`. We're going to get `VectorStoreIndex` to act as a retriever for us
-
-```javascript
-const retriever = await index.asRetriever();
-```
-
-### Configure how many documents to retrieve
-
-By default LlamaIndex will retrieve just the 2 most relevant chunks of text. This document is complex though, so we'll ask for more context.
-
-```javascript
-retriever.similarityTopK = 10;
-```
-
 ### Use index.queryTool

-`index.queryTool` creates a `QueryEngineTool` that can be used be an agent to query data from the index. 
+`index.queryTool` creates a `QueryEngineTool` that can be used by an agent to query data from the index:

 ```javascript
 const tools = [
@@ -85,9 +70,17 @@ const tools = [
      name: "san_francisco_budget_tool",
      description: `This tool can answer detailed questions about the individual components of the budget of San Francisco in 2023-2024.`,
    },
+    options: { similarityTopK: 10 },
  }),
 ];
+```

+The `metadata` that we're setting helps the agent to decide when to use the tool.
+Note that by default LlamaIndex will retrieve just the 2 most relevant chunks of text. This document is complex though, so we'll ask for more context by setting `similarityTopK` to 10.
+
+Now, we can create an agent using the `QueryEngineTool`:
+
+```javascript
 // Create an agent using the tools array
 const ragAgent = agent({ tools });

@@ -12,6 +12,7 @@ const tools = [
      name: "san_francisco_budget_tool",
      description: `This tool can answer detailed questions about the individual components of the budget of San Francisco in 2023-2024.`,
    },
+    options: { similarityTopK: 10 },
  }),
  tool({
    name: "sumNumbers",
@@ -8,9 +8,10 @@ We have a comprehensive, step-by-step [guide to building agents in LlamaIndex.TS

 In a new folder:

-```bash npm2yarn
+```package-install
 npm init
 npm i -D typescript @types/node
+npm i @llamaindex/openai @llamaindex/workflow llamaindex zod
 ```

 ## Run agent
@@ -20,15 +21,14 @@ Create the file `example.ts`. This code will:
 - Create two tools for use by the agent:
  - A `sumNumbers` tool that adds two numbers
  - A `divideNumbers` tool that divides numbers
-
 - Give an example of the data structure we wish to generate
 - Prompt the LLM with instructions and the example, plus a sample transcript

-<include cwd>../../examples/agent/openai.ts</include>
+<include cwd>../../examples/agents/agent/openai.ts</include>

 To run the code:

-```bash
+```package-install
 npx tsx example.ts
 ```

@@ -36,9 +36,18 @@ You should expect output something like:

 ```
 {
-  content: 'The sum of 5 + 5 is 10. When you divide 10 by 2, you get 5.',
-  role: 'assistant',
-  options: {}
+  result: '5 + 5 is 10. Then, 10 divided by 2 is 5.',
+  state: {
+    memory: ChatMemoryBuffer {
+      chatStore: SimpleChatStore {},
+      chatStoreKey: 'chat_history',
+      tokenLimit: 750000
+    },
+    scratchpad: [],
+    currentAgentName: 'Agent',
+    agents: [ 'Agent' ],
+    nextAgentName: null
+  }
 }
 Done
 ```
@@ -4,7 +4,7 @@
    "basic_agent",
    "rag",
    "agents",
-    "workflow",
+    "workflows",
    "local_llm",
    "chatbot",
    "structured_data_extraction"
@@ -16,7 +16,7 @@ LlamaIndex uses a two stage method when using an LLM with your data:
 1. **indexing stage**: preparing a knowledge base, and
 2. **querying stage**: retrieving relevant context from the knowledge to assist the LLM in responding to a question

-![](./_static/concepts/rag.jpg)
+![](/_static/concepts/rag.jpg)

 This process is also known as Retrieval Augmented Generation (RAG).

@@ -28,7 +28,7 @@ Let's explore each stage in detail.

 LlamaIndex.TS help you prepare the knowledge base with a suite of data connectors and indexes.

-![](./_static/concepts/indexing.jpg)
+![](/_static/concepts/indexing.jpg)

 [**Data Loaders**](/docs/llamaindex/modules/data/readers):
 A data connector (i.e. `Reader`) ingest data from different data sources and data formats into a simple `Document` representation (text and simple metadata).
@@ -54,7 +54,7 @@ LlamaIndex provides composable modules that help you build and integrate RAG pip

 These building blocks can be customized to reflect ranking preferences, as well as composed to reason over multiple knowledge bases in a structured way.

-![](./_static/concepts/querying.jpg)
+![](/_static/concepts/querying.jpg)

 #### Building Blocks

@@ -8,9 +8,10 @@ One of the most common use-cases for LlamaIndex is Retrieval-Augmented Generatio

 In a new folder, run:

-```bash npm2yarn
+```package-install
 npm init
 npm i -D typescript @types/node
+npm i llamaindex
 ```

 Then, check out the [installation](/docs/llamaindex/getting_started/installation) steps to install LlamaIndex.TS and prepare an OpenAI key.
@@ -26,7 +27,7 @@ Create the file `example.ts`. This code will
 - index it (which creates embeddings using OpenAI)
 - create a query engine to answer questions about the data

-<include cwd>../../examples/vectorIndex.ts</include>
+<include cwd>../../examples/index/vectorIndex.ts</include>

 Create a `tsconfig.json` file in the same folder:

@@ -34,7 +35,7 @@ Create a `tsconfig.json` file in the same folder:

 Now you can run the code with

-```bash
+```package-install
 npx tsx example.ts
 ```

@@ -10,9 +10,10 @@ You can use [other LLMs](/docs/llamaindex/modules/models/llms) via their APIs; i

 In a new folder:

-```bash npm2yarn
+```package-install
 npm init
 npm i -D typescript @types/node
+npm i @llamaindex/openai zod
 ```

 ## Extract data
@@ -23,11 +24,11 @@ Create the file `example.ts`. This code will:
 - Give an example of the data structure we wish to generate
 - Prompt the LLM with instructions and the example, plus a sample transcript

-<include cwd>../../examples/jsonExtract.ts</include>
+<include cwd>../../examples/misc/jsonExtract.ts</include>

 To run the code:

-```bash
+```package-install
 npx tsx example.ts
 ```

@@ -1,224 +0,0 @@
---
-title: Inputs / Outputs
-description: Learn how to use different inputs and outputs in your workflows.
---
-
-Inputs and outputs are the way to communicate between steps in a workflow. In the previous example,
-we used `StartEvent` and `StopEvent` to communicate between steps. However, you can use any type of event to communicate between steps.
-
-## Multiple inputs
-
-You can define multiple inputs for a step.
-
-In the following example, we define a complex workflow with multiple inputs and outputs.
-
-```ts twoslash
-import { Workflow, StartEvent, StopEvent, WorkflowEvent } from '@llamaindex/workflow';
-
-class AEvent extends WorkflowEvent<string> {
-	constructor(data: string) {
-		super(data);
-	}
-}
-
-class BEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-
-class ResultEvent extends WorkflowEvent<string> {
-	constructor(data: string) {
-		super(data);
-	}
-}
-```
-
-First, let's define the events that we will use in the workflow.
-
-```ts twoslash
-import { Workflow, StartEvent, StopEvent, WorkflowEvent } from '@llamaindex/workflow';
-
-class AEvent extends WorkflowEvent<string> {
-	constructor(data: string) {
-		super(data);
-	}
-}
-
-class BEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-
-class ResultEvent extends WorkflowEvent<string> {
-	constructor(data: string) {
-		super(data);
-	}
-}
-
-const workflow = new Workflow<never, string, string>();
-
-workflow.addStep({
-	inputs: [StartEvent<string>],
-	outputs: [StopEvent<string>]
-}, async (
-	context,
-	startEvent
-) => {
-	const input = startEvent.data;
-	const aEvent = await context.requireEvent(AEvent);
-	const bEvent = await context.requireEvent(BEvent);
-	const a = aEvent.data;
-	const b = bEvent.data;
-	return new StopEvent(`Hello, ${input}! A: ${a}, B: ${b}`);
-});
-
-// ---cut---
-workflow.addStep({
-	inputs: [AEvent, BEvent],
-	outputs: [ResultEvent]
-}, async (
-	context,
-	aEvent,
-	bEvent
-) => {
-	const a = aEvent.data;
-	const b = bEvent.data;
-	return new ResultEvent(`A: ${a}, B: ${b}`);
-});
-```
-
-This step means that it requires two events: `AEvent` and `BEvent`. It will return a `ResultEvent` with the data `A: ${a}, B: ${b}`.
-
-## A or B input
-
-If we want to have a step that can accept either `AEvent` or `BEvent`, we can define the step like this:
-
-```ts twoslash
-import { Workflow, StartEvent, StopEvent, WorkflowEvent } from '@llamaindex/workflow';
-
-class AEvent extends WorkflowEvent<string> {
-	constructor(data: string) {
-		super(data);
-	}
-}
-
-class BEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-
-class ResultEvent extends WorkflowEvent<string> {
-	constructor(data: string) {
-		super(data);
-	}
-}
-
-const workflow = new Workflow<never, string, string>();
-
-workflow.addStep({
-	inputs: [StartEvent<string>],
-	outputs: [StopEvent<string>]
-}, async (
-	context,
-	startEvent
-) => {
-	const input = startEvent.data;
-	const aEvent = await context.requireEvent(AEvent);
-	const bEvent = await context.requireEvent(BEvent);
-	const a = aEvent.data;
-	const b = bEvent.data;
-	return new StopEvent(`Hello, ${input}! A: ${a}, B: ${b}`);
-});
-
-// ---cut---
-workflow.addStep({
-	inputs: [WorkflowEvent.or(AEvent, BEvent)],
-	outputs: [ResultEvent]
-}, async (
-	context,
-	aOrBEvent
-) => {
-	if (aOrBEvent instanceof AEvent) {
-		// ^?
-
-
-		const a = aOrBEvent.data;
-		//        ^?
-
-
-		return new ResultEvent(`A: ${a}`);
-	} else {
-		const b = aOrBEvent.data;
-		//        ^?
-
-
-		return new ResultEvent(`B: ${b}`);
-	}
-});
-```
-
-This step means that it requires either `AEvent` or `BEvent`. It will return a `ResultEvent` with the data `A: ${a}` or `B: ${b}`.
-
-You can still combine the logic with `context.requireEvent` to get the data from the event.
-
-<Accordions>
-	<Accordion title="Under the hood">
-		We use JavaScript Inheritance and the prototype chain to implement the `or` logic.
-		The `or` method creates a new class that extends the two classes that you pass to it.
-
-		<a
-			target="_blank"
-			href="https://developer.mozilla.org/en-US/docs/Web/JavaScript/Inheritance_and_the_prototype_chain"
-		>
-			MDN - Inheritance and the prototype chain
-		</a>
-	</Accordion>
-</Accordions>
-
-## Multiple outputs
-
-You can define multiple outputs for a step.
-
-```ts twoslash
-import { Workflow, StartEvent, StopEvent, WorkflowEvent } from '@llamaindex/workflow';
-
-class AEvent extends WorkflowEvent<string> {
-	constructor(data: string) {
-		super(data);
-	}
-}
-
-class BEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-
-class ResultEvent extends WorkflowEvent<string> {
-	constructor(data: string) {
-		super(data);
-	}
-}
-
-const workflow = new Workflow<never, string, string>();
-// ---cut---
-workflow.addStep({
-	inputs: [StartEvent<string>],
-	outputs: [AEvent, BEvent]
-}, async (
-	context,
-	startEvent
-) => {
-	const input = startEvent.data;
-	if (Math.random() > 0.5) {
-		return new AEvent(`Hello, ${input}!`);
-	} else {
-		return new BEvent(42);
-	}
-});
-```
-
-This step will return either an `AEvent` or a `BEvent` based on a random number.
@@ -1,196 +0,0 @@
---
-title: Basic Usage
-description: Learn how to use the LlamaIndex workflow.
---
-
-A `Workflow` in LlamaIndex.TS is an event-driven abstraction used to chain together several events.
-Workflows are made up of steps, with each step responsible for handling certain event types and emitting new events.
-
-Workflows are designed for any cases that benefit from event-driven programming, not only for LLM and AI tasks.
-
-```package-install
-npm i @llamaindex/workflow
-```
-
-## Start from scratch
-
-Let's start from a Hello World workflow.
-
-```ts twoslash
-import { Workflow } from '@llamaindex/workflow';
-
-type ContextData = {
-	counter: number;
-}
-// ---cut---
-const contextData: ContextData = { counter: 0 };
-
-const workflow = new Workflow<ContextData, string, string>();
-//     ^?
-
-
-
-```
-
-First, we define a workflow with 3 generic types: `ContextData`, `Input`, and `Output`.
-
-In general, `ContextData` is used to store the shared data between steps, `Input` is the type of the input event, and `Output` is the type of the output event.
-
-In you code logic, you should **share state between steps via `ContextData`**.
-
-```ts twoslash
-import { Workflow, StartEvent, StopEvent } from '@llamaindex/workflow';
-
-type ContextData = {
-	counter: number;
-}
-
-const contextData: ContextData = { counter: 0 };
-
-const workflow = new Workflow<ContextData, string, string>();
-// ---cut---
-workflow.addStep({
-	inputs: [StartEvent<string>],
-	outputs: [StopEvent<string>]
-}, async (context, startEvent) => {
-	const input = startEvent.data;
-	context.data.counter++;
-	return new StopEvent(`Hello, ${input}!`);
-});
-```
-
-In the workflow, we add a step that listens to `StartEvent<string>` and emits `StopEvent<string>`.
-
-The step is an async function that takes two arguments: `context` and `event`.
-
-### `context` type
-
-<AutoTypeTable path="./src/deps/type.ts" name="HandlerContext" />
-
-There are two more properties in `HandlerContext`:
-
- `sendEvent`: invoke another event in the workflow, other than `StartEvent`, `StopEvent`, or the current event. (Or there will have circular reference)
- `requireEvent`: wait for a specific event to be emitted.
-
-You can use `sendEvent` and `requireEvent` to build complex workflows.
-
-```ts twoslash
-import { Workflow, StartEvent, StopEvent, WorkflowEvent } from '@llamaindex/workflow';
-
-type ContextData = {
-	counter: number;
-}
-
-const contextData: ContextData = { counter: 0 };
-
-const workflow = new Workflow<ContextData, string, string>();
-
-// ---cut---
-class AnalysisStartEvent extends WorkflowEvent<string> {}
-class AnalysisStopEvent extends WorkflowEvent<boolean> {}
-workflow.addStep({
-	inputs: [AnalysisStartEvent],
-	outputs: [AnalysisStopEvent]
-}, async (...args) => {
-	// do some analysis
-	return new AnalysisStopEvent(true);
-})
-workflow.addStep({
-	inputs: [StartEvent<string>],
-	outputs: [StopEvent<string>]
-}, async (context, startEvent) => {
-	const input = startEvent.data;
-	context.sendEvent(new AnalysisStartEvent('start'));
-	context.data.counter++;
-	const { data } = await context.requireEvent(AnalysisStopEvent);
-	return new StopEvent(`Hello, ${input}! Analysis result: ${data ? 'success' : 'fail'}`);
-});
-```
-
-For example, you can compile `requireEvent` with `waitUntil` in [Vercel Functions](https://vercel.com/docs/functions/functions-api-reference#waituntil) or [Cloudflare Worker](https://developers.cloudflare.com/workers/runtime-apis/context/#waituntil)
-
-```ts twoslash
-import { waitUntil } from '@vercel/functions';
-import { Workflow, StartEvent, StopEvent, WorkflowEvent } from '@llamaindex/workflow';
-
-type ContextData = {
-	counter: number;
-}
-
-const contextData: ContextData = { counter: 0 };
-
-const workflow = new Workflow<ContextData, string, string>();
-
-class AnalysisStartEvent extends WorkflowEvent<string> {}
-class AnalysisStopEvent extends WorkflowEvent<boolean> {}
-
-// ---cut---
-workflow.addStep({
-	inputs: [StartEvent<string>],
-	outputs: [StopEvent<string>]
-}, async (context, startEvent) => {
-	const input = startEvent.data;
-	context.sendEvent(new AnalysisStartEvent('start'));
-	context.data.counter++;
-	waitUntil(context.requireEvent(AnalysisStopEvent));
-	// note that `waitUntil` is not a promise, it will extend the lifetime of the workflow
-	// you can wait for some background tasks to finish
-	return new StopEvent(`Hello, ${input}!`);
-});
-```
-
-## Multiple runs
-
-You can run the same workflow multiple times with different inputs.
-
-```ts twoslash
-import { Workflow, StartEvent, StopEvent } from '@llamaindex/workflow';
-
-type ContextData = {
-	counter: number;
-}
-
-const contextData: ContextData = { counter: 0 };
-
-const workflow = new Workflow<ContextData, string, string>();
-
-workflow.addStep({
-	inputs: [StartEvent<string>],
-	outputs: [StopEvent<string>]
-}, async (context, startEvent) => {
-	const input = startEvent.data;
-	context.data.counter++;
-	return new StopEvent(`Hello, ${input}!`);
-});
-
-// ---cut---
-{
-	const ret = await workflow.run('Alex', contextData);
-	console.log(ret.data); // Hello, Alex!
-}
-
-{
-	const ret = await workflow.run('World', contextData);
-	console.log(ret.data); // Hello, World!
-}
-```
-
-Context is shared between runs, so the counter will be increased.
-
-Ideally, it should be serializable to make sure it can be recovered from HTTP requests or other storage.
-
-### Full example
-
-<iframe
-	className="w-full h-[440px]"
-	aria-label="Workflow example"
-	src="https://stackblitz.com/github/run-llama/LlamaIndexTS/tree/main/examples?file=node/workflow/basic.ts"
-/>
-
-## `Workflow` type
-
-<AutoTypeTable path="./src/deps/type.ts" name="Workflow" />
-
-## `WorkflowContext` type
-
-<AutoTypeTable path="./src/deps/type.ts" name="WorkflowContext" />
@@ -1,6 +0,0 @@
-{
-  "title": "Workflow",
-  "description": "See how to use @llamaindex/workflow",
-  "defaultOpen": false,
-  "pages": ["index", "different-inputs-outputs", "streaming"]
-}
@@ -1,198 +0,0 @@
---
-title: Streaming
-description: Learn how to use the LlamaIndex workflow with streaming.
---
-
-`Workflow` API by default is designed for streaming data. In this guide, we will show you how to use the `Workflow` API with streaming data.
-
-Each `workflow.run` call returns `WorkflowContext`, which implements `AsyncIterable` interface. You can use it to stream data.
-
-```ts twoslash
-import { Workflow, WorkflowEvent, StartEvent, StopEvent } from '@llamaindex/workflow';
-class ComputeEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-class ComputeResultEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-
-type ContextData = {
-	sum: number;
-}
-
-const workflow = new Workflow<ContextData, number, number>();
-workflow.addStep({
-	inputs: [StartEvent<number>],
-	outputs: [StopEvent<number>]
-}, async (context, startEvent) => {
-	const total = startEvent.data;
-	for (let i = 0; i < total; i++) {
-		context.sendEvent(new ComputeEvent(i));
-	}
-	const computeResults = await Promise.all(Array.from({ length: total }).map(() => context.requireEvent(ComputeResultEvent)));
-	// Workflow API allows you to start events in parallel and wait for all of them to finish
-	context.data.sum = computeResults.reduce((acc, curr) => acc + curr.data, 0);
-	return new StopEvent(context.data.sum);
-});
-```
-
-We define a parallel computation workflow that computes the sum of numbers from 0 to `total`.
-
-The workflow sends `ComputeEvent` events for each number and waits for `ComputeResultEvent` events. After receiving all `ComputeResultEvent` events, the workflow returns the sum as a `StopEvent`.
-
-What if we want cutoff if the sum exceeds a certain value?
-
-## Streaming
-
-```ts twoslash
-import { Workflow, WorkflowEvent, StartEvent, StopEvent } from '@llamaindex/workflow';
-import { StopCircle } from 'lucide-react';
-class ComputeEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-class ComputeResultEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-
-
-type ContextData = {
-	sum: number;
-}
-
-const workflow = new Workflow<ContextData, number, number>();
-// ---cut---
-const context = workflow.run(1000, {
-	sum: 0
-});
-
-for await (const event of context) {
-	if (event instanceof ComputeEvent) {
-		if (context.data.sum > 100) {
-			throw new Error('Sum exceeds 100');
-		}
-	}
-	if (event instanceof StopEvent) {
-		console.log('result', event.data);
-	}
-}
-```
-
-You can define more custom logic using `AsyncIterable` interface.
-
-For example. I just want to stop the workflow if I get a `ComputeResultEvent`
-
-
-```ts twoslash
-import { Workflow, WorkflowEvent, StartEvent, StopEvent } from '@llamaindex/workflow';
-import { StopCircle } from 'lucide-react';
-class ComputeEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-class ComputeResultEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-
-
-type ContextData = {
-	sum: number;
-}
-
-const workflow = new Workflow<ContextData, number, number>();
-// ---cut---
-async function compute() {
-	const context = workflow.run(1000, {
-		sum: 0
-	});
-	for await (const event of context) {
-		if (event instanceof ComputeResultEvent) {
-			return event.data;
-		}
-	}
-	throw new Error('UNREACHABLE');
-}
-
-const result = await compute();
-```
-
-### Streaming with UI
-
-You can use the `Workflow` API with UI libraries like React.
-
-```tsx twoslash
-// @filename: utils.ts
-export async function runWithoutBlocking(fn: () => Promise<void>) {
-	fn();
-}
-// @filename: action.ts
-// ---cut---
-'use server';
-// "use server" is required to enable server side feature in React
-import { createStreamableUI } from 'ai/rsc';
-import { runWithoutBlocking } from './utils';
-// ---cut-start---
-import { Workflow, WorkflowEvent, StartEvent, StopEvent } from '@llamaindex/workflow';
-class ComputeEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-class ComputeResultEvent extends WorkflowEvent<number> {
-	constructor(data: number) {
-		super(data);
-	}
-}
-
-
-type ContextData = {
-	sum: number;
-}
-
-const workflow = new Workflow<ContextData, number, number>();
-const min = 100;
-const max = 1000;
-workflow.addStep(
-	{
-		inputs: [ComputeEvent],
-		outputs: [ComputeResultEvent]
-	},
-	async (context, event) => {
-		await new Promise((resolve) =>
-			setTimeout(resolve, Math.floor(Math.random() * (max - min + 1) + min))
-		);
-		return new ComputeResultEvent(event.data);
-	}
-);
-// ---cut-end---
-export async function compute() {
-	'use server';
-	const ui = createStreamableUI();
-	const context = workflow.run(100, {
-		sum: 0
-	});
-	runWithoutBlocking(async () => {
-		for await (const event of context) {
-			if (event instanceof ComputeResultEvent) {
-				// Update UI
-			} else if (event instanceof StopEvent) {
-				// Update UI
-			}
-			// ...
-		}
-	});
-	return ui.value;
-}
-```
-
-<WorkflowStreamingDemo />
@@ -0,0 +1,176 @@
+---
+title: Workflows
+---
+
+A `Workflow` in LlamaIndex is a lightweight, event-driven abstraction used to chain together several events. Workflows are made up of `handlers`, with each one responsible for processing specific event types and emitting new events.
+
+Workflows are designed to be flexible and can be used to build agents, RAG flows, extraction flows, or anything else you want to implement.
+
+```package-install
+npm i @llamaindex/workflow @llamaindex/openai
+```
+
+## Getting Started
+
+Let's explore a simple workflow example where a joke is generated and then critiqued and iterated on:
+
+<include cwd>../../examples/agents/workflow/joke.ts</include>
+
+There are a few moving pieces here, so let's go through this step by step.
+
+### Defining Workflow Events
+
+```typescript
+const startEvent = workflowEvent<string>(); // Input topic for joke
+const jokeEvent = workflowEvent<{ joke: string }>(); // Intermediate joke
+const critiqueEvent = workflowEvent<{ joke: string; critique: string }>(); // Intermediate critique
+const resultEvent = workflowEvent<{ joke: string; critique: string }>(); // Final joke + critique
+```
+
+Events are defined using the `workflowEvent` function and contain arbitrary data provided as a generic type. In this example, we have four events:
+- `startEvent`: Takes a string input (the joke topic)
+- `jokeEvent`: Contains an object with a joke property
+- `critiqueEvent`: Contains both the joke and its critique, used for the feedback loop
+- `resultEvent`: Contains the final joke and critique after any iterations
+
+### Setting up the Workflow with Stateful Middleware
+
+```typescript
+const { withState, getContext } = createStatefulMiddleware(() => ({
+  numIterations: 0,
+  maxIterations: 3,
+}));
+const jokeFlow = withState(createWorkflow());
+```
+
+Our workflow is implemented using the `createWorkflow()` function, enhanced with the `withState` middleware. This middleware provides shared state across all handlers, which in this case tracks:
+- `numIterations`: Counts how many iterations of joke improvement we've done
+- `maxIterations`: Sets a limit to prevent infinite loops
+
+This state is accessible inside handlers through the `state` property of the object returned by `getContext()`, i.e. `getContext().state`.
+
+### Adding Handlers with Loops
+
+We have three key handlers in our workflow:
+
+1. The first handler processes the `startEvent`, generates an initial joke, and emits a `jokeEvent`:
+
+```typescript
+jokeFlow.handle([startEvent], async (event) => {
+  // Prompt the LLM to write a joke
+  const prompt = `Write your best joke about ${event.data}. Write the joke between <joke> and </joke> tags.`;
+  const response = await llm.complete({ prompt });
+ 
+  // Parse the joke from the response
+  const joke =
+    response.text.match(/<joke>([\s\S]*?)<\/joke>/)?.[1]?.trim() ??
+    response.text;
+  return jokeEvent.with({ joke: joke });
+});
+```
+
+2. The second handler handles the `jokeEvent`, critiques the joke, and either:
+   - Emits a `critiqueEvent` if the joke needs improvement
+   - Emits a `resultEvent` if the joke is good enough
+
+```typescript
+jokeFlow.handle([jokeEvent], async (event) => {
+  // Prompt the LLM to critique the joke
+  const prompt = `Give a thorough critique of the following joke. If the joke needs improvement, put "IMPROVE" somewhere in the critique: ${event.data.joke}`;
+  const response = await llm.complete({ prompt });
+ 
+  // If the critique includes "IMPROVE", keep iterating, else, return the result
+  if (response.text.includes("IMPROVE")) {
+    return critiqueEvent.with({
+      joke: event.data.joke,
+      critique: response.text,
+    });
+  }
+ 
+  return resultEvent.with({ joke: event.data.joke, critique: response.text });
+});
+```
+
+3. The third handler processes the `critiqueEvent`, generates an improved joke based on the critique, and either:
+   - Loops back to the joke evaluation (if under the iteration limit)
+   - Emits the final `resultEvent` (if iteration limit reached)
+
+```typescript
+jokeFlow.handle([critiqueEvent], async (event) => {
+  // Keep track of the number of iterations
+  const state = getContext().state;
+  state.numIterations++;
+ 
+  // Write a new joke based on the previous joke and critique
+  const prompt = `Write a new joke based on the following critique and the original joke. Write the joke between <joke> and </joke> tags.\n\nJoke: ${event.data.joke}\n\nCritique: ${event.data.critique}`;
+  const response = await llm.complete({ prompt });
+ 
+  // Parse the joke from the response
+  const joke =
+    response.text.match(/<joke>([\s\S]*?)<\/joke>/)?.[1]?.trim() ??
+    response.text;
+ 
+  // If we've done less than the max number of iterations, keep iterating
+  // else, return the result
+  if (state.numIterations < state.maxIterations) {
+    return jokeEvent.with({ joke: joke });
+  }
+ 
+  return resultEvent.with({ joke: joke, critique: event.data.critique });
+});
+```
+
+### Running the Workflow
+
+```typescript
+async function main() {
+  const { stream, sendEvent } = jokeFlow.createContext();
+  sendEvent(startEvent.with("pirates"));
+
+  let result: { joke: string, critique: string } | undefined;
+
+  for await (const event of stream) {
+    // console.log(event.data);  optionally log the event data
+    if (resultEvent.include(event)) {
+      result = event.data;
+      break; // Stop when we get the final result
+    }
+  }
+  
+  console.log(result);
+}
+```
+
+To run the workflow, we:
+1. Create a workflow context with `createContext()`
+2. Trigger the initial event with `sendEvent()`
+3. Listen to the event stream and process events as they arrive
+4. Use `include()` to check if an event is of a specific type
+5. Break the loop when we receive our final result
+
+### Using Stream Utilities
+
+The `stream` returned by `createContext` contains utility functions to make working with event streams easier:
+
+```typescript
+// Create a workflow context and send the initial event
+const { stream, sendEvent } = jokeFlow.createContext();
+sendEvent(startEvent.with("pirates"));
+
+// Collect all events until we get a resultEvent
+const allEvents = await stream.until(resultEvent).toArray(); 
+
+// The last event will be the resultEvent
+const finalEvent = allEvents.at(-1);
+console.log(finalEvent.data); // Output the joke and critique
+```
+
+The stream utilities make it easier to work with the asynchronous event flow. In this example, we use:
+- `toArray`: Aggregates all events into an array
+- `until`: Creates a stream that emits events until a condition is met (in this case, until a resultEvent is received)
+
+You can combine these utilities with other stream operators like `filter` and `map` to create powerful processing pipelines.
+
+## Next Steps
+
+To learn more about workflows, check out [the Workflows documentation](/docs/llamaindex/modules/agents/workflows).
@@ -1,3 +1,3 @@
 {
-  "pages": ["llamaindex", "llamaflow", "cloud", "api"]
+  "pages": ["llamaindex", "api", "workflows", "chat-ui"]
 }
--- a/Show More
+++ b/Show More