Compare commits

...

3 Commits

Author SHA1 Message Date
github-actions[bot] 85e5e7e662 Release 0.5.14 (#608)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2025-05-16 14:41:46 +07:00
Huu Le 58362542c0 chore: add workflow contract for server (#623) 2025-05-16 14:26:24 +07:00
Thuc Pham 6f44185f68 fix: init messages memory in start event handler (#627) 2025-05-16 12:45:35 +07:00
22 changed files with 264 additions and 102 deletions
-5
View File
@@ -1,5 +0,0 @@
---
"create-llama": patch
---
Split artifacts use case to document generator and code generator
-5
View File
@@ -1,5 +0,0 @@
---
"create-llama": patch
---
chore: improve dev experience with nodemon
-5
View File
@@ -1,5 +0,0 @@
---
"create-llama": patch
---
Fix typing check issue
-5
View File
@@ -1,5 +0,0 @@
---
"create-llama": patch
---
fix chromadb dependency issue
-5
View File
@@ -1,5 +0,0 @@
---
"@llamaindex/server": patch
---
feat: add dev mode UI
-5
View File
@@ -1,5 +0,0 @@
---
"create-llama": patch
---
fix: remove dead generated ai code
-5
View File
@@ -1,5 +0,0 @@
---
"create-llama": patch
---
Deprecate pro mode
+2 -2
View File
@@ -79,7 +79,7 @@ jobs:
- uses: actions/upload-artifact@v4
if: always()
with:
name: playwright-report-python-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}
name: playwright-report-python-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}-${{ matrix.template-types }}
path: packages/create-llama/playwright-report/
overwrite: true
retention-days: 30
@@ -149,7 +149,7 @@ jobs:
- uses: actions/upload-artifact@v4
if: always()
with:
name: playwright-report-typescript-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}-node${{ matrix.node-version }}
name: playwright-report-typescript-${{ matrix.os }}-${{ matrix.frameworks }}-${{ matrix.datasources }}-node${{ matrix.node-version }}-${{ matrix.template-types }}
path: packages/create-llama/playwright-report/
overwrite: true
retention-days: 30
+11
View File
@@ -1,5 +1,16 @@
# create-llama
## 0.5.14
### Patch Changes
- 1df8cfb: Split artifacts use case to document generator and code generator
- 1b5a519: chore: improve dev experience with nodemon
- b3eb0ba: Fix typing check issue
- 556f33c: fix chromadb dependency issue
- 2451539: fix: remove dead generated ai code
- 7a70390: Deprecate pro mode
## 0.5.13
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "create-llama",
"version": "0.5.13",
"version": "0.5.14",
"description": "Create LlamaIndex-powered apps with one command",
"keywords": [
"rag",
@@ -1,5 +1,5 @@
import { extractLastArtifact } from "@llamaindex/server";
import { ChatMemoryBuffer, LLM, MessageContent, Settings } from "llamaindex";
import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
import {
agentStreamEvent,
@@ -12,12 +12,6 @@ import {
import { z } from "zod";
export const workflowFactory = async (reqBody: any) => {
const workflow = createCodeArtifactWorkflow(reqBody);
return workflow;
};
export const RequirementSchema = z.object({
next_step: z.enum(["answering", "coding"]),
language: z.string().nullable().optional(),
@@ -71,32 +65,28 @@ const artifactEvent = workflowEvent<{
};
}>();
export function createCodeArtifactWorkflow(reqBody: any, llm?: LLM) {
if (!llm) {
llm = Settings.llm;
}
export function workflowFactory(reqBody: any) {
const llm = Settings.llm;
const { withState, getContext } = createStatefulMiddleware(() => {
return {
memory: new ChatMemoryBuffer({
llm,
chatHistory: reqBody.chatHistory,
}),
memory: new ChatMemoryBuffer({ llm }),
lastArtifact: extractLastArtifact(reqBody),
};
});
const workflow = withState(createWorkflow());
workflow.handle([startAgentEvent], async ({ data: { userInput } }) => {
workflow.handle([startAgentEvent], async ({ data }) => {
const { userInput, chatHistory = [] } = data;
// Prepare chat history
const { state } = getContext();
// Put user input to the memory
if (!userInput) {
throw new Error("Missing user input to start the workflow");
}
state.memory.put({
role: "user",
content: userInput,
});
state.memory.set(chatHistory);
state.memory.put({ role: "user", content: userInput });
return planEvent.with({
userInput: userInput,
});
@@ -1,5 +1,5 @@
import { extractLastArtifact } from "@llamaindex/server";
import { ChatMemoryBuffer, LLM, MessageContent, Settings } from "llamaindex";
import { ChatMemoryBuffer, MessageContent, Settings } from "llamaindex";
import {
agentStreamEvent,
@@ -12,12 +12,6 @@ import {
import { z } from "zod";
export const workflowFactory = async (reqBody: any) => {
const workflow = createDocumentArtifactWorkflow(reqBody);
return workflow;
};
export const DocumentRequirementSchema = z.object({
type: z.enum(["markdown", "html"]),
title: z.string(),
@@ -74,32 +68,28 @@ const artifactEvent = workflowEvent<{
};
}>();
export function createDocumentArtifactWorkflow(reqBody: any, llm?: LLM) {
if (!llm) {
llm = Settings.llm;
}
export function workflowFactory(reqBody: any) {
const llm = Settings.llm;
const { withState, getContext } = createStatefulMiddleware(() => {
return {
memory: new ChatMemoryBuffer({
llm,
chatHistory: reqBody.chatHistory,
}),
memory: new ChatMemoryBuffer({ llm }),
lastArtifact: extractLastArtifact(reqBody),
};
});
const workflow = withState(createWorkflow());
workflow.handle([startAgentEvent], async ({ data: { userInput } }) => {
workflow.handle([startAgentEvent], async ({ data }) => {
const { userInput, chatHistory = [] } = data;
// Prepare chat history
const { state } = getContext();
// Put user input to the memory
if (!userInput) {
throw new Error("Missing user input to start the workflow");
}
state.memory.put({
role: "user",
content: userInput,
});
state.memory.set(chatHistory);
state.memory.put({ role: "user", content: userInput });
return planEvent.with({
userInput,
context: state.lastArtifact
+6
View File
@@ -1,5 +1,11 @@
# @llamaindex/server
## 0.2.1
### Patch Changes
- f072308: feat: add dev mode UI
## 0.2.0
### Minor Changes
+128 -5
View File
@@ -4,10 +4,10 @@ LlamaIndexServer is a Next.js-based application that allows you to quickly launc
## Features
- Serving a workflow as a chatbot
- Add a sophisticated chatbot UI to your LlamaIndex workflow
- Edit code and document artifacts in an OpenAI Canvas-style UI
- Extendable UI components for events and headers
- Built on Next.js for high performance and easy API development
- Optional built-in chat UI with extendable UI components
- Prebuilt development code
## Installation
@@ -21,9 +21,11 @@ Create an `index.ts` file and add the following code:
```ts
import { LlamaIndexServer } from "@llamaindex/server";
import { openai } from "@llamaindex/openai";
import { agent } from "@llamaindex/workflow";
import { wiki } from "@llamaindex/tools"; // or any other tool
const createWorkflow = () => agent({ tools: [wiki()] });
const createWorkflow = () => agent({ tools: [wiki()], llm: openai("gpt-4o") });
new LlamaIndexServer({
workflow: createWorkflow,
@@ -34,6 +36,8 @@ new LlamaIndexServer({
}).start();
```
The `createWorkflow` function is a factory function that creates an [Agent Workflow](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow) with a tool that retrieves information from Wikipedia in this case. For more details, read about the [Workflow factory contract](#workflow-factory-contract).
## Running the Server
In the same directory as `index.ts`, run the following command to start the server:
@@ -54,16 +58,75 @@ curl -X POST "http://localhost:3000/api/chat" -H "Content-Type: application/json
The `LlamaIndexServer` accepts the following configuration options:
- `workflow`: A callable function that creates a workflow instance for each request
- `workflow`: A callable function that creates a workflow instance for each request. See [Workflow factory contract](#workflow-factory-contract) for more details.
- `uiConfig`: An object to configure the chat UI containing the following properties:
- `appTitle`: The title of the application (default: `"LlamaIndex App"`)
- `starterQuestions`: List of starter questions for the chat UI (default: `[]`)
- `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
- `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
- `devMode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/src/workflow.ts`. Please start the server in dev mode (`npm run dev`) to use this reload feature.
`LlamaIndexServer` also accepts all the configuration options of the Next.js Custom Server, such as `port`, `hostname`, `dev`, etc.
See all Next.js Custom Server options [here](https://nextjs.org/docs/app/building-your-application/configuring/custom-server).
## Workflow factory contract
The `workflow` provided will be called for each chat request to initialize a new workflow instance. The contract of the generated workflow must be the same as for the [Agent Workflow](https://ts.llamaindex.ai/docs/llamaindex/modules/agents/agent_workflow).
This means that the workflow must handle a `startAgentEvent` event, which is the entry point of the workflow and contains the following information in its `data` property:
```typescript
{
userInput: MessageContent;
chatHistory?: ChatMessage[] | undefined;
};
```
The `userInput` is the latest user message and the `chatHistory` is the list of messages exchanged between the user and the workflow so far.
Furthermore, the workflow must stop with a `stopAgentEvent` event to mark the end of the workflow. In between, the workflow can emit [UI events](#ai-generated-ui-components) to render custom UI components and [Artifact events](#sending-artifacts-to-the-ui) to send structured data like generated documents or code snippets to the UI.
```ts
import {
createStatefulMiddleware,
createWorkflow,
startAgentEvent,
} from "@llamaindex/workflow";
import { ChatMemoryBuffer, type ChatMessage, Settings } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { wiki } from "@llamaindex/tools";
Settings.llm = openai("gpt-4o");
export const workflowFactory = async () => {
const workflow = createWorkflow();
workflow.handle([startAgentEvent], async ({ data }) => {
const { state, sendEvent } = getContext();
const messages = data.chatHistory;
const toolCallResponse = await chatWithTools(
Settings.llm,
[wiki()],
messages,
);
// using result from tool call and use `sendEvent` to emit the next event...
});
// define more workflow handling logic here...
// Finally stop with a `stopAgentEvent` event to mark the end of the workflow.
// return stopAgentEvent.with({
// result: "This is the end!",
// });
return workflow;
};
```
To generate more sophisticated workflow examples, we recommend using the [create-llama](https://github.com/run-llama/create-llama) project.
## AI-generated UI Components
The LlamaIndex server provides support for rendering workflow events using custom UI components, allowing you to extend and customize the chat interface.
@@ -137,6 +200,66 @@ new LlamaIndexServer({
}).start();
```
## Sending Artifacts to the UI
In addition to UI events for custom components, LlamaIndex Server supports a special `ArtifactEvent` to send structured data like generated documents or code snippets to the UI. These artifacts are displayed in a dedicated "Canvas" panel in the chat interface.
### Artifact Event Structure
To send an artifact, your workflow needs to emit an event with `type: "artifact"`. The `data` payload of this event should include:
- `type`: A string indicating the type of artifact (e.g., `"document"`, `"code"`).
- `created_at`: A timestamp (e.g., `Date.now()`) indicating when the artifact was created.
- `data`: An object containing the specific details of the artifact. The structure of this object depends on the artifact `type`.
### Defining and Sending an ArtifactEvent
First, define your artifact event using `workflowEvent` from `@llamaindex/workflow`:
```typescript
import { workflowEvent } from "@llamaindex/workflow";
// Example for a document artifact
const artifactEvent = workflowEvent<{
type: "artifact"; // Must be "artifact"
data: {
type: "document"; // Custom type for your artifact (e.g., "document", "code")
created_at: number;
data: {
// Specific data for the document artifact type
title: string;
content: string;
type: "markdown" | "html"; // document format
};
};
}>();
```
Then, within your workflow logic, use `sendEvent` (obtained from `getContext()`) to emit the event:
```typescript
// Assuming 'sendEvent' is available in your workflow handler
// and 'documentDetails' contains the content for the artifact.
sendEvent(
artifactEvent.with({
type: "artifact", // This top-level type must be "artifact"
data: {
type: "document", // This is your specific artifact type
created_at: Date.now(),
data: {
title: "My Generated Document",
content: "# Hello World\n\nThis is a markdown document.",
type: "markdown",
},
},
}),
);
```
This will send the artifact to the LlamaIndex Server UI, where it will be rendered in the [ChatCanvasPanel](/packages/server/next/app/components/ui/chat/canvas/panel.tsx) by a renderer depending on the artifact type. For type `document` this is using the [DocumentArtifactViewer](https://github.com/run-llama/chat-ui/blob/bacb75fc6edceacf742fba18632404a2483b5a81/packages/chat-ui/src/chat/canvas/artifacts/document.tsx#L17).
## Default Endpoints and Features
### Chat Endpoint
+1 -1
View File
@@ -6,7 +6,7 @@ This directory contains examples of how to use the LlamaIndex Server.
```bash
export OPENAI_API_KEY=your_openai_api_key
npx tsx simple-workflow/calculator.ts
pnpm run dev
```
## Open browser at http://localhost:3000
@@ -39,5 +39,5 @@ new LlamaIndexServer({
appTitle: "LlamaIndex App",
starterQuestions: ["What is the color of the dog?"],
},
port: 4100,
port: 3000,
}).start();
+2 -2
View File
@@ -9,7 +9,7 @@ new LlamaIndexServer({
appTitle: "Calculator",
devMode: true,
},
port: 6000,
port: 3000,
}).start();
```
@@ -17,5 +17,5 @@ Export OpenAI API key and start the server in dev mode.
```bash
export OPENAI_API_KEY=<your-openai-api-key>
npx tsx watch index.ts
npx nodemon --exec tsx index.ts
```
+1 -1
View File
@@ -11,5 +11,5 @@ new LlamaIndexServer({
"What is the weather in New York?",
],
},
port: 6005,
port: 3000,
}).start();
@@ -20,5 +20,5 @@ new LlamaIndexServer({
appTitle: "Calculator",
starterQuestions: ["1 + 1", "2 + 2"],
},
port: 4000,
port: 3000,
}).start();
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/server",
"description": "LlamaIndex Server",
"version": "0.2.0",
"version": "0.2.1",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+30 -12
View File
@@ -72,7 +72,7 @@ app = LlamaIndexServer(
The LlamaIndexServer accepts the following configuration parameters:
- `workflow_factory`: A callable that creates a workflow instance for each request
- `workflow_factory`: A callable that creates a workflow instance for each request. See [Workflow factory contract](#workflow-factory-contract) for more details.
- `logger`: Optional logger instance (defaults to uvicorn logger)
- `use_default_routers`: Whether to include default routers (chat, static file serving)
- `env`: Environment setting ('dev' enables CORS and UI by default)
@@ -88,6 +88,31 @@ The LlamaIndexServer accepts the following configuration parameters:
- `api_prefix`: API route prefix (default: "/api")
- `server_url`: The deployment URL of the server (default is None)
## Workflow factory contract
The `workflow_factory` provided will be called for each chat request to initialize a new workflow instance. Additionally, we provide the [ChatRequest](https://github.com/run-llama/create-llama/blob/afe9e9fc16427d20e1dfb635a45e7ed4b46285cb/python/llama-index-server/llama_index/server/api/models.py#L32) object, which includes the request information that is helpful for initializing the workflow. For example:
```python
def create_workflow(chat_request: ChatRequest) -> Workflow:
# using messages from the chat request to initialize the workflow
return MyCustomWorkflow(chat_request.messages)
```
Your workflow will be executed once for each chat request, with the following input parameters included in the workflow's `StartEvent`:
- `user_msg` [str]: The current user message
- `chat_history` [list[[ChatMessage](https://docs.llamaindex.ai/en/stable/api_reference/prompts/#llama_index.core.prompts.ChatMessage)]]: All the previous messages of the conversation
Example:
```python
@step
def handle_start_event(ev: StartEvent) -> MyNextEvent:
user_msg = ev.user_msg
chat_history = ev.chat_history
...
```
Your workflows can emit `UIEvent` events to render [Custom UI Components](https://github.com/run-llama/create-llama/blob/main/python/llama-index-server/docs/custom_ui_component.md) in the chat UI to improve the user experience.
Furthermore, you can send `ArtifactEvent` events to render code or document [Artifacts](https://github.com/run-llama/create-llama/blob/main/python/llama-index-server/docs/custom_artifact_event.md) in a dedicated Canvas panel in the chat UI.
## Default Routers and Features
### Chat Router
@@ -108,11 +133,6 @@ When enabled, the server provides a chat interface at the root path (`/`) with:
- Real-time chat interface
- API endpoint integration
### Custom UI Components
You can add custom UI components for your workflow by providing `component_dir` config and adding custom .jsx or .tsx files to the directory.
See [Custom UI Components](https://github.com/run-llama/create-llama/blob/main/llama-index-server/docs/custom_ui_component.md) for more details.
## Development Mode
In development mode (`env="dev"`), the server:
@@ -135,7 +155,6 @@ app = LlamaIndexServer(
**Note**: The workflow editor is currently in beta and only supports updating LlamaIndexServer projects created with [create-llama](https://github.com/run-llama/create-llama/). You also need to start the server via `fastapi dev` so that the server can hot reload the workflow code.
## API Endpoints
The server provides the following default endpoints:
@@ -146,11 +165,10 @@ The server provides the following default endpoints:
## Best Practices
1. Always provide a workflow factory that creates fresh workflow instances
2. Use environment variables for sensitive configuration
3. Enable verbose logging during development
4. Configure CORS appropriately for your deployment environment
5. Use starter questions to guide users in the chat UI
1. Use environment variables for sensitive configuration
2. Enable verbose logging during development
3. Configure CORS appropriately for your deployment environment
4. Use starter questions to guide users in the chat UI
## Getting Started with a New Project
@@ -0,0 +1,59 @@
# Sending Artifacts to the UI
In addition to UI events for custom components, LlamaIndex Server supports a special `ArtifactEvent` to send structured data like generated documents or code snippets to the UI. These artifacts are displayed in a dedicated "Canvas" panel in the chat interface.
## Artifact Event Structure
To send an artifact, your workflow needs to emit an event with `type: "artifact"`. The `data` payload of this event should include:
- `type`: An `ArtifactType` enum indicating the type of artifact (e.g., `ArtifactType.DOCUMENT`, `ArtifactType.CODE`).
- `created_at`: A timestamp (e.g., `int(time.time())`) indicating when the artifact was created.
- `data`: An object containing the specific details of the artifact. The structure of this object depends on the artifact `type`. For example, `DocumentArtifactData` or `CodeArtifactData`.
## Defining and Sending an ArtifactEvent
First, import the necessary classes:
```python
import time
from llama_index.server.api.models import (
Artifact,
ArtifactEvent,
ArtifactType,
DocumentArtifactData,
# CodeArtifactData, # Import if sending code artifacts
)
```
Then, within your workflow logic, use `ctx.write_event_to_stream` to emit the event. Here's an example of sending a document artifact, taken from [document_workflow.py](/python/llama-index-server/examples/artifact/document_workflow.py):
```python
# Assuming 'ctx' is the workflow Context and 'content' is a markdown string
ctx.write_event_to_stream(
ArtifactEvent(
data=Artifact(
type=ArtifactType.DOCUMENT,
created_at=int(time.time()),
data=DocumentArtifactData(
title="My cooking recipes",
content=content,
type="markdown",
),
),
)
)
```
This will send the artifact to the LlamaIndex Server UI, where it will be rendered in the Canvas panel by a renderer depending on the artifact type. For `ArtifactType.DOCUMENT`, this uses a `DocumentArtifactViewer`.
## Available Artifact Types
LlamaIndex Server currently supports the following artifact types:
- `ArtifactType.DOCUMENT`: For text-based documents like Markdown or HTML.
- `data` should be an instance of `DocumentArtifactData` which includes `title: str`, `content: str`, and `type: Literal["markdown", "html"]`.
- `ArtifactType.CODE`: For code snippets.
- `data` should be an instance of `CodeArtifactData` which includes `title: str`, `code: str`, and `language: str`.
Ensure you provide the correct data model corresponding to the `ArtifactType` you are sending. You can find these data models in `llama_index.server.api.models`.