Compare commits

...

8 Commits

Author SHA1 Message Date
github-actions[bot] bc56fa3c5f Release 0.5.20 (#671)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2025-06-02 18:02:05 +07:00
Huu Le 087c96164d feat: [server] Add Human in the Loop example with FastAPI integration (#630) 2025-06-02 17:47:04 +07:00
Thuc Pham 3ff0a18876 fix: default header padding (#672) 2025-05-31 14:08:29 +07:00
Thuc Pham df1047480a fix: missing cursor pointer for button (#670) 2025-05-30 09:52:17 +07:00
Marcus Schiesser 8d89223a08 chore: fill empty chat message default 2025-05-29 21:05:53 +07:00
github-actions[bot] 49a944182f Release 0.2.5 (#669)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2025-05-29 13:06:58 +07:00
Marcus Schiesser 058b3762c1 fix: update generate script path for ejected project (#668) 2025-05-29 12:21:17 +07:00
Thuc Pham 4c8579b04f use eject file in linux (#663) 2025-05-29 09:15:52 +07:00
56 changed files with 1026 additions and 289 deletions
+6
View File
@@ -1,5 +1,11 @@
# create-llama
## 0.5.20
### Patch Changes
- 3ff0a18: fix: default header padding
## 0.5.19
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "create-llama",
"version": "0.5.19",
"version": "0.5.20",
"description": "Create LlamaIndex-powered apps with one command",
"keywords": [
"rag",
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";
export default function Header() {
return (
<div className="flex items-center justify-between px-4 pt-2">
<div className="flex items-center justify-between p-2 px-4">
<div className="flex items-center gap-2">
<Sparkles className="size-4" />
<h1 className="font-semibold">LlamaIndex App</h1>
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/server
## 0.2.6
### Patch Changes
- 3ff0a18: fix: default header padding
- df10474: fix: missing cursor pointer for button
- 087c961: Support zod and chat-ui hooks for custom components
## 0.2.5
### Patch Changes
- 058b376: Fix generate script for ejected project
## 0.2.4
### Patch Changes
Regular → Executable
View File
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";
export default function Header() {
return (
<div className="flex items-center justify-between px-4 pt-2">
<div className="flex items-center justify-between p-2 px-4">
<div className="flex items-center gap-2">
<Sparkles className="size-4" />
<h1 className="font-semibold">LlamaIndex App</h1>
@@ -32,7 +32,10 @@ export default function CustomChatMessages({
<ChatMessage.Actions />
</ChatMessage>
))}
<ChatMessages.Empty />
<ChatMessages.Empty
heading="Hello there!"
subheading="I'm here to help you with your questions."
/>
<ChatMessages.Loading />
</ChatMessages.List>
<ChatStarter />
@@ -1,7 +1,7 @@
"use client";
import {
getChatUIAnnotation,
getAnnotationData,
JSONValue,
MessageAnnotation,
MessageAnnotationType,
@@ -25,9 +25,8 @@ export const DynamicEvents = ({
componentDefs: ComponentDef[];
appendError: (error: string) => void;
}) => {
const {
message: { annotations },
} = useChatMessage();
const { message } = useChatMessage();
const annotations = message.annotations;
const shownWarningsRef = useRef<Set<string>>(new Set()); // track warnings
const [hasErrors, setHasErrors] = useState(false);
@@ -43,15 +42,16 @@ export const DynamicEvents = ({
const availableComponents = new Set(componentDefs.map((comp) => comp.type));
annotations.forEach((annotation: MessageAnnotation) => {
annotations.forEach((item: JSONValue) => {
const annotation = item as MessageAnnotation;
const type = annotation.type;
if (!type) return; // skip if annotation doesn't have a type
if (!type) return; // Skip if annotation doesn't have a type
const events = getChatUIAnnotation(annotations, type);
const events = getAnnotationData<JSONValue>(message, type);
// Skip if it's a built-in component or if we've already shown the warning
if (
BUILT_IN_CHATUI_COMPONENTS.includes(type) ||
BUILT_IN_CHATUI_COMPONENTS.includes(type as MessageAnnotationType) ||
shownWarningsRef.current.has(type)
) {
return;
@@ -69,7 +69,7 @@ export const DynamicEvents = ({
const components: EventComponent[] = componentDefs
.map((comp) => {
const events = getChatUIAnnotation(annotations, comp.type) as JSONValue[]; // get all event data by type
const events = getAnnotationData<JSONValue>(message, comp.type);
if (!events?.length) return null;
return { ...comp, events };
})
@@ -67,6 +67,9 @@ export const SOURCE_MAP: Record<string, () => Promise<any>> = {
import("../../../toggle-group"),
[`${SHADCN_IMPORT_PREFIX}/tooltip`]: () => import("../../../tooltip"),
///// CHAT_UI GENERAL /////
[`@llamaindex/chat-ui`]: () => import("@llamaindex/chat-ui"),
///// WIDGETS FROM CHAT_UI /////
[`@llamaindex/chat-ui/widgets`]: () => import("@llamaindex/chat-ui/widgets"),
@@ -76,6 +79,9 @@ export const SOURCE_MAP: Record<string, () => Promise<any>> = {
///// UTILS /////
[`@/components/lib/utils`]: () => import("../../../lib/utils"),
[`@/lib/utils`]: () => import("../../../lib/utils"), // for v0 compatibility
///// ZOD /////
[`zod`]: () => import("zod"),
};
// parse imports from code to get Function constructor arguments and component name
@@ -122,7 +128,7 @@ export async function parseImports(code: string) {
const importPromises = imports.map(async ({ name, source }) => {
if (!(source in SOURCE_MAP)) {
throw new Error(
`Fail to import ${name} from ${source}. Reason: Module not found. \nCurrently we only support importing UI components from Shadcn components, widgets from "llamaindex/chat-ui/widgets" and icons from "lucide-react"`,
`Fail to import ${name} from ${source}. Reason: Module not found. \nCurrently we only support importing UI components from Shadcn components, widgets and hooks from "llamaindex/chat-ui", icons from "lucide-react" and zod for data validation.`,
);
}
try {
@@ -1,7 +1,9 @@
"use client";
import { SourceData } from "@llamaindex/chat-ui";
import { Markdown as MarkdownUI } from "@llamaindex/chat-ui/widgets";
import {
Markdown as MarkdownUI,
SourceData,
} from "@llamaindex/chat-ui/widgets";
import { getConfig } from "../../lib/utils";
const preprocessMedia = (content: string) => {
// Remove `sandbox:` from the beginning of the URL before rendering markdown
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";
export function DefaultHeader() {
return (
<div className="flex items-center justify-between px-4 pt-2">
<div className="flex items-center justify-between p-2 px-4">
<div className="flex items-center gap-2">
<Sparkles className="size-4" />
<h1 className="font-semibold">LlamaIndex App</h1>
@@ -2,8 +2,7 @@
import {
Message,
MessageAnnotation,
getChatUIAnnotation,
getAnnotationData,
useChatMessage,
useChatUI,
} from "@llamaindex/chat-ui";
@@ -21,13 +20,10 @@ export function ToolAnnotations() {
[messages, message],
);
// Get the tool data from the message annotations
const annotations = message.annotations as MessageAnnotation[] | undefined;
const toolData = annotations
? (getChatUIAnnotation(annotations, "tools") as unknown as ToolData[])
: null;
return toolData?.[0] ? (
<ChatTools data={toolData[0]} artifactVersion={artifactVersion} />
) : null;
const toolData = getAnnotationData<ToolData>(message, "tools");
if (toolData.length === 0) return null;
return <ChatTools data={toolData[0]} artifactVersion={artifactVersion} />;
}
// TODO: Used to render outputs of tools. If needed, add more renderers here.
@@ -83,9 +79,7 @@ function getArtifactVersion(
if (!messageId) return undefined;
let versionIndex = 1;
for (const m of messages) {
const toolData = m.annotations
? (getChatUIAnnotation(m.annotations, "tools") as unknown as ToolData[])
: null;
const toolData = getAnnotationData<ToolData>(m, "tools");
if (toolData?.some((t) => t.toolCall.name === "artifact")) {
if ("id" in m && m.id === messageId) {
+7
View File
@@ -91,6 +91,13 @@
::file-selector-button {
border-color: var(--color-gray-200, currentColor);
}
/* Tailwind v4 no longer applies `cursor: pointer` to buttons and uses the default cursor instead */
/* https://github.com/shadcn-ui/ui/issues/6843#issuecomment-2696947980 */
button:not([disabled]),
[role="button"]:not([disabled]) {
cursor: pointer;
}
}
@layer base {
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/server",
"description": "LlamaIndex Server",
"version": "0.2.4",
"version": "0.2.6",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
@@ -65,7 +65,7 @@
"@babel/traverse": "^7.27.0",
"@babel/types": "^7.27.0",
"@hookform/resolvers": "^5.0.1",
"@llamaindex/chat-ui": "0.4.6",
"@llamaindex/chat-ui": "0.4.9",
"@radix-ui/react-accordion": "^1.2.3",
"@radix-ui/react-alert-dialog": "^1.1.7",
"@radix-ui/react-aspect-ratio": "^1.1.3",
+2 -2
View File
@@ -12,7 +12,7 @@
"format": "prettier --ignore-unknown --cache --check .",
"format:write": "prettier --ignore-unknown --write .",
"typecheck": "tsc --noEmit",
"generate": "tsx app\\api\\chat\\generate.ts"
"generate": "tsx app/api/chat/generate.ts"
},
"devDependencies": {
"@eslint/eslintrc": "^3",
@@ -41,7 +41,7 @@
"@babel/traverse": "^7.27.0",
"@babel/types": "^7.27.0",
"@hookform/resolvers": "^5.0.1",
"@llamaindex/chat-ui": "0.4.5",
"@llamaindex/chat-ui": "0.4.9",
"@llamaindex/env": "~0.1.30",
"@llamaindex/openai": "~0.4.0",
"@llamaindex/readers": "~3.1.4",
+5 -5
View File
@@ -181,8 +181,8 @@ importers:
specifier: ^5.0.1
version: 5.0.1(react-hook-form@7.56.1(react@19.1.0))
'@llamaindex/chat-ui':
specifier: 0.4.6
version: 0.4.6(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
specifier: 0.4.9
version: 0.4.9(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
'@llamaindex/env':
specifier: ~0.1.30
version: 0.1.30
@@ -1189,8 +1189,8 @@ packages:
zod:
optional: true
'@llamaindex/chat-ui@0.4.6':
resolution: {integrity: sha512-XvJEv/rv//8vY9Z4RosbmTyPDQFyVaWlQFe0zrJ4inz+aYqHhYtEiSCmQGgPQG+NqWStlTwpOpCye1jy4mWciQ==}
'@llamaindex/chat-ui@0.4.9':
resolution: {integrity: sha512-KEdydC+aJ22VK/TltxIHlMWbWLfh6I0YkyVd1D/CS3FRfLt8l9jfQ/YjY10MiEd8oc1fFfk6ek/FhVWe9Szstg==}
peerDependencies:
react: ^18.2.0 || ^19.0.0 || ^19.0.0-rc
@@ -7219,7 +7219,7 @@ snapshots:
p-retry: 6.2.1
zod: 3.24.3
'@llamaindex/chat-ui@0.4.6(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
'@llamaindex/chat-ui@0.4.9(@babel/runtime@7.27.0)(@codemirror/autocomplete@6.18.6)(@codemirror/language@6.11.0)(@codemirror/lint@6.8.5)(@codemirror/search@6.5.10)(@codemirror/state@6.5.2)(@codemirror/theme-one-dark@6.1.2)(@codemirror/view@6.36.7)(@types/react-dom@19.1.2(@types/react@19.1.2))(@types/react@19.1.2)(codemirror@6.0.1)(react-dom@19.1.0(react@19.1.0))(react@19.1.0)':
dependencies:
'@codemirror/lang-css': 6.3.1
'@codemirror/lang-html': 6.4.9
+18
View File
@@ -1,5 +1,23 @@
# @create-llama/llama-index-server
## 0.1.20
### Patch Changes
- 087c961: Add support for human-in-the-loop
- 087c961: Refactor models.py into a separate module
- Updated dependencies [3ff0a18]
- Updated dependencies [df10474]
- Updated dependencies [087c961]
- @llamaindex/server@0.2.6
## 0.1.19
### Patch Changes
- Updated dependencies [058b376]
- @llamaindex/server@0.2.5
## 0.1.18
### Patch Changes
+1
View File
@@ -8,6 +8,7 @@ LlamaIndexServer is a FastAPI-based application that allows you to quickly launc
- Built on FastAPI for high performance and easy API development
- Optional built-in chat UI with extendable UI components
- Prebuilt development code
- Human-in-the-loop (HITL) support. See the [Human-in-the-loop](https://github.com/run-llama/create-llama/blob/main/python/llama-index-server/examples/hitl/README.md) documentation for more details.
## Installation
@@ -16,7 +16,8 @@ from llama_index.core.workflow import (
Workflow,
step,
)
from llama_index.server.api.models import (
from llama_index.server.api.utils import get_last_artifact
from llama_index.server.models import (
Artifact,
ArtifactEvent,
ArtifactType,
@@ -24,7 +25,6 @@ from llama_index.server.api.models import (
CodeArtifactData,
UIEvent,
)
from llama_index.server.api.utils import get_last_artifact
class Requirement(BaseModel):
@@ -16,7 +16,8 @@ from llama_index.core.workflow import (
Workflow,
step,
)
from llama_index.server.api.models import (
from llama_index.server.api.utils import get_last_artifact
from llama_index.server.models import (
Artifact,
ArtifactEvent,
ArtifactType,
@@ -24,7 +25,6 @@ from llama_index.server.api.models import (
DocumentArtifactData,
UIEvent,
)
from llama_index.server.api.utils import get_last_artifact
class DocumentRequirement(BaseModel):
@@ -4,7 +4,7 @@ import { Sparkles, Star } from "lucide-react";
export default function Header() {
return (
<div className="flex items-center justify-between px-4 pt-2">
<div className="flex items-center justify-between p-2 px-4">
<div className="flex items-center gap-2">
<Sparkles className="size-4" />
<h1 className="font-semibold">Artifact Workflow</h1>
@@ -7,7 +7,7 @@ from examples.artifact.code_workflow import ArtifactWorkflow
from llama_index.core.workflow import Workflow
from llama_index.llms.openai import OpenAI
from llama_index.server import LlamaIndexServer, UIConfig
from llama_index.server.api.models import ChatRequest
from llama_index.server.models import ChatRequest
def create_workflow(chat_request: ChatRequest) -> Workflow:
@@ -0,0 +1,74 @@
# Human in the Loop
This example shows how to use the LlamaIndexServer with a human in the loop.
## AgentWorkflow
```bash
uv run -- agent_workflow.py
```
## Custom Workflow
```bash
uv run -- custom_workflow.py
```
## How does it work?
The human-in-the-loop approach used here is based on a simple idea: the workflow pauses and waits for a human response before proceeding to the next step.
To do this, you will need to implement two custom events:
+ [HumanInputEvent](../../llama_index/server/models/hitl.py#L10): This event is used to request input from the user.
+ [HumanResponseEvent](../../llama_index/server/models/hitl.py#L43): This event is sent to the workflow to resume execution with input from the user.
In this example, we have implemented these two custom events:
- [CLIHumanInputEvent](events.py#L20) to request input from the user for CLI command execution.
- [CLIHumanResponseEvent](events.py#L8) to resume the workflow with the response from the user.
We also have a custom component, [cli_human_input.tsx](./components/cli_human_input.tsx), which displays a card where the user can edit the command and choose whether to execute or cancel it.
To make the [AgentWorkflow](agent_workflow.py) work, we use the `wait_for_event()` method to wait for the human response when a tool is called.
Example:
```python
async def cli_executor(ctx: Context, command: str) -> str:
"""
This tool carefully waits for user confirmation before executing a command.
"""
confirmation = await ctx.wait_for_event(
CLIHumanResponseEvent,
waiter_event=CLIHumanInputEvent(
data=CLICommand(command=command),
),
)
if confirmation.execute:
# Execute the command
...
else:
# Cancel the command
...
```
For the [Custom Workflow](custom_workflow.py), we define one step that sends the `CLIHumanInputEvent` and another step that waits for the `CLIHumanResponseEvent`.
Example:
```python
@step
async def request_input(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
...
return CLIHumanInputEvent(
data=CLICommand(command=command),
response_event_type=CLIHumanResponseEvent,
)
@step
async def handle_human_response(self, ctx: Context, ev: CLIHumanResponseEvent) -> StopEvent:
if ev.execute:
# Execute the command
...
else:
# Cancel the command
...
```
@@ -0,0 +1,60 @@
import subprocess
from events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
from fastapi import FastAPI
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.workflow import Context
from llama_index.llms.openai import OpenAI
from llama_index.server import LlamaIndexServer, UIConfig
async def cli_executor(ctx: Context, command: str) -> str:
    """
    Run a shell command for the agent, but only after the user approves it.

    The tool emits a ``CLIHumanInputEvent`` carrying the proposed command and
    waits until a ``CLIHumanResponseEvent`` comes back.

    Args:
        ctx: Workflow context used to wait for the human response event.
        command: The command proposed by the agent.

    Returns:
        The command's standard output when it exits with status 0, a message
        with the exit code and the captured output when it fails, or a fixed
        cancellation message when the user declines.
    """
    confirmation = await ctx.wait_for_event(
        CLIHumanResponseEvent,
        waiter_event=CLIHumanInputEvent(
            data=CLICommand(command=command),
        ),
    )
    if not confirmation.execute:
        return "Command execution cancelled."

    # NOTE(security): the command is taken from the human response (not from
    # the original proposal) and is executed through the shell. This is only
    # acceptable because a human explicitly approved it.
    completed = subprocess.run(
        confirmation.command,
        shell=True,
        capture_output=True,
        encoding="utf-8",
        errors="replace",  # never fail the tool on undecodable output
    )
    if completed.returncode != 0:
        # Report the failure to the agent instead of raising, so one failing
        # command does not abort the whole workflow run.
        output = completed.stderr or completed.stdout
        return f"Command failed with exit code {completed.returncode}:\n{output}"
    return completed.stdout
def create_workflow() -> AgentWorkflow:
    """Build the agent workflow whose only tool is ``cli_executor``."""
    instructions = """
You are a helpful assistant that help the user execute commands.
You can execute commands using the cli_executor tool, don't need to ask for confirmation for triggering the tool.
"""
    model = OpenAI(model="gpt-4.1-mini")
    return AgentWorkflow.from_tools_or_functions(
        tools_or_functions=[cli_executor],
        llm=model,
        system_prompt=instructions,
    )
def create_app() -> FastAPI:
    """Create the server application for the AgentWorkflow HITL example."""
    # UI settings: two starter questions plus the directory that holds the
    # custom components.
    ui = UIConfig(
        starter_questions=[
            "List all files in the current directory",
            "Fetch changes from the remote repository",
        ],
        component_dir="components",
    )
    return LlamaIndexServer(
        workflow_factory=create_workflow,
        suggest_next_questions=False,
        ui_config=ui,
    )
# Module-level application object; the uvicorn call below refers to it by the
# import string "agent_workflow:app".
app = create_app()

if __name__ == "__main__":
    # Imported lazily so that importing this module does not require uvicorn.
    import uvicorn

    uvicorn.run("agent_workflow:app", port=8000, reload=True)
@@ -0,0 +1,96 @@
import { JSONValue, useChatUI } from "@llamaindex/chat-ui";
import React, { FC, useState } from "react";
import { Button } from "@/components/ui/button";
import { Card, CardContent, CardFooter } from "@/components/ui/card";
import { z } from "zod";
/**
 * Runtime validator for the payload of a CLI input event.
 * Mirrors the `CLICommand` model defined in events.py.
 */
const CLIInputEventSchema = z.object({ command: z.string() });

/** Static type of a payload accepted by `CLIInputEventSchema`. */
type CLIInputEvent = z.infer<typeof CLIInputEventSchema>;
/**
 * Card that asks the user to approve, edit, or reject a CLI command.
 *
 * The most recent event that passes `CLIInputEventSchema` supplies the
 * command. The user can edit it before answering. Either answer appends a
 * user message carrying a `human_response` annotation and hides the buttons.
 *
 * @param events - Raw event payloads for this component; invalid ones are ignored.
 */
const CLIHumanInput: FC<{
  events: JSONValue[];
}> = ({ events }) => {
  // Validate every payload and keep only the latest well-formed one.
  const inputEvent = (events || [])
    .map((ev) => {
      const parseResult = CLIInputEventSchema.safeParse(ev);
      return parseResult.success ? parseResult.data : null;
    })
    .filter((ev): ev is CLIInputEvent => ev !== null)
    .at(-1);

  const { append } = useChatUI();
  // null = not answered yet, true = confirmed, false = cancelled.
  const [confirmedValue, setConfirmedValue] = useState<boolean | null>(null);
  const [editableCommand, setEditableCommand] = useState<string | undefined>(
    inputEvent?.command,
  );

  // Update editableCommand if inputEvent changes (e.g. new event comes in)
  React.useEffect(() => {
    setEditableCommand(inputEvent?.command);
  }, [inputEvent?.command]);

  // Send the user's decision as a chat message and remember it locally.
  const respond = (execute: boolean) => {
    append({
      content: execute ? "Yes" : "No",
      role: "user",
      annotations: [
        {
          type: "human_response",
          data: {
            execute,
            // A confirmation carries the (possibly edited) command; a
            // cancellation reports the command that was originally proposed.
            command: execute ? editableCommand : inputEvent?.command,
          },
        },
      ],
    });
    setConfirmedValue(execute);
  };

  const handleConfirm = () => respond(true);
  const handleCancel = () => respond(false);

  return (
    <Card className="my-4">
      <CardContent className="pt-6">
        <p className="text-sm text-gray-700">
          Do you want to execute the following command?
        </p>
        <input
          // Editable until the user has answered, then locked.
          disabled={confirmedValue !== null}
          type="text"
          value={editableCommand || ""}
          onChange={(e) => setEditableCommand(e.target.value)}
          className="bg-gray-100 rounded p-3 my-2 text-xs font-mono text-gray-800 overflow-x-auto w-full border border-gray-300"
        />
      </CardContent>
      {confirmedValue === null ? (
        <CardFooter className="flex justify-end gap-2">
          <Button onClick={handleConfirm}>Yes</Button>
          <Button onClick={handleCancel}>No</Button>
        </CardFooter>
      ) : null}
    </Card>
  );
};
export default CLIHumanInput;
@@ -0,0 +1,109 @@
import platform
import subprocess
from typing import Any
from events import CLICommand, CLIHumanInputEvent, CLIHumanResponseEvent
from fastapi import FastAPI
from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings
from llama_index.core.workflow import (
Context,
StartEvent,
StopEvent,
Workflow,
step,
)
from llama_index.server import LlamaIndexServer, UIConfig
class CLIWorkflow(Workflow):
    """
    Workflow that turns a user request into a CLI command and runs it only
    after a human has confirmed it.

    ``start`` asks the LLM for a command and returns a ``CLIHumanInputEvent``;
    ``handle_human_response`` consumes the matching ``CLIHumanResponseEvent``
    and either executes the command or stops without a result.
    """

    default_prompt = PromptTemplate(
        template="""
You are a helpful assistant who can write CLI commands to execute using {cli_language}.
Your task is to analyze the user's request and write a CLI command to execute.
## User Request
{user_request}
Don't be verbose, only respond with the CLI command without any other text.
"""
    )

    def __init__(self, **kwargs: Any) -> None:
        # A HITL workflow has to wait for the user for an unbounded time, so
        # any caller-supplied timeout is overridden; otherwise the run fails
        # with a timeout error.
        kwargs["timeout"] = None
        super().__init__(**kwargs)

    @step
    async def start(self, ctx: Context, ev: StartEvent) -> CLIHumanInputEvent:
        """
        Ask the LLM for a CLI command and request human confirmation for it.

        Raises:
            ValueError: If the start event has no ``user_msg``, no LLM is
                configured in ``Settings``, or the LLM returns an empty command.
        """
        user_msg = ev.user_msg
        if user_msg is None:
            raise ValueError("Missing user_msg in StartEvent")
        await ctx.set("user_msg", user_msg)

        # Request LLM to generate a CLI command in the shell language that
        # matches the host operating system.
        cli_language = "bash" if platform.system() in ("Linux", "Darwin") else "cmd"
        prompt = self.default_prompt.format(
            user_request=user_msg, cli_language=cli_language
        )
        llm = Settings.llm
        if llm is None:
            raise ValueError("Missing LLM in Settings")
        response = await llm.acomplete(prompt, formatted=True)
        command = response.text.strip()
        if not command:
            raise ValueError("Couldn't generate a command")

        # Send the command to the user for confirmation
        await ctx.set("command", command)
        return CLIHumanInputEvent(  # type: ignore
            data=CLICommand(command=command),
            response_event_type=CLIHumanResponseEvent,
        )

    @step
    async def handle_human_response(
        self,
        ctx: Context,
        ev: CLIHumanResponseEvent,  # This event is sent by LlamaIndexServer when the user responds
    ) -> StopEvent:
        """
        Execute the confirmed command, or stop with no result if it was declined.

        Raises:
            ValueError: If execution was confirmed but the command is empty.
        """
        if not ev.execute:
            return StopEvent(result=None)

        command = ev.command
        if not command:
            raise ValueError("Missing command in CLIHumanResponseEvent")
        # NOTE(security): the command is run through the shell; this relies on
        # the human having reviewed and approved it.
        res = subprocess.run(command, shell=True, capture_output=True, text=True)
        # Prefer stdout; fall back to stderr so failures are still reported.
        return StopEvent(result=res.stdout or res.stderr)
def create_app() -> FastAPI:
    """Create the server application for the custom-workflow HITL example."""
    # UI settings: two starter questions plus the directory that holds the
    # custom components.
    ui = UIConfig(
        starter_questions=[
            "List all files in the current directory",
            "Fetch changes from the remote repository",
        ],
        component_dir="components",
    )
    return LlamaIndexServer(
        workflow_factory=lambda: CLIWorkflow(),
        suggest_next_questions=False,
        ui_config=ui,
    )
# Module-level application object; the uvicorn call below refers to it by the
# import string "custom_workflow:app".
app = create_app()

if __name__ == "__main__":
    # Imported lazily so that importing this module does not require uvicorn.
    import uvicorn

    uvicorn.run("custom_workflow:app", port=8000, reload=True)
@@ -0,0 +1,34 @@
from typing import Type
from pydantic import BaseModel, Field
from llama_index.server.models import HumanInputEvent, HumanResponseEvent
class CLIHumanResponseEvent(HumanResponseEvent):
    """The user's answer to a CLI confirmation request."""

    execute: bool = Field(
        description="True if the human wants to execute the command, False otherwise."
    )
    command: str = Field(
        description="The command to execute.",
    )
class CLICommand(BaseModel):
    """Payload that carries a single CLI command string."""

    command: str = Field(
        description="The command to execute.",
    )
# The HITL feature requires an event that extends HumanInputEvent
class CLIHumanInputEvent(HumanInputEvent):
    """
    Sent when the agent needs the user's permission to execute a CLI command.

    Render this event by showing the command together with a yes/no choice to
    execute it or not.
    """

    # used by UI to render with appropriate component
    event_type: str = "cli_human_input"
    # used by workflow to resume with the correct event
    response_event_type: Type = CLIHumanResponseEvent
    # the data that is sent to the UI for rendering
    data: CLICommand = Field(
        description="The command to execute.",
    )
@@ -2,13 +2,14 @@ import os
from typing import List, Optional
from fastapi import FastAPI
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.core.settings import Settings
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.llms.openai import OpenAI
from llama_index.server import LlamaIndexServer, UIConfig
from llama_index.server.api.models import ChatRequest
from llama_index.server.models import ChatRequest
from llama_index.server.services.llamacloud import LlamaCloudIndex, get_index
from llama_index.server.tools.index.citation import (
CITATION_SYSTEM_PROMPT,
@@ -3,7 +3,7 @@ from typing import Optional
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.settings import Settings
from llama_index.llms.openai import OpenAI
from llama_index.server.api.models import ChatRequest
from llama_index.server.models import ChatRequest
def create_workflow(chat_request: Optional[ChatRequest] = None) -> AgentWorkflow:
@@ -1,4 +1,4 @@
from .api.models import UIEvent
from .models.ui import UIEvent
from .server import LlamaIndexServer, UIConfig
__all__ = ["LlamaIndexServer", "UIConfig", "UIEvent"]
@@ -3,7 +3,7 @@ from typing import Any
from llama_index.core.agent.workflow.workflow_events import ToolCall, ToolCallResult
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.api.models import AgentRunEvent
from llama_index.server.models.ui import AgentRunEvent
logger = logging.getLogger("uvicorn")
@@ -4,7 +4,7 @@ from typing import Any, List, Optional
from llama_index.core.agent.workflow.workflow_events import ToolCallResult
from llama_index.core.schema import NodeWithScore
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.api.models import SourceNodesEvent
from llama_index.server.models.source_nodes import SourceNodesEvent
logger = logging.getLogger(__name__)
@@ -2,7 +2,7 @@ import logging
from typing import Any, Optional
from llama_index.server.api.callbacks.base import EventCallback
from llama_index.server.api.models import ChatRequest
from llama_index.server.models.chat import ChatRequest
from llama_index.server.services.suggest_next_question import (
SuggestNextQuestionsService,
)
@@ -1,196 +1,2 @@
import logging
import os
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, field_validator
from llama_index.core.schema import NodeWithScore
from llama_index.core.types import ChatMessage, MessageRole
from llama_index.core.workflow import Event
from llama_index.server.settings import server_settings
from llama_index.server.utils import llamacloud
logger = logging.getLogger("uvicorn")
class ChatAPIMessage(BaseModel):
    """One chat message as received by the chat API."""

    role: MessageRole
    content: str
    annotations: Optional[List[Any]] = None

    def to_llamaindex_message(self) -> ChatMessage:
        """Return a ``ChatMessage`` built from this message's role and content."""
        converted = ChatMessage(role=self.role, content=self.content)
        return converted
class ChatRequest(BaseModel):
    """Body of a chat request: the conversation so far plus optional extra data."""

    messages: List[ChatAPIMessage]
    data: Optional[Any] = None

    @field_validator("messages")
    @classmethod
    def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
        """
        Ensure the conversation is non-empty and ends with a user message.

        Raises:
            ValueError: If ``messages`` is empty or the last message is not
                from the user.
        """
        # Guard first: indexing an empty list would raise IndexError, which is
        # not reported as a validation error.
        if not v:
            raise ValueError("Messages must not be empty")
        if v[-1].role != MessageRole.USER:
            raise ValueError("Last message must be from user")
        return v
class AgentRunEventType(Enum):
    """Kinds of agent run events."""

    TEXT = "text"
    PROGRESS = "progress"
class AgentRunEvent(Event):
    """Event describing what a named agent is doing."""

    name: str
    msg: str
    event_type: AgentRunEventType = AgentRunEventType.TEXT
    data: Optional[dict] = None

    def to_response(self) -> dict:
        """Return this event as a dict of type ``"agent"`` with the agent details under ``"data"``."""
        details = {
            "agent": self.name,
            "type": self.event_type.value,
            "text": self.msg,
            "data": self.data,
        }
        return {"type": "agent", "data": details}
class SourceNodesEvent(Event):
    """Event carrying the source nodes to report to the client."""

    nodes: List[NodeWithScore]

    def to_response(self) -> dict:
        """Return this event as a dict of type ``"sources"`` with one dumped ``SourceNodes`` per node."""
        dumped = []
        for node in self.nodes:
            dumped.append(SourceNodes.from_source_node(node).model_dump())
        return {"type": "sources", "data": {"nodes": dumped}}
class SourceNodes(BaseModel):
    """Serializable view of a retrieved node, including a URL to its source."""

    id: str
    metadata: Dict[str, Any]
    score: Optional[float]
    text: str
    url: Optional[str]

    @classmethod
    def from_source_node(cls, source_node: NodeWithScore) -> "SourceNodes":
        """Build a ``SourceNodes`` from a single scored node."""
        node = source_node.node
        node_metadata = node.metadata
        return cls(
            id=node.node_id,
            metadata=node_metadata,
            score=source_node.score,
            text=node.text,  # type: ignore
            url=cls.get_url_from_metadata(node_metadata),
        )

    @classmethod
    def get_url_from_metadata(
        cls,
        metadata: Dict[str, Any],
        data_dir: Optional[str] = None,
    ) -> Optional[str]:
        """
        Derive the URL of a node's source file from its metadata.

        Args:
            metadata: Node metadata; the keys read are ``file_name``,
                ``private``, ``file_path`` and ``URL``.
            data_dir: Directory that ``file_path`` is made relative to;
                defaults to ``"data"``.

        Returns:
            A URL under the configured file-server prefix, or the metadata's
            ``URL`` value (possibly ``None``) when no file URL can be built.
        """
        prefix = server_settings.file_server_url_prefix
        base_dir = "data" if data_dir is None else data_dir
        name = metadata.get("file_name")

        # Without a file name or a URL prefix, only the fallback applies.
        if not (name and prefix):
            # fallback to URL in metadata (e.g. for websites)
            return metadata.get("URL")

        if llamacloud.is_llamacloud_file(metadata):
            local_name = llamacloud.get_local_file_name(metadata)
            return f"{prefix}/output/llamacloud/{local_name}"

        if metadata.get("private", "false") == "true":
            # file is a private upload
            return f"{prefix}/output/uploaded/{name}"

        # file is from calling the 'generate' script
        # Get the relative path of file_path to data_dir
        file_path = metadata.get("file_path")
        base_dir = os.path.abspath(base_dir)
        if file_path and base_dir:
            return f"{prefix}/data/{os.path.relpath(file_path, base_dir)}"

        # fallback to URL in metadata (e.g. for websites)
        return metadata.get("URL")

    @classmethod
    def from_source_nodes(
        cls, source_nodes: List[NodeWithScore]
    ) -> List["SourceNodes"]:
        """Convert a list of scored nodes, preserving order."""
        return list(map(cls.from_source_node, source_nodes))
class ComponentDefinition(BaseModel):
    """A component described by its type, its code and its file name."""

    type: str
    code: str
    filename: str
class UIEvent(Event):
    """Event that carries a typed pydantic payload."""

    type: str
    data: BaseModel

    def to_response(self) -> dict:
        """Return the event type together with the dumped payload."""
        payload = self.data.model_dump()
        return {"type": self.type, "data": payload}
class ArtifactType(str, Enum):
    """Kinds of artifact; members are also plain strings."""

    CODE = "code"
    DOCUMENT = "document"
class CodeArtifactData(BaseModel):
    """Payload of a code artifact: file name, source code and language."""

    file_name: str
    code: str
    language: str
class DocumentArtifactData(BaseModel):
    """Payload of a document artifact: title, content and markup type."""

    title: str
    content: str
    type: Literal["markdown", "html"]
class Artifact(BaseModel):
    """A code or document artifact with an optional creation timestamp."""

    created_at: Optional[int] = None
    type: ArtifactType
    data: Union[CodeArtifactData, DocumentArtifactData]

    @classmethod
    def from_message(cls, message: ChatAPIMessage) -> Optional["Artifact"]:
        """
        Return the first artifact that parses from the message's annotations.

        Annotations that are not dicts, or whose ``type`` is not
        ``"artifact"``, are skipped. An artifact annotation that fails
        validation is logged as a warning and skipped. Returns ``None`` when
        nothing parses.
        """
        annotations = message.annotations
        if not annotations or not isinstance(annotations, list):
            return None

        for annotation in annotations:
            if not isinstance(annotation, dict):
                continue
            if annotation.get("type") != "artifact":
                continue
            try:
                return cls.model_validate(annotation.get("data"))
            except Exception as e:
                logger.warning(
                    f"Failed to parse artifact from annotation: {annotation}. Error: {e}"
                )
        return None
class ArtifactEvent(Event):
    """Event that carries an ``Artifact``; its type defaults to ``"artifact"``."""

    type: str = "artifact"
    data: Artifact

    def to_response(self) -> dict:
        """Return the event type together with the dumped artifact."""
        payload = self.data.model_dump()
        return {"type": self.type, "data": payload}
# TODO: For backward compatibility, remove this in a minor release
from llama_index.server.models import * # noqa
@@ -11,7 +11,10 @@ from llama_index.core.agent.workflow.workflow_events import (
AgentSetup,
AgentStream,
)
from llama_index.core.workflow import StopEvent, Workflow
from llama_index.core.workflow import (
StopEvent,
Workflow,
)
from llama_index.server.api.callbacks import (
AgentCallTool,
EventCallback,
@@ -20,9 +23,11 @@ from llama_index.server.api.callbacks import (
SuggestNextQuestions,
)
from llama_index.server.api.callbacks.stream_handler import StreamHandler
from llama_index.server.api.models import ChatRequest
from llama_index.server.api.utils.vercel_stream import VercelStreamResponse
from llama_index.server.models.chat import ChatRequest
from llama_index.server.models.hitl import HumanInputEvent
from llama_index.server.services.llamacloud import LlamaCloudFileService
from llama_index.server.services.workflow import HITLWorkflowService
def chat_router(
@@ -38,7 +43,8 @@ def chat_router(
background_tasks: BackgroundTasks,
) -> StreamingResponse:
try:
user_message = request.messages[-1].to_llamaindex_message()
last_message = request.messages[-1]
user_message = last_message.to_llamaindex_message()
chat_history = [
message.to_llamaindex_message() for message in request.messages[:-1]
]
@@ -48,10 +54,21 @@ def chat_router(
workflow = workflow_factory(chat_request=request)
else:
workflow = workflow_factory()
workflow_handler = workflow.run(
user_msg=user_message.content,
chat_history=chat_history,
)
# Check if we should resume a chat with a human response
human_response = last_message.human_response
if human_response:
ctx = await HITLWorkflowService.load_context(
id=request.id,
workflow=workflow,
data=human_response,
)
workflow_handler = workflow.run(ctx=ctx)
else:
workflow_handler = workflow.run(
user_msg=user_message.content,
chat_history=chat_history,
)
callbacks: list[EventCallback] = [
AgentCallTool(),
@@ -66,7 +83,11 @@ def chat_router(
)
return VercelStreamResponse(
content_generator=_stream_content(stream_handler, request, logger),
content_generator=_stream_content(
stream_handler,
logger,
request.id,
),
)
except Exception as e:
logger.error(e)
@@ -99,8 +120,8 @@ def chat_router(
async def _stream_content(
handler: StreamHandler,
request: ChatRequest,
logger: logging.Logger,
chat_id: str,
) -> AsyncGenerator[str, None]:
async def _text_stream(
event: Union[AgentStream, StopEvent],
@@ -126,6 +147,19 @@ async def _stream_content(
async for chunk in _text_stream(event):
handler.accumulate_text(chunk)
yield VercelStreamResponse.convert_text(chunk)
elif isinstance(event, HumanInputEvent):
ctx = handler.workflow_handler.ctx
if ctx is None:
raise RuntimeError("Context is None")
# Save the context with the HITL event
await HITLWorkflowService.save_context(
id=chat_id,
ctx=ctx,
resume_event_type=event.response_event_type,
)
yield VercelStreamResponse.convert_data(event.to_response())
# Break to stop the stream
break
elif isinstance(event, dict):
yield VercelStreamResponse.convert_data(event)
elif hasattr(event, "to_response"):
@@ -2,7 +2,8 @@ import logging
from typing import List
from fastapi import APIRouter
from llama_index.server.api.models import ComponentDefinition
from llama_index.server.models.ui import ComponentDefinition
from llama_index.server.services.custom_ui import CustomUI
@@ -1,6 +1,7 @@
from typing import List, Optional
from llama_index.server.api.models import Artifact, ChatRequest
from llama_index.server.models.artifacts import Artifact
from llama_index.server.models.chat import ChatRequest
def get_artifacts(chat_request: ChatRequest) -> List[Artifact]:
@@ -0,0 +1,34 @@
from llama_index.server.models.artifacts import (
Artifact,
ArtifactEvent,
ArtifactType,
CodeArtifactData,
DocumentArtifactData,
)
from llama_index.server.models.chat import ChatAPIMessage, ChatRequest
from llama_index.server.models.hitl import HumanInputEvent, HumanResponseEvent
from llama_index.server.models.source_nodes import SourceNodes, SourceNodesEvent
from llama_index.server.models.ui import (
AgentRunEvent,
AgentRunEventType,
ComponentDefinition,
UIEvent,
)
# Names exported by a star-import of this module; every entry is one of the
# names imported above from the llama_index.server.models submodules.
__all__ = [
    "Artifact",
    "ArtifactEvent",
    "ArtifactType",
    "DocumentArtifactData",
    "CodeArtifactData",
    "ChatAPIMessage",
    "ChatRequest",
    "UIEvent",
    "ComponentDefinition",
    "AgentRunEvent",
    "AgentRunEventType",
    "SourceNodes",
    "SourceNodesEvent",
    "HumanInputEvent",
    "HumanResponseEvent",
]
@@ -0,0 +1,60 @@
import logging
from enum import Enum
from typing import Literal, Optional, Union
from llama_index.core.workflow.events import Event
from llama_index.server.models.chat import ChatAPIMessage
from pydantic import BaseModel
logger = logging.getLogger(__name__)
class ArtifactType(str, Enum):
    """Kinds of artifact; members are also plain strings (``str`` mixin)."""

    CODE = "code"
    DOCUMENT = "document"
class CodeArtifactData(BaseModel):
    """Payload of a code artifact; one of the two ``Artifact.data`` variants."""

    file_name: str
    code: str
    language: str
class DocumentArtifactData(BaseModel):
    """Payload of a document artifact; one of the two ``Artifact.data`` variants."""

    title: str
    content: str
    # Only these two values are accepted for the document format.
    type: Literal["markdown", "html"]
class Artifact(BaseModel):
    """
    An artifact (code or document) that can be attached to a chat message.
    """

    created_at: Optional[int] = None
    type: ArtifactType
    data: Union[CodeArtifactData, DocumentArtifactData]

    @classmethod
    def from_message(cls, message: ChatAPIMessage) -> Optional["Artifact"]:
        """
        Build an artifact from the first parseable "artifact" annotation.

        Args:
            message: The chat message whose annotations are inspected.

        Returns:
            The first annotation of type "artifact" whose "data" validates
            against this model, or None when the message has no annotation
            list or no such annotation validates.
        """
        annotations = message.annotations
        if not (annotations and isinstance(annotations, list)):
            return None

        for entry in annotations:
            if not isinstance(entry, dict):
                continue
            if entry.get("type") != "artifact":
                continue
            try:
                return cls.model_validate(entry.get("data"))
            except Exception as e:
                # A malformed artifact is logged and skipped so that later
                # annotations still get a chance to parse.
                logger.warning(
                    f"Failed to parse artifact from annotation: {entry}. Error: {e}"
                )
        return None
class ArtifactEvent(Event):
    """Workflow event wrapping an ``Artifact`` for streaming to the client."""

    type: str = "artifact"
    data: Artifact

    def to_response(self) -> dict:
        """Serialize to a dict holding the event type and the dumped artifact."""
        response: dict = {"type": self.type}
        response["data"] = self.data.model_dump()
        return response
@@ -0,0 +1,44 @@
import re
from typing import Any, List, Optional
from pydantic import BaseModel, field_validator
from llama_index.core.types import ChatMessage, MessageRole
class ChatAPIMessage(BaseModel):
    """A single chat message as received by the chat API."""

    role: MessageRole
    content: str
    annotations: Optional[List[Any]] = None

    def to_llamaindex_message(self) -> ChatMessage:
        """Convert to a ``ChatMessage`` carrying only role and content."""
        return ChatMessage(role=self.role, content=self.content)

    @property
    def human_response(self) -> Optional[Any]:
        """
        Data of the first annotation whose type is "human_response".

        Returns an empty dict when that annotation has no "data" key, and
        None when the message has no such annotation.
        """
        for item in self.annotations or []:
            if not isinstance(item, dict):
                continue
            if item.get("type") != "human_response":
                continue
            return item.get("data", {})
        return None
class ChatRequest(BaseModel):
    """
    Request body of the chat endpoint.

    Validation rules:
        * ``messages`` must be non-empty and end with a user message.
        * ``id`` may only contain ASCII letters, digits, ``_`` and ``-``.
    """

    id: str  # see https://ai-sdk.dev/docs/reference/ai-sdk-ui/use-chat#id - constant for the same chat session
    messages: List[ChatAPIMessage]
    data: Optional[Any] = None

    @field_validator("messages")
    @classmethod
    def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
        """
        Ensure there is at least one message and the last one is from the user.

        Raises:
            ValueError: If the list is empty or the last message is not a
                user message.
        """
        # Without this guard `v[-1]` raises IndexError on an empty list, which
        # pydantic does not turn into a validation error.
        if not v:
            raise ValueError("Messages must not be empty")
        if v[-1].role != MessageRole.USER:
            raise ValueError("Last message must be from user")
        return v

    @field_validator("id")
    @classmethod
    def validate_id(cls, v: str) -> str:
        """
        Reject ids containing anything other than letters, digits, ``_`` or ``-``.

        Raises:
            ValueError: If a disallowed character is present.
        """
        if re.search(r"[^a-zA-Z0-9_-]", v):
            raise ValueError("ID contains special characters")
        return v
@@ -0,0 +1,51 @@
from typing import Any, Dict, Type, Union
from llama_index.core.workflow.events import (
HumanResponseEvent as FrameworkHumanResponseEvent,
)
from llama_index.core.workflow.events import InputRequiredEvent
from pydantic import BaseModel, Field
class HumanResponseEvent(FrameworkHumanResponseEvent):
    """
    Use this event to send a response from a human.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Fill in a default ``response`` text when the caller gave none."""
        if "response" not in kwargs:
            payload = kwargs.get("data", {})
            kwargs["response"] = f"Human response with data: {payload}"
        super().__init__(**kwargs)
class HumanInputEvent(InputRequiredEvent):
    """
    Use this event to request input from a human.
    It will block the workflow execution until the human responds.
    """

    response_event_type: Type[HumanResponseEvent] = Field(
        description="The type of event that the workflow is waiting for.",
    )
    event_type: str = Field(
        description="An identifier for the UI component that will be used to render the input.",
    )
    data: Union[Dict[str, Any], BaseModel] = Field(
        description="The data to be sent to the UI component that will be used to render the input.",
    )

    def __init__(self, **kwargs: Any) -> None:
        """Derive a default ``prefix`` from ``event_type`` and ``data`` if absent."""
        if "prefix" not in kwargs:
            ui_component = kwargs.get("event_type", None)
            payload = kwargs.get("data", None)
            kwargs["prefix"] = f"Need input for {ui_component} with data: {payload}"
        super().__init__(**kwargs)

    def to_response(self) -> dict:
        """Return ``{"type", "data"}``; a model payload is dumped to a dict."""
        payload = self.data
        if not isinstance(payload, dict):
            payload = payload.model_dump()
        return {"type": self.event_type, "data": payload}
@@ -0,0 +1,49 @@
from typing import Any, Dict, List, Optional
from pydantic import BaseModel
from llama_index.core.schema import NodeWithScore
from llama_index.core.workflow.events import Event
from llama_index.server.utils.chat_file import get_file_url_from_metadata
class SourceNodesEvent(Event):
    """Workflow event carrying the source nodes to report to the client."""

    nodes: List[NodeWithScore]

    def to_response(self) -> dict:
        """Serialize every node through ``SourceNodes`` into a "sources" payload."""
        serialized_nodes = []
        for node in self.nodes:
            serialized_nodes.append(SourceNodes.from_source_node(node).model_dump())
        return {"type": "sources", "data": {"nodes": serialized_nodes}}
class SourceNodes(BaseModel):
    """Serializable view of one scored source node."""

    id: str
    metadata: Dict[str, Any]
    score: Optional[float]
    text: str
    url: Optional[str]

    @classmethod
    def from_source_node(cls, source_node: NodeWithScore) -> "SourceNodes":
        """Build an instance from a ``NodeWithScore``, resolving its file URL."""
        node = source_node.node
        node_metadata = node.metadata
        file_url = get_file_url_from_metadata(node_metadata)
        fields = dict(
            id=node.node_id,
            metadata=node_metadata,
            score=source_node.score,
            text=node.text,  # type: ignore
            url=file_url,
        )
        return cls(**fields)

    @classmethod
    def from_source_nodes(
        cls, source_nodes: List[NodeWithScore]
    ) -> List["SourceNodes"]:
        """Convert each node with ``from_source_node``, preserving order."""
        return list(map(cls.from_source_node, source_nodes))
@@ -0,0 +1,49 @@
import logging
from enum import Enum
from typing import Optional
from pydantic import BaseModel
from llama_index.core.workflow import Event
logger = logging.getLogger("uvicorn")
class AgentRunEventType(Enum):
    """Kind of an ``AgentRunEvent``; the value is sent as the payload's "type"."""

    TEXT = "text"
    PROGRESS = "progress"
class AgentRunEvent(Event):
    """Workflow event reporting a message from a named agent."""

    name: str
    msg: str
    event_type: AgentRunEventType = AgentRunEventType.TEXT
    data: Optional[dict] = None

    def to_response(self) -> dict:
        """Serialize to an "agent" payload with agent name, type, text and data."""
        payload = {
            "agent": self.name,
            "type": self.event_type.value,
            "text": self.msg,
            "data": self.data,
        }
        return {"type": "agent", "data": payload}
class ComponentDefinition(BaseModel):
    """Description of a UI component: its type, source code and file name."""

    type: str
    code: str
    filename: str
class UIEvent(Event):
    """Workflow event carrying a typed pydantic payload."""

    type: str
    data: BaseModel

    def to_response(self) -> dict:
        """Return the event type together with the dumped payload model."""
        dumped_payload = self.data.model_dump()
        return {"type": self.type, "data": dumped_payload}
@@ -2,7 +2,7 @@ import logging
import os
from typing import List, Optional
from llama_index.server.api.models import ComponentDefinition
from llama_index.server.models.ui import ComponentDefinition
class CustomUI:
@@ -11,7 +11,7 @@ from llama_cloud import ManagedIngestionStatus, PipelineFileCreateCustomMetadata
from pydantic import BaseModel
from llama_index.core.schema import NodeWithScore
from llama_index.server.api.models import SourceNodes
from llama_index.server.models.source_nodes import SourceNodes
from llama_index.server.services.llamacloud.index import get_client
from llama_index.server.utils import llamacloud
@@ -3,14 +3,15 @@ import os
from typing import TYPE_CHECKING, Any, Optional
from llama_cloud import PipelineType
from pydantic import BaseModel, Field, field_validator
from llama_index.core.callbacks import CallbackManager
from llama_index.core.ingestion.api_utils import (
get_client as llama_cloud_get_client,
)
from llama_index.core.settings import Settings
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
from llama_index.server.api.models import ChatRequest
from pydantic import BaseModel, Field, field_validator
from llama_index.server.models.chat import ChatRequest
if TYPE_CHECKING:
from llama_cloud.client import LlamaCloud
@@ -5,7 +5,7 @@ from typing import List, Optional, Union
from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings
from llama_index.server.api.models import ChatAPIMessage
from llama_index.server.models.chat import ChatAPIMessage
from llama_index.server.prompts import SUGGEST_NEXT_QUESTION_PROMPT
logger = logging.getLogger("uvicorn")
@@ -0,0 +1,106 @@
import json
import logging
from pathlib import Path
from typing import Type
from llama_index.core.workflow import (
Context,
JsonSerializer,
Workflow,
)
from llama_index.server.models.hitl import HumanResponseEvent
from llama_index.server.utils.class_meta_serialization import (
type_from_identifier,
type_identifier,
)
logger = logging.getLogger(__name__)
class HITLWorkflowService:
    """
    A service for helping pause and resume a HITL workflow.

    Pausing stores the serialized workflow context in a JSON checkpoint file
    named after an id; resuming rebuilds the context from that file and sends
    the human's response event to it.
    """

    # A key in context that stores the HITL event type
    HITL_CONTEXT_KEY = "human_response_type"

    @staticmethod
    def get_storage_path(id: str) -> Path:
        """
        Return the checkpoint file path for ``id``, creating the directory.

        Args:
            id: Identifier used as the checkpoint file name (without suffix).

        Returns:
            ``output/checkpoints/<id>.json`` relative to the working directory.
        """
        storage_dir = Path("output") / "checkpoints"
        # exist_ok=True already makes this a no-op for an existing directory.
        storage_dir.mkdir(parents=True, exist_ok=True)
        return storage_dir / f"{id}.json"

    @classmethod
    async def save_context(
        cls,
        id: str,
        ctx: Context,
        resume_event_type: Type[HumanResponseEvent],
    ) -> None:
        """
        Save the workflow context as a checkpoint file for the given id.

        Args:
            id: The id to save the context under.
            ctx: The context to save.
            resume_event_type: The event type expected when resuming; its
                identifier is stored in the context before serialization.
        """
        await ctx.set(
            key=cls.HITL_CONTEXT_KEY,
            value=type_identifier(resume_event_type),
        )
        ctx_data = ctx.to_dict(serializer=JsonSerializer())
        with open(cls.get_storage_path(id), "w", encoding="utf-8") as f:
            json.dump(ctx_data, f)

    @classmethod
    async def load_context(
        cls,
        id: str,
        workflow: Workflow,
        data: dict,
    ) -> Context:
        """
        Restore a saved context and send the resume event to it.

        Args:
            id: The id the context was saved under.
            workflow: The workflow the restored context is bound to.
            data: Keyword arguments used to construct the resume event.

        Returns:
            The restored context, with the resume event already sent.

        Raises:
            FileNotFoundError: If no checkpoint file exists for ``id``.
            ValueError: If the checkpoint is not valid JSON, or the resume
                event cannot be constructed.
        """
        file_path = cls.get_storage_path(id)
        if not file_path.exists():
            raise FileNotFoundError(f"No checkpoint found for id: {id}")
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                ctx_data = json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid checkpoint data for id {id}: {e}") from e
        ctx = Context.from_dict(
            workflow=workflow,
            data=ctx_data,
            serializer=JsonSerializer(),
        )
        resume_event = await cls._construct_resume_event(ctx, data)
        ctx.send_event(resume_event)
        return ctx

    @classmethod
    async def _construct_resume_event(
        cls, context: Context, data: dict
    ) -> HumanResponseEvent:
        """
        Build the resume event whose type identifier is stored in the context.

        Args:
            context: The restored context holding the event type identifier.
            data: Keyword arguments passed to the event constructor.

        Raises:
            ValueError: If the context has no resume event type, the stored
                type is not a ``HumanResponseEvent`` subclass, or the event
                cannot be constructed from ``data``.
        """
        event_type_str = await context.get(cls.HITL_CONTEXT_KEY)
        if not event_type_str:
            raise ValueError(
                "Cannot resume the workflow because there is no resume event type in the context"
            )
        resume_event_type = type_from_identifier(event_type_str)
        # issubclass() raises TypeError for non-class objects, so check that
        # the resolved attribute is a class first.
        if not (
            isinstance(resume_event_type, type)
            and issubclass(resume_event_type, HumanResponseEvent)
        ):
            raise ValueError(
                f"Cannot resume the workflow because the resume event type {resume_event_type} is not a HumanResponseEvent"
            )
        try:
            return resume_event_type(**data)
        except Exception as e:
            raise ValueError(
                f"Error constructing resume event: {e}. "
                f"Make sure the provided data is valid for the event type {resume_event_type}"
            ) from e
@@ -14,7 +14,7 @@ from llama_index.core.tools import (
ToolSelection,
)
from llama_index.core.workflow import Context
from llama_index.server.api.models import AgentRunEvent, AgentRunEventType
from llama_index.server.models.ui import AgentRunEvent, AgentRunEventType
from llama_index.core.agent.workflow.workflow_events import ToolCall, ToolCallResult
logger = logging.getLogger("uvicorn")
@@ -0,0 +1,36 @@
import os
from typing import Any, Dict, Optional
from llama_index.server.settings import server_settings
from llama_index.server.utils import llamacloud
def get_file_url_from_metadata(
    metadata: Dict[str, Any],
    data_dir: Optional[str] = None,
) -> Optional[str]:
    """
    Get the URL of a file from the source node metadata.

    Args:
        metadata: Source node metadata. Reads the keys "file_name",
            "private", "file_path" and "URL".
        data_dir: Directory that "file_path" is made relative to; defaults
            to "data".

    Returns:
        A URL under the configured file server prefix when both a file name
        and a prefix are available, otherwise the metadata's "URL" value
        (None if absent).
    """
    url_prefix = server_settings.file_server_url_prefix
    if data_dir is None:
        data_dir = "data"
    file_name = metadata.get("file_name")

    if file_name and url_prefix:
        if llamacloud.is_llamacloud_file(metadata):
            file_name = llamacloud.get_local_file_name(metadata)
            return f"{url_prefix}/output/llamacloud/{file_name}"
        is_private = metadata.get("private", "false") == "true"
        if is_private:
            # file is a private upload
            return f"{url_prefix}/output/uploaded/{file_name}"
        # file is from calling the 'generate' script
        # Get the relative path of file_path to data_dir
        file_path = metadata.get("file_path")
        data_dir = os.path.abspath(data_dir)
        if file_path and data_dir:
            relative_path = os.path.relpath(file_path, data_dir)
            # relpath uses the OS separator; URLs always use forward slashes
            # (no-op on POSIX, fixes backslashes on Windows).
            relative_url_path = relative_path.replace(os.sep, "/")
            return f"{url_prefix}/data/{relative_url_path}"
    # fallback to URL in metadata (e.g. for websites)
    return metadata.get("URL")
@@ -0,0 +1,30 @@
# Helper functions for serializing and deserializing class metadata.
import importlib
from typing import Type
def type_identifier(type: Type) -> str:
    """
    Build the dotted identifier of a type: its module followed by its
    qualified name.
    """
    return ".".join((type.__module__, type.__qualname__))
def type_from_identifier(identifier: str) -> Type:
    """
    Get the type from an identifier.

    The identifier is a dotted path of the form ``<module>.<qualname>``.
    The part before the last dot is tried as the module first. If that
    import fails, shorter module prefixes are tried and the remaining parts
    are resolved as nested attributes, so identifiers of nested classes
    (``module.Outer.Inner``) resolve too.

    Args:
        identifier: Dotted identifier of the type.

    Returns:
        The object the identifier resolves to.

    Raises:
        ValueError: If the identifier is empty or contains no dot.
        ImportError: If no module for the identifier can be imported.
        RuntimeError: If the module is imported but the attribute is missing,
            or any other error occurs while resolving.
    """
    if not identifier or "." not in identifier:
        raise ValueError(f"Invalid type identifier format: {identifier}")

    module, qualname = identifier.rsplit(".", 1)
    try:
        imported_module = importlib.import_module(module)
    except ImportError as e:
        # The qualname may itself be dotted (nested class): look for the
        # longest importable module prefix and walk the rest as attributes.
        parts = identifier.split(".")
        for split_at in range(len(parts) - 2, 0, -1):
            try:
                target = importlib.import_module(".".join(parts[:split_at]))
            except ImportError:
                continue
            try:
                for attr in parts[split_at:]:
                    target = getattr(target, attr)
            except Exception:
                # Keep reporting the original import failure below.
                break
            return target
        raise ImportError(f"Failed to import module '{module}': {e}") from e
    except Exception as e:
        raise RuntimeError(
            f"Failed to resolve type from identifier '{identifier}': {e}"
        ) from e

    try:
        if not hasattr(imported_module, qualname):
            raise AttributeError(f"Module '{module}' has no attribute '{qualname}'")
        return getattr(imported_module, qualname)
    except Exception as e:
        raise RuntimeError(
            f"Failed to resolve type from identifier '{identifier}': {e}"
        ) from e
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@create-llama/llama-index-server",
"private": true,
"version": "0.1.18",
"version": "0.1.20",
"type": "module",
"scripts": {
"prebuild": "uv run -- scripts/frontend.py --mode copy",
+1 -1
View File
@@ -1,6 +1,6 @@
[project]
name = "llama-index-server"
version = "0.1.18"
version = "0.1.20"
description = "llama-index fastapi server"
readme = "README.md"
license = "MIT"
@@ -1,4 +1,5 @@
import logging
from typing import AsyncGenerator, Callable
from unittest.mock import AsyncMock, MagicMock
import pytest
@@ -7,31 +8,32 @@ from httpx import ASGITransport, AsyncClient
from llama_index.core.workflow import StopEvent, Workflow
from llama_index.core.workflow.handler import WorkflowHandler
from llama_index.server.api.models import ChatAPIMessage, ChatRequest
from llama_index.server.api.routers.chat import chat_router
from llama_index.server.models.chat import ChatAPIMessage, ChatRequest, MessageRole
@pytest.fixture()
def logger():
def logger() -> logging.Logger:
return logging.getLogger("test")
@pytest.fixture()
def chat_request():
def chat_request() -> ChatRequest:
"""Create a simple chat request with one user message."""
return ChatRequest(
messages=[ChatAPIMessage(role="user", content="Hello, how are you?")]
id="test",
messages=[ChatAPIMessage(role=MessageRole.USER, content="Hello, how are you?")],
)
@pytest.fixture()
def mock_workflow():
def mock_workflow() -> MagicMock:
"""Create a mock workflow that returns a simple response."""
workflow = MagicMock(spec=Workflow)
handler = AsyncMock(spec=WorkflowHandler)
# Setup the handler to stream a simple response event
async def mock_stream_events():
async def mock_stream_events() -> AsyncGenerator[StopEvent, None]:
yield StopEvent(result="I'm doing well, thank you for asking!")
handler.stream_events.return_value = mock_stream_events()
@@ -41,17 +43,21 @@ def mock_workflow():
@pytest.fixture()
def workflow_factory(mock_workflow):
def workflow_factory(mock_workflow: MagicMock) -> Callable[[], MagicMock]:
"""Create a factory function that returns our mock workflow."""
def factory(verbose=False):
def factory(verbose: bool = False) -> MagicMock:
return mock_workflow
return factory
@pytest.mark.asyncio()
async def test_chat_router(chat_request, workflow_factory, logger):
async def test_chat_router(
chat_request: ChatRequest,
workflow_factory: Callable[[], MagicMock],
logger: logging.Logger,
) -> None:
"""Test that the chat router handles a request correctly."""
# Create a FastAPI app and mount our router
app = FastAPI()
@@ -90,14 +96,14 @@ async def test_chat_router(chat_request, workflow_factory, logger):
@pytest.mark.asyncio()
async def test_chat_with_agent_workflow(logger):
async def test_chat_with_agent_workflow(logger: logging.Logger) -> None:
"""Test that the chat router works with a workflow that mimics an agent workflow."""
# Create a simple workflow that mimics an agent workflow
mock_workflow = MagicMock(spec=Workflow)
handler = AsyncMock(spec=WorkflowHandler)
# Setup the handler to stream a simple response about weather
async def mock_stream_events():
async def mock_stream_events() -> AsyncGenerator[StopEvent, None]:
yield StopEvent(
result="The weather in New York is sunny. I used the weather tool to get this information."
)
@@ -106,7 +112,7 @@ async def test_chat_with_agent_workflow(logger):
mock_workflow.run.return_value = handler
# Create a factory function that returns our mock workflow
def workflow_factory(verbose=False):
def workflow_factory(verbose: bool = False) -> MagicMock:
return mock_workflow
# Create a FastAPI app and mount our router
@@ -116,9 +122,12 @@ async def test_chat_with_agent_workflow(logger):
# Create a chat request asking about weather
chat_request = ChatRequest(
id="test",
messages=[
ChatAPIMessage(role="user", content="What's the weather in New York?")
]
ChatAPIMessage(
role=MessageRole.USER, content="What's the weather in New York?"
)
],
)
# Make a request to the chat endpoint
@@ -9,9 +9,9 @@ from llama_index.core.agent.workflow.workflow_events import AgentStream
from llama_index.core.types import MessageRole
from llama_index.core.workflow import StopEvent
from llama_index.core.workflow.handler import WorkflowHandler
from llama_index.server.api.models import ChatAPIMessage, ChatRequest
from llama_index.server.api.routers.chat import _stream_content
from llama_index.server.api.utils.vercel_stream import VercelStreamResponse
from llama_index.server.models.chat import ChatAPIMessage, ChatRequest
@pytest.fixture()
@@ -22,7 +22,8 @@ def logger() -> logging.Logger:
@pytest.fixture()
def chat_request() -> ChatRequest:
return ChatRequest(
messages=[ChatAPIMessage(role=MessageRole.USER, content="test message")]
id="test",
messages=[ChatAPIMessage(role=MessageRole.USER, content="test message")],
)
@@ -50,7 +51,7 @@ class TestEventStream:
result = [
chunk
async for chunk in _stream_content(
mock_workflow_handler, chat_request, logger
mock_workflow_handler, logger, chat_request.id
)
]
@@ -75,7 +76,7 @@ class TestEventStream:
result = [
chunk
async for chunk in _stream_content(
mock_workflow_handler, chat_request, logger
mock_workflow_handler, logger, chat_request.id
)
]
@@ -99,7 +100,7 @@ class TestEventStream:
result = [
chunk
async for chunk in _stream_content(
mock_workflow_handler, chat_request, logger
mock_workflow_handler, logger, chat_request.id
)
]
@@ -124,7 +125,7 @@ class TestEventStream:
result = [
chunk
async for chunk in _stream_content(
mock_workflow_handler, chat_request, logger
mock_workflow_handler, logger, chat_request.id
)
]
@@ -148,7 +149,7 @@ class TestEventStream:
result = [
chunk
async for chunk in _stream_content(
mock_workflow_handler, chat_request, logger
mock_workflow_handler, logger, chat_request.id
)
]
@@ -171,7 +172,7 @@ class TestEventStream:
result = [
chunk
async for chunk in _stream_content(
mock_workflow_handler, chat_request, logger
mock_workflow_handler, logger, chat_request.id
)
]
@@ -196,7 +197,7 @@ class TestEventStream:
result = [
chunk
async for chunk in _stream_content(
mock_workflow_handler, chat_request, logger
mock_workflow_handler, logger, chat_request.id
)
]
+1 -1
View File
@@ -1936,7 +1936,7 @@ wheels = [
[[package]]
name = "llama-index-server"
version = "0.1.17"
version = "0.1.19"
source = { editable = "." }
dependencies = [
{ name = "cachetools" },