feat: Add config for suggest next question (#640)

* Enhance LlamaIndexServer with next question suggestion feature

- Added `suggest_next_questions` parameter to the LlamaIndexServer for suggesting follow-up questions after the assistant's response.
- Updated README.md to document the new configuration option.
- Introduced `SUGGEST_NEXT_QUESTION_PROMPT` in prompts.py for customizable question suggestions.
- Bumped version to 0.1.16 in uv.lock to reflect the new feature.

* Implement next question suggestion feature in LlamaIndexServer

- Added `suggestNextQuestions` option to LlamaIndexServer for suggesting follow-up questions after the assistant's response.
- Updated README.md to include the new configuration option.
- Modified example workflow to utilize the new feature.
- Enhanced chat handler to conditionally send suggested questions based on the new option.

* add changeset

* remove log

* bundle ui instead of download

* check test

* check test

check test

check test

check test

check test

check test

check test

check test

check test

check test

* fix tests

* Update artifact path in workflow and clarify README.md text

- Changed the artifact path in the GitHub Actions workflow from `python/llama-index-server/dist/` to `dist/`.
- Revised README.md to clarify the default prompt used for the `suggest_next_questions` configuration option.

* support changeset for python

* refactor: update llama-index-server structure and workflows

* fix workflows

* fix workflows

* fix workflows

* add changeset

* fix cannot release python

* Update packages/server/README.md

Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com>

* Update starter questions in LlamaIndex App and add TODO for suggestion feature in chat API

---------

Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com>
This commit is contained in:
Huu Le
2025-05-23 12:48:45 +07:00
committed by GitHub
parent bbae802bed
commit 0bc5a0d882
34 changed files with 527 additions and 477 deletions
+5
View File
@@ -0,0 +1,5 @@
---
"@create-llama/llama-index-server": patch
---
Add suggestNextQuestions config
+5
View File
@@ -0,0 +1,5 @@
---
"@llamaindex/server": patch
---
Add suggestNextQuestions config
@@ -16,6 +16,16 @@ jobs:
- uses: pnpm/action-setup@v3
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Setup Node.js
uses: actions/setup-node@v4
with:
+7
View File
@@ -17,6 +17,11 @@ jobs:
- uses: pnpm/action-setup@v3
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install uv
uses: astral-sh/setup-uv@v3
@@ -56,3 +61,5 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
@@ -1,138 +0,0 @@
name: Release llama-index-server
on:
push:
branches:
- main
paths:
- "python/llama-index-server/**"
- ".github/workflows/release_llama_index_server.yml"
pull_request:
types:
- closed
concurrency: ${{ github.workflow }}-${{ github.ref }}
jobs:
release:
name: Create Release PR
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./python/llama-index-server
if: |
github.event_name == 'push' &&
!startsWith(github.ref, 'refs/heads/release/llama-index-server-v') &&
!contains(github.event.head_commit.message, 'Release: llama-index-server v')
steps:
- name: Checkout Repository
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install dependencies
shell: bash
run: uv sync --all-extras --dev
- name: Setup Git
run: |
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git config --global user.name "github-actions[bot]"
- name: Bump patch version
shell: bash
run: |
uvx --from=toml-cli toml set --toml-path=pyproject.toml project.version $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version | awk -F. '{$NF = $NF + 1;}1' OFS=.)
git add pyproject.toml
git commit -m "chore(release): bump llama-index-server version to $(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)"
- name: Get current version
id: get_version
shell: bash
run: |
version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
echo "current_version=${version}" >> "$GITHUB_OUTPUT"
- name: Create Release PR
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.GITHUB_TOKEN }}
commit-message: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
title: "Release: llama-index-server v${{ steps.get_version.outputs.current_version }}"
body: |
This PR was automatically created to release a new version of the llama-index-server package.
Version: ${{ steps.get_version.outputs.current_version }}
Please review the changes and merge to trigger the release.
branch: release/llama-index-server-v${{ steps.get_version.outputs.current_version }}
base: main
labels: release, llama-index-server
publish:
name: Publish to PyPI
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./python/llama-index-server
if: |
github.event_name == 'pull_request' &&
github.event.pull_request.merged == true &&
startsWith(github.event.pull_request.title, 'Release: llama-index-server') &&
startsWith(github.event.pull_request.head.ref, 'release/llama-index-server-v')
steps:
- name: Checkout Repository
uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Install dependencies
shell: bash
run: uv sync --all-extras
- name: Get current version
id: get_version
shell: bash
run: |
version=$(uvx --from=toml-cli toml get --toml-path=pyproject.toml project.version)
echo "current_version=${version}" >> "$GITHUB_OUTPUT"
- name: Build package
shell: bash
run: uv build --no-sources
- name: Publish to PyPI
shell: bash
run: uv publish --token ${{ secrets.PYPI_TOKEN }}
- name: Create GitHub Release
uses: softprops/action-gh-release@v2
with:
tag_name: llama-index-server-v${{ steps.get_version.outputs.current_version }}
name: "llama-index-server v${{ steps.get_version.outputs.current_version }}"
body: |
Release of llama-index-server v${{ steps.get_version.outputs.current_version }}
draft: false
prerelease: false
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+57 -20
View File
@@ -5,6 +5,7 @@ on:
env:
PYTHON_VERSION: "3.9"
UI_TEST: "true"
jobs:
unit-test:
@@ -19,20 +20,27 @@ jobs:
python-version: ["3.9"]
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
- name: Setup Node.js
uses: actions/setup-node@v4
with:
python-version: ${{ matrix.python-version }}
node-version-file: ".nvmrc"
cache: "pnpm"
- name: Install dependencies
shell: bash
run: uv sync --all-extras --dev
run: pnpm install && pnpm build
- name: Run unit tests
shell: bash
@@ -46,20 +54,20 @@ jobs:
working-directory: python/llama-index-server
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install dependencies
shell: bash
run: uv sync --all-extras --dev
run: pnpm install
- name: Run mypy
shell: bash
@@ -73,27 +81,56 @@ jobs:
working-directory: python/llama-index-server
steps:
- uses: actions/checkout@v4
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- uses: pnpm/action-setup@v3
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install build package
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version-file: ".nvmrc"
cache: "pnpm"
- name: Install dependencies
run: pnpm install && pnpm build
- name: Build package
shell: bash
run: uv sync --all-extras
run: uv build
- name: Get the absolute wheel file path and save it to the output
shell: bash
id: get_whl_path
run: |
WHL_FILE=$(readlink -f dist/*.whl)
echo "whl_file=$WHL_FILE" >> $GITHUB_OUTPUT
- name: Test import
shell: bash
run: uv run python -c "from llama_index.server import LlamaIndexServer"
working-directory: ${{ github.workspace }}
env:
WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
run: |
uv run --with $WHL_FILE python -c "from llama_index.server import LlamaIndexServer"
- name: Check frontend resources is present
shell: bash
working-directory: ${{ github.workspace }}
env:
WHL_FILE: ${{ steps.get_whl_path.outputs.whl_file }}
run: |
uv run --with $WHL_FILE python -c "from llama_index.server.chat_ui import check_ui_resources; check_ui_resources()"
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: llama-index-server
path: python/llama-index-server/dist/
path: dist/
+6 -3
View File
@@ -13,7 +13,8 @@
},
"license": "MIT",
"workspaces": [
"packages/*"
"packages/*",
"python/*"
],
"scripts": {
"dev": "pnpm -r dev",
@@ -24,8 +25,10 @@
"format:write": "prettier --ignore-unknown --write .",
"prepare": "husky",
"new-snapshot": "pnpm -r build && changeset version --snapshot",
"new-version": "pnpm -r build && changeset version",
"release": "pnpm -r build && changeset publish",
"new-version-python": "pnpm --filter @create-llama/llama-index-server new-version",
"new-version": "pnpm -r build && changeset version && pnpm new-version-python",
"release-python": "pnpm --filter @create-llama/llama-index-server release",
"release": "pnpm -r build && changeset publish && pnpm release-python",
"release-snapshot": "pnpm -r build && changeset publish --tag snapshot"
},
"devDependencies": {
+1
View File
@@ -65,6 +65,7 @@ The `LlamaIndexServer` accepts the following configuration options:
- `componentsDir`: The directory for custom UI components rendering events emitted by the workflow. The default is undefined, which does not render custom UI components.
- `llamaCloudIndexSelector`: Whether to show the LlamaCloud index selector in the chat UI (requires `LLAMA_CLOUD_API_KEY` to be set in the environment variables) (default: `false`)
- `dev_mode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/src/workflow.ts`. Please start the server in dev mode (`npm run dev`) to use this reload feature.
- `suggestNextQuestions`: Whether to suggest next questions after the assistant's response (default: `true`). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable.
LlamaIndexServer accepts all the configuration options from Nextjs Custom Server such as `port`, `hostname`, `dev`, etc.
See all Nextjs Custom Server options [here](https://nextjs.org/docs/app/building-your-application/configuring/custom-server).
@@ -35,6 +35,7 @@ export const workflowFactory = async () => {
new LlamaIndexServer({
workflow: workflowFactory,
suggestNextQuestions: true,
uiConfig: {
appTitle: "LlamaIndex App",
starterQuestions: ["What is the color of the dog?"],
@@ -47,6 +47,7 @@ export async function POST(req: NextRequest) {
);
const dataStream = toDataStream(workflowEventStream, {
// TODO: Support enable/disable suggestion
callbacks: {
onFinal: async (completion, dataStreamWriter) => {
chatHistory.push({
+4 -1
View File
@@ -16,6 +16,7 @@ export const handleChat = async (
req: IncomingMessage,
res: ServerResponse,
workflowFactory: WorkflowFactory,
suggestNextQuestions: boolean,
) => {
try {
const body = await parseRequestBody(req);
@@ -53,7 +54,9 @@ export const handleChat = async (
role: "assistant" as MessageType,
content: completion,
});
await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
if (suggestNextQuestions) {
await sendSuggestedQuestionsEvent(dataStreamWriter, chatHistory);
}
},
},
});
+1
View File
@@ -1,4 +1,5 @@
export * from "./events";
export * from "./prompts";
export * from "./server";
export * from "./types";
export { generateEventComponent } from "./utils/gen-ui";
+14
View File
@@ -0,0 +1,14 @@
/**
 * Default prompt template used to ask the LLM for follow-up question suggestions.
 *
 * The `{conversation}` placeholder marks where the conversation history goes.
 * The backticks of the answer fence are backslash-escaped because the prompt
 * is written as a template literal.
 */
export const NEXT_QUESTION_PROMPT = `You're a helpful assistant!
Your task is to suggest the next question that user might ask.
Here is the conversation history
---------------------
{conversation}
---------------------
Given the conversation history, please give me 3 questions that user might ask next!
Your answer should be wrapped in three sticks which follows the following format:
\`\`\`
<question 1>
<question 2>
<question 3>
\`\`\`
`;
+11 -3
View File
@@ -18,13 +18,15 @@ export class LlamaIndexServer {
app: ReturnType<typeof next>;
workflowFactory: () => Promise<Workflow> | Workflow;
componentsDir?: string | undefined;
suggestNextQuestions: boolean;
constructor(options: LlamaIndexServerOptions) {
const { workflow, ...nextAppOptions } = options;
const { workflow, suggestNextQuestions, ...nextAppOptions } = options;
this.app = next({ dev, dir: nextDir, ...nextAppOptions });
this.port = nextAppOptions.port ?? parseInt(process.env.PORT || "3000", 10);
this.workflowFactory = workflow;
this.componentsDir = options.uiConfig?.componentsDir;
this.suggestNextQuestions = suggestNextQuestions ?? true;
if (this.componentsDir) {
this.createComponentsDir(this.componentsDir);
@@ -52,7 +54,8 @@ export class LlamaIndexServer {
LLAMA_CLOUD_API: ${JSON.stringify(llamaCloudApi)},
STARTER_QUESTIONS: ${JSON.stringify(starterQuestions)},
COMPONENTS_API: ${JSON.stringify(componentsApi)},
DEV_MODE: ${JSON.stringify(devMode)}
DEV_MODE: ${JSON.stringify(devMode)},
SUGGEST_NEXT_QUESTIONS: ${JSON.stringify(this.suggestNextQuestions)}
}
`;
fs.writeFileSync(configFile, content);
@@ -77,7 +80,12 @@ export class LlamaIndexServer {
// because of https://github.com/vercel/next.js/discussions/79402 we can't use route.ts here, so we need to call this custom route
// when calling `pnpm eject`, the user will get an equivalent route at [path to chat route.ts]
// make sure to keep its semantic in sync with handleChat
return handleChat(req, res, this.workflowFactory);
return handleChat(
req,
res,
this.workflowFactory,
this.suggestNextQuestions,
);
}
if (
+1
View File
@@ -23,4 +23,5 @@ export type UIConfig = {
export type LlamaIndexServerOptions = NextAppOptions & {
workflow: WorkflowFactory;
uiConfig?: UIConfig;
suggestNextQuestions?: boolean;
};
+4 -18
View File
@@ -1,19 +1,7 @@
import { getEnv } from "@llamaindex/env";
import type { DataStreamWriter } from "ai";
import { type ChatMessage, Settings } from "llamaindex";
const NEXT_QUESTION_PROMPT = `You're a helpful assistant! Your task is to suggest the next question that user might ask.
Here is the conversation history
---------------------
{conversation}
---------------------
Given the conversation history, please give me 3 questions that user might ask next!
Your answer should be wrapped in three sticks which follows the following format:
\`\`\`
<question 1>
<question 2>
<question 3>
\`\`\`
`;
import { NEXT_QUESTION_PROMPT } from "../prompts";
export const sendSuggestedQuestionsEvent = async (
streamWriter: DataStreamWriter,
@@ -32,10 +20,8 @@ export async function generateNextQuestions(conversation: ChatMessage[]) {
const conversationText = conversation
.map((message) => `${message.role}: ${message.content}`)
.join("\n");
const message = NEXT_QUESTION_PROMPT.replace(
"{conversation}",
conversationText,
);
const promptTemplate = getEnv("NEXT_QUESTION_PROMPT") || NEXT_QUESTION_PROMPT;
const message = promptTemplate.replace("{conversation}", conversationText);
try {
const response = await Settings.llm.complete({ prompt: message });
+6
View File
@@ -402,6 +402,12 @@ importers:
specifier: ^5.3.2
version: 5.8.3
python/llama-index-server:
dependencies:
'@llamaindex/server':
specifier: workspace:*
version: link:../../packages/server
packages:
'@ai-sdk/provider-utils@2.2.7':
+1
View File
@@ -1,3 +1,4 @@
packages:
- "packages/*"
- "packages/server/examples"
- "python/*"
+1
View File
@@ -5,6 +5,7 @@
**/venv
**/env
**/llama-index-server.egg-info
llama_index/server/resources/ui
# Jupyter files
**/*.ipynb
+1
View File
@@ -84,6 +84,7 @@ The LlamaIndexServer accepts the following configuration parameters:
- `component_dir`: The directory for custom UI components rendering events emitted by the workflow. The default is None, which does not render custom UI components.
- `llamacloud_index_selector`: Whether to show the LlamaCloud index selector in the chat UI (default: False). Requires `LLAMA_CLOUD_API_KEY` to be set.
- `dev_mode`: When enabled, you can update workflow code in the UI and see the changes immediately. It's currently in beta and only supports updating workflow code at `app/workflow.py`. You might also need to set `env="dev"` and start the server with the reload feature enabled.
- `suggest_next_questions`: Whether to suggest next questions after the assistant's response (default: True). You can change the prompt for the next questions by setting the `NEXT_QUESTION_PROMPT` environment variable. The default prompt used is defined in `llama_index.server.prompts.SUGGEST_NEXT_QUESTION_PROMPT`.
- `verbose`: Enable verbose logging
- `api_prefix`: API route prefix (default: "/api")
- `server_url`: The deployment URL of the server (default is None)
@@ -7,11 +7,13 @@ from llama_index.server import LlamaIndexServer, UIConfig
def create_app() -> FastAPI:
app = LlamaIndexServer(
workflow_factory=create_workflow,
suggest_next_questions=True,
env="dev",
ui_config=UIConfig(
app_title="Artifact",
starter_questions=[
"Tell me a funny joke.",
"Tell me some jokes about AI.",
"Tell me a funny joke",
"Tell me some jokes about AI",
],
component_dir="components",
dev_mode=True, # To show the dev UI, should disable this in production
@@ -3,7 +3,7 @@ import os
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, field_validator
from llama_index.core.schema import NodeWithScore
from llama_index.core.types import ChatMessage, MessageRole
@@ -13,13 +13,6 @@ from llama_index.server.settings import server_settings
logger = logging.getLogger("uvicorn")
class ChatConfig(BaseModel):
next_question_suggestions: bool = Field(
default=True,
description="Whether to suggest next questions",
)
class ChatAPIMessage(BaseModel):
role: MessageRole
content: str
@@ -32,7 +25,6 @@ class ChatAPIMessage(BaseModel):
class ChatRequest(BaseModel):
messages: List[ChatAPIMessage]
data: Optional[Any] = None
config: Optional[ChatConfig] = ChatConfig()
@field_validator("messages")
def validate_messages(cls, v: List[ChatAPIMessage]) -> List[ChatAPIMessage]:
@@ -28,6 +28,7 @@ from llama_index.server.services.llamacloud import LlamaCloudFileService
def chat_router(
workflow_factory: Callable[..., Workflow],
logger: logging.Logger,
suggest_next_questions: bool = True,
) -> APIRouter:
router = APIRouter(prefix="/chat")
@@ -56,7 +57,7 @@ def chat_router(
SourceNodesFromToolCall(),
LlamaCloudFileDownload(background_tasks),
]
if request.config and request.config.next_question_suggestions:
if suggest_next_questions:
callbacks.append(SuggestNextQuestions(request))
stream_handler = StreamHandler(
workflow_handler=workflow_handler,
@@ -1,55 +1,87 @@
import importlib.resources
import logging
import shutil
from pathlib import Path
from typing import Optional
import requests
CHAT_UI_VERSION = "0.2.1"
PACKAGE_NAME = "llama_index.server.resources"
RESOURCE_DIR_NAME = "ui"
def download_chat_ui(
def check_ui_resources() -> None:
    """
    Verify that the bundled chat UI resources are present.

    Resolves ``RESOURCE_DIR_NAME`` inside the ``PACKAGE_NAME`` package and
    checks that it is an existing directory.

    Raises:
        FileNotFoundError: If the package cannot be resolved or the UI
            resource directory is missing from it.
    """
    try:
        ui_dir = importlib.resources.files(PACKAGE_NAME).joinpath(RESOURCE_DIR_NAME)
        # joinpath() only builds a path; it does not verify that the entry
        # exists, so the directory has to be probed explicitly.
        ui_present = ui_dir.is_dir()
    except Exception as e:
        raise FileNotFoundError("UI resources not found in bundled package") from e
    if not ui_present:
        raise FileNotFoundError("UI resources not found in bundled package")
def copy_bundled_chat_ui(
logger: Optional[logging.Logger] = None, target_path: str = ".ui"
) -> None:
# Check if the UI resources directory exists
check_ui_resources()
if logger is None:
logger = logging.getLogger("uvicorn")
path = Path(target_path)
temp_dir = _download_package(_get_download_link(CHAT_UI_VERSION))
_copy_ui_files(temp_dir, path)
logger.info("Chat UI downloaded and copied to static folder")
destination_path = Path(target_path)
destination_path.mkdir(parents=True, exist_ok=True)
def _get_download_link(version: str) -> str:
"""Get the download link for the chat UI from the npm registry."""
return f"https://registry.npmjs.org/@llamaindex/server/-/server-{version}.tgz"
def _download_package(url: str) -> Path:
"""Download tar.gz file and extract all files into a temporary directory."""
import io
import tarfile
import tempfile
response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"})
content = response.content
temp_dir = Path(tempfile.mkdtemp())
with tarfile.open(fileobj=io.BytesIO(content), mode="r:gz") as tar:
tar.extractall(path=temp_dir)
return temp_dir
def _copy_ui_files(temp_dir: Path, target_path: Path) -> None:
"""Copy files from the .next directory to the static directory."""
target_path.mkdir(parents=True, exist_ok=True)
next_dir = temp_dir / "package/dist/static"
if next_dir.exists():
for item in next_dir.iterdir():
dest = target_path / item.name
try:
# Clear the destination directory first to avoid stale files
for item in destination_path.iterdir():
if item.is_dir():
shutil.copytree(item, dest, dirs_exist_ok=True)
shutil.rmtree(item)
else:
shutil.copy2(item, dest)
item.unlink()
# Get a reference to the source directory using importlib.resources.files (Python 3.9+)
source_dir_ref = importlib.resources.files(PACKAGE_NAME).joinpath(
RESOURCE_DIR_NAME
)
if not source_dir_ref.is_dir():
logger.error(
f"Static UI resource directory '{RESOURCE_DIR_NAME}' not found in package '{PACKAGE_NAME}'. Path: {source_dir_ref}"
)
logger.error(
"Ensure the static files are correctly bundled with the package and the path is correct."
)
return
for source_item_path_ref in source_dir_ref.iterdir():
# Skip __init__.py or other non-static files if present (though less likely needed with direct iteration)
if source_item_path_ref.name.startswith(
"__"
) or source_item_path_ref.name.endswith(".py"):
continue
dest_item_path = destination_path / source_item_path_ref.name
# importlib.resources.as_file is needed to get a concrete path for shutil operations
with importlib.resources.as_file(
source_item_path_ref
) as concrete_source_item_path:
if concrete_source_item_path.is_dir():
shutil.copytree(
concrete_source_item_path, dest_item_path, dirs_exist_ok=True
)
elif concrete_source_item_path.is_file():
shutil.copy2(concrete_source_item_path, dest_item_path)
else:
logger.warning(
f"Skipping resource '{source_item_path_ref.name}' as it's not a file or directory."
)
logger.info(f"Chat UI files copied from package to '{destination_path}'")
except FileNotFoundError:
logger.error(
"Oops! The chat UI files are not found. Please report this issue to the LlamaIndex team."
)
except Exception as e:
logger.error(f"Failed to copy bundled chat UI files: {e}.")
@@ -0,0 +1,15 @@
# Used by SuggestNextQuestionsService
# Override this prompt by setting the `NEXT_QUESTION_PROMPT` environment variable
# NOTE: the answer fence is written with plain backticks. A backslash before a
# backtick is not a valid Python escape sequence; it triggers an invalid-escape
# warning and leaves literal backslashes in the prompt text.
SUGGEST_NEXT_QUESTION_PROMPT = """You're a helpful assistant! Your task is to suggest the next questions that user might interested in to keep the conversation going.
Here is the conversation history
---------------------
{conversation}
---------------------
Given the conversation history, please give me 3 questions that user might ask next!
Your answer should be wrapped in three sticks without any index numbers and follows the following format:
```
<question 1>
<question 2>
<question 3>
```
"""
@@ -15,7 +15,7 @@ from llama_index.server.api.routers import (
custom_components_router,
dev_router,
)
from llama_index.server.chat_ui import download_chat_ui
from llama_index.server.chat_ui import copy_bundled_chat_ui
from llama_index.server.settings import server_settings
@@ -68,11 +68,12 @@ class LlamaIndexServer(FastAPI):
self,
workflow_factory: Callable[..., Workflow],
logger: Optional[logging.Logger] = None,
use_default_routers: Optional[bool] = True,
use_default_routers: Optional[bool] = None,
env: Optional[str] = None,
ui_config: Optional[Union[UIConfig, dict]] = None,
server_url: Optional[str] = None,
api_prefix: Optional[str] = None,
suggest_next_questions: Optional[bool] = None,
verbose: bool = False,
*args: Any,
**kwargs: Any,
@@ -88,6 +89,7 @@ class LlamaIndexServer(FastAPI):
ui_config: The configuration for the chat UI.
server_url: The URL of the server.
api_prefix: The prefix for the API endpoints.
suggest_next_questions: Whether to suggest next questions after the assistant's response.
verbose: Whether to show verbose logs.
"""
super().__init__(*args, **kwargs)
@@ -95,7 +97,12 @@ class LlamaIndexServer(FastAPI):
self.workflow_factory = workflow_factory
self.logger = logger or logging.getLogger("uvicorn")
self.verbose = verbose
self.use_default_routers = use_default_routers or True
self.use_default_routers = (
True if use_default_routers is None else use_default_routers
)
self.suggest_next_questions = (
True if suggest_next_questions is None else suggest_next_questions
)
if ui_config is None:
self.ui_config = UIConfig()
elif isinstance(ui_config, dict):
@@ -146,6 +153,7 @@ class LlamaIndexServer(FastAPI):
chat_router(
self.workflow_factory,
self.logger,
self.suggest_next_questions,
),
prefix=server_settings.api_prefix,
)
@@ -177,9 +185,11 @@ class LlamaIndexServer(FastAPI):
if not os.path.exists(self.ui_config.ui_path):
os.makedirs(self.ui_config.ui_path)
self.logger.warning(
f"UI files not found, downloading UI to {self.ui_config.ui_path}"
f"UI files not found at {self.ui_config.ui_path}. Copying bundled UI files."
)
copy_bundled_chat_ui(
logger=self.logger, target_path=self.ui_config.ui_path
)
download_chat_ui(logger=self.logger, target_path=self.ui_config.ui_path)
self._mount_static_files(
directory=self.ui_config.ui_path,
path="/",
@@ -6,6 +6,7 @@ from typing import List, Optional, Union
from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings
from llama_index.server.api.models import ChatAPIMessage
from llama_index.server.prompts import SUGGEST_NEXT_QUESTION_PROMPT
logger = logging.getLogger("uvicorn")
@@ -15,28 +16,11 @@ class SuggestNextQuestionsService:
Suggest the next questions that user might ask based on the conversation history.
"""
prompt = PromptTemplate(
r"""
You're a helpful assistant! Your task is to suggest the next questions that user might interested in to keep the conversation going.
Here is the conversation history
---------------------
{conversation}
---------------------
Given the conversation history, please give me 3 questions that user might ask next!
Your answer should be wrapped in three sticks without any index numbers and follows the following format:
\`\`\`
<question 1>
<question 2>
<question 3>
\`\`\`
"""
)
@classmethod
def get_configured_prompt(cls) -> PromptTemplate:
prompt = os.getenv("NEXT_QUESTION_PROMPT", None)
if not prompt:
return cls.prompt
return PromptTemplate(SUGGEST_NEXT_QUESTION_PROMPT)
return PromptTemplate(prompt)
@classmethod
+16
View File
@@ -0,0 +1,16 @@
{
"name": "@create-llama/llama-index-server",
"private": true,
"version": "0.1.16",
"type": "module",
"scripts": {
"prebuild": "uv run -- scripts/frontend.py --mode copy",
"build": "uv build",
"clean": "rm -rf dist build *.egg-info",
"new-version": "uv run python scripts/sync_version.py && git add pyproject.toml",
"release": "uv publish"
},
"dependencies": {
"@llamaindex/server": "workspace:*"
}
}
+2 -1
View File
@@ -63,5 +63,6 @@ dev = [
"llama-cloud>=0.1.17,<1.0.0",
]
[tool.hatch.build.targets.wheel]
[tool.hatch.build]
packages = ["llama_index/"]
artifacts = ["llama_index/server/resources"]
@@ -0,0 +1,154 @@
# /// script
# requires-python = ">=3.10"
# dependencies = []
# ///
# This script is used to build the frontend for the llama-index-server
# You need to have pnpm installed to run this script
import os
import subprocess
import argparse
import shutil
def _get_pnpm_executable() -> str:
"""Determines the correct pnpm executable (pnpm or pnpm.cmd) and returns it.
Exits if pnpm is not found."""
pnpm_exe = shutil.which("pnpm")
if pnpm_exe:
return pnpm_exe
pnpm_cmd_exe = shutil.which("pnpm.cmd")
if pnpm_cmd_exe:
return pnpm_cmd_exe
print("pnpm not found. Please ensure pnpm is installed and in your PATH.")
exit(1)
def check_pnpm_installation() -> None:
    """Verify that pnpm is installed and answers ``pnpm --version``.

    Raises:
        SystemExit: With exit code 1 when pnpm is missing (raised by
            ``_get_pnpm_executable``) or when the version command fails.
    """
    pnpm_exe = _get_pnpm_executable()
    try:
        # capture_output silences stdout/stderr on success.
        subprocess.run([pnpm_exe, "--version"], check=True, capture_output=True)
    except subprocess.CalledProcessError:
        print(
            "pnpm is installed, but '--version' command failed. Please check your pnpm installation."
        )
        # Raise SystemExit directly: the bare `exit()` helper is injected by
        # the `site` module for interactive use and is not meant for programs.
        raise SystemExit(1)
def get_workspace_path() -> str:
    """Return the pnpm workspace root as reported by ``pnpm root -w``.

    The command output is stripped of a trailing ``node_modules`` component
    when present, so the returned path keeps its trailing separator in that
    case.

    Raises:
        SystemExit: With exit code 1 when the pnpm command fails.
    """
    pnpm_exe = _get_pnpm_executable()
    try:
        output = (
            subprocess.check_output([pnpm_exe, "root", "-w"]).decode("utf-8").strip()
        )
    except subprocess.CalledProcessError as e:
        print(f"Failed to get workspace path using 'pnpm root -w': {e}")
        print("Ensure you are in a pnpm workspace and pnpm is functioning correctly.")
        # Raise SystemExit directly: the bare `exit()` helper is injected by
        # the `site` module for interactive use and is not meant for programs.
        raise SystemExit(1)

    # Drop the trailing 'node_modules' component (no-op when it is absent).
    return output.removesuffix("node_modules")
def build_frontend() -> None:
    """Build the ``@llamaindex/server`` package with pnpm.

    Raises:
        SystemExit: With exit code 1 when the build command fails.
    """
    pnpm_exe = _get_pnpm_executable()
    print("Building Frontend...")
    # TODO: This probably can be copied from node_modules to save time
    # but it could be an issue if the user haven't run `pnpm build` for server package
    try:
        subprocess.run(
            [pnpm_exe, "--filter", "@llamaindex/server", "build"], check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Frontend build failed: {e}")
        # Raise SystemExit directly: the bare `exit()` helper is injected by
        # the `site` module for interactive use and is not meant for programs.
        raise SystemExit(1)
    print("Frontend built successfully.")
def get_paths() -> tuple[str, str, str]:
    """Return ``(workspace root, built frontend assets dir, UI resource dir)``.

    The UI resource dir is the ``resources/ui`` folder inside the Python
    ``llama_index/server`` package; the assets dir is ``dist/static`` of the
    ``packages/server`` package.
    """
    root = get_workspace_path()
    ui_target_parts = (
        "python",
        "llama-index-server",
        "llama_index",
        "server",
        "resources",
        "ui",
    )
    ui_target = os.path.join(root, *ui_target_parts)
    built_assets = os.path.join(root, "packages", "server", "dist", "static")
    return root, built_assets, ui_target
def link_static_files() -> None:
    """
    Symlink the built frontend assets into the Python package.

    Only works for POSIX systems. Linking instead of copying is useful for
    development purposes. Any existing entry at the link location (directory,
    file, or symlink, including a dangling one) is replaced.

    Raises:
        SystemExit: With exit code 1 on Windows or when the frontend assets
            directory does not exist.
    """
    # If user is on Windows, tell them to use WSL
    if os.name == "nt":
        print("Windows is not supported. Please use WSL to run this script.")
        raise SystemExit(1)

    print("Linking static files...")
    # Need to link by absolute path of the server directory
    _, fe_assets_dir, link_path = get_paths()
    if not os.path.exists(fe_assets_dir):
        print(
            f"Frontend assets directory {fe_assets_dir} does not exist. Please build the frontend first."
        )
        raise SystemExit(1)

    # lexists() also reports dangling symlinks, which exists() treats as
    # missing; leaving one in place would make the link creation fail.
    if os.path.lexists(link_path):
        if os.path.islink(link_path) or not os.path.isdir(link_path):
            os.unlink(link_path)
        else:
            shutil.rmtree(link_path)

    os.makedirs(os.path.dirname(link_path), exist_ok=True)
    os.symlink(fe_assets_dir, link_path, target_is_directory=True)
    print("Static files linked successfully.")
def copy_static_files() -> None:
    """
    Copy the built frontend assets into the llama-index-server package.

    The source directory is validated before anything is deleted, so a
    missing build never destroys an existing `ui` directory. Whatever
    currently sits at the destination (a symlink, including a dangling one,
    or a real directory) is then removed and replaced by a fresh copy.
    The script exits with status 1 when the frontend assets directory does
    not exist.
    """
    _, fe_assets_dir, link_path = get_paths()

    # Check the source first: removing the old ui directory and then failing
    # inside copytree would leave the package without any UI at all.
    if not os.path.exists(fe_assets_dir):
        print(
            f"Frontend assets directory {fe_assets_dir} does not exist. Please build the frontend first."
        )
        exit(1)

    # Test islink() before exists(): os.path.exists() follows symlinks and
    # returns False for a dangling link, which would otherwise be left in
    # place and make copytree fail when it creates the destination.
    if os.path.islink(link_path):
        os.unlink(link_path)
    elif os.path.exists(link_path):
        shutil.rmtree(link_path)

    # Copy the static files to the output directory
    shutil.copytree(fe_assets_dir, link_path, dirs_exist_ok=True)
    print("Static files copied successfully.")
if __name__ == "__main__":
    # Command-line entry point: verify pnpm, optionally build the frontend,
    # then stage the built assets by linking or copying them.
    cli = argparse.ArgumentParser(
        description="Prepare the frontend for the llama-index-server"
    )
    cli.add_argument(
        "--mode",
        choices=["link", "copy"],
        default="copy",
        help="Link the static files instead of copying them. Only works for POSIX systems.",
    )
    cli.add_argument("--skip-build", action="store_true", help="Skip the build step.")
    options = cli.parse_args()

    check_pnpm_installation()
    if not options.skip_build:
        build_frontend()

    # Any mode other than "link" can only be "copy" (enforced by `choices`).
    stage_assets = link_static_files if options.mode == "link" else copy_static_files
    stage_assets()
+33
View File
@@ -0,0 +1,33 @@
#!/usr/bin/env python3
import json
from pathlib import Path
def sync_versions(
    package_json_path: "str | Path" = "package.json",
    pyproject_path: "str | Path" = "pyproject.toml",
) -> None:
    """Copy the npm package version into the `[project]` table of pyproject.toml.

    The file is scanned line by line, so the `[project]` table is found
    regardless of surrounding blank lines, and the `version` key is matched
    with or without spaces around `=`. Only the first `version` key inside
    `[project]` is rewritten; every other line is written back unchanged.

    Args:
        package_json_path: Path of the package.json to read the version from.
        pyproject_path: Path of the pyproject.toml to update in place.

    Raises:
        KeyError: If package.json has no "version" entry.
        ValueError: If the `[project]` table has no `version` key. The
            pyproject file is left untouched in that case.
    """
    # Read package.json
    with open(package_json_path, "r", encoding="utf-8") as f:
        npm_version = json.load(f)["version"]

    # Read pyproject.toml
    pyproject_file = Path(pyproject_path)
    lines = pyproject_file.read_text(encoding="utf-8").split("\n")

    # Find the version key of the [project] table and replace it
    in_project_table = False
    for index, line in enumerate(lines):
        stripped = line.strip()
        if stripped.startswith("["):
            # Table header: drop a trailing comment before comparing, and
            # stop matching keys as soon as another table begins.
            in_project_table = stripped.split("#", 1)[0].strip() == "[project]"
            continue
        if not in_project_table:
            continue
        key, separator, _ = stripped.partition("=")
        if separator and key.strip() == "version":
            indent = line[: len(line) - len(line.lstrip())]
            lines[index] = f'{indent}version = "{npm_version}"'
            break
    else:
        # Fail loudly instead of reporting an update that never happened.
        raise ValueError(
            f"No `version` key found in the [project] table of {pyproject_file}"
        )

    # Write back to pyproject.toml
    pyproject_file.write_text("\n".join(lines), encoding="utf-8")
    print(f"Updated pyproject.toml version to {npm_version}")
# Run the version sync only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    sync_versions()
@@ -1,13 +1,18 @@
import json
import os
import shutil
import tempfile
from pathlib import Path
import pytest
from httpx import ASGITransport, AsyncClient
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.llms import MockLLM
from llama_index.server import LlamaIndexServer, UIConfig
UI_TEST = os.getenv("UI_TEST", "false").lower() == "true"
def fetch_weather(city: str) -> str:
"""Fetch the weather for a given city."""
@@ -31,8 +36,7 @@ def server() -> LlamaIndexServer:
workflow_factory=_agent_workflow,
verbose=True,
use_default_routers=True,
mount_ui=False,
env="dev",
ui_config=UIConfig(enabled=False),
)
@@ -55,241 +59,93 @@ async def test_server_swagger_docs(server: LlamaIndexServer) -> None:
assert "Swagger UI" in response.text
@pytest.mark.asyncio()
async def test_ui_is_downloaded(server: LlamaIndexServer) -> None:
"""
Test if the UI is downloaded and mounted correctly.
"""
# Clean up any existing static directory first
if os.path.exists(".ui"):
shutil.rmtree(".ui")
# UI Integration Tests
# Make sure you run the scripts/build_frontend.py script before running these tests
if UI_TEST:
# Create a new server with UI enabled
ui_config = UIConfig(
enabled=True,
app_title="Test UI",
starter_questions=["What's the weather like?"],
)
ui_server = LlamaIndexServer(
workflow_factory=_agent_workflow,
verbose=True,
use_default_routers=True,
env="dev",
ui_config=ui_config,
)
@pytest.mark.asyncio()
async def test_ui_is_copied_and_mounted(tmp_path: Path) -> None:
"""
Test if the UI is copied from bundle and mounted correctly.
"""
tmp_ui_dir = str(tmp_path / "ui")
print(f"tmp_ui_dir: {tmp_ui_dir}")
tmp_component_dir = tempfile.mkdtemp()
# Verify that static directory was created with index.html
assert os.path.exists("./.ui"), "Static directory was not created"
assert os.path.isdir("./.ui"), "Static path is not a directory"
assert os.path.exists("./.ui/index.html"), "index.html was not downloaded"
# Check if the config.js was created with correct content
config_path = os.path.join(".ui", "config.js")
assert os.path.exists(config_path), "config.js was not created"
with open(config_path, "r") as f:
config_content = f.read()
assert "window.LLAMAINDEX =" in config_content
config_json = json.loads(
config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
# Create a new server with UI enabled
ui_config = UIConfig(
enabled=True,
app_title="Test UI",
starter_questions=["What's the weather like?"],
ui_path=tmp_ui_dir,
component_dir=tmp_component_dir,
)
assert config_json["CHAT_API"] == "/api/chat"
assert config_json["STARTER_QUESTIONS"] == ["What's the weather like?"]
assert config_json["LLAMA_CLOUD_API"] is None
assert config_json["APP_TITLE"] == "Test UI"
# Check if the UI is mounted and accessible
async with AsyncClient(
transport=ASGITransport(app=ui_server), base_url="http://test"
) as ac:
response = await ac.get("/")
assert response.status_code == 200
assert "text/html" in response.headers["content-type"]
# Clean up after test
shutil.rmtree("./.ui")
@pytest.mark.asyncio()
async def test_ui_is_accessible(server: LlamaIndexServer) -> None:
    """
    Mount the UI on the given server and check that `/` responds with HTML.
    """
    # The UI has to be mounted explicitly before the request is made
    server.mount_ui()

    asgi_transport = ASGITransport(app=server)
    async with AsyncClient(
        transport=asgi_transport, base_url="http://test"
    ) as client:
        root_response = await client.get("/")
        assert root_response.status_code == 200
        assert "text/html" in root_response.headers["content-type"]
@pytest.mark.asyncio()
async def test_ui_config_customization() -> None:
    """
    Check that values set on a custom UIConfig are exposed by the server.
    """
    ui_settings = UIConfig(
        enabled=True,
        app_title="Custom App",
        starter_questions=["Question 1", "Question 2"],
        ui_path=".custom_ui",
    )
    server = LlamaIndexServer(
        workflow_factory=_agent_workflow,
        verbose=True,
        ui_config=ui_settings,
    )

    assert server.ui_config.app_title == "Custom App"
    assert server.ui_config.starter_questions == ["Question 1", "Question 2"]
    assert server.ui_config.ui_path == ".custom_ui"

    # Remove the UI directory in case it was created
    custom_ui_dir = ".custom_ui"
    if os.path.exists(custom_ui_dir):
        shutil.rmtree(custom_ui_dir)
@pytest.mark.asyncio()
async def test_ui_config_from_dict() -> None:
"""
Test if UI configuration can be initialized from a dictionary.
"""
ui_config_dict = {
"enabled": True,
"app_title": "Dict Config App",
"starter_questions": ["Dict Q1", "Dict Q2"],
"ui_path": ".dict_ui",
}
server = LlamaIndexServer(
workflow_factory=_agent_workflow,
verbose=True,
ui_config=ui_config_dict,
)
# Verify the config was properly converted to UIConfig object
assert isinstance(server.ui_config, UIConfig)
assert server.ui_config.app_title == "Dict Config App"
assert server.ui_config.starter_questions == ["Dict Q1", "Dict Q2"]
assert server.ui_config.ui_path == ".dict_ui"
# Verify the config.js is created with correct content
server.mount_ui()
config_path = os.path.join(".dict_ui", "config.js")
assert os.path.exists(config_path), "config.js was not created"
with open(config_path, "r") as f:
config_content = f.read()
assert "window.LLAMAINDEX =" in config_content
config_json = json.loads(
config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
ui_server = LlamaIndexServer(
workflow_factory=_agent_workflow,
verbose=True,
use_default_routers=True,
env="dev",
ui_config=ui_config,
)
assert config_json["APP_TITLE"] == "Dict Config App"
assert config_json["STARTER_QUESTIONS"] == ["Dict Q1", "Dict Q2"]
assert config_json["CHAT_API"] == "/api/chat"
assert config_json["LLAMA_CLOUD_API"] is None
# Clean up
if os.path.exists(".dict_ui"):
shutil.rmtree(".dict_ui")
# Verify that static directory was created with index.html
# List files in tmp_ui_dir
print("Files in tmp_ui_dir: ", os.listdir(tmp_ui_dir))
assert os.path.exists(tmp_ui_dir), "Static directory was not created"
assert os.path.isdir(tmp_ui_dir), "Static path is not a directory"
assert os.path.exists(os.path.join(tmp_ui_dir, "index.html")), (
"index.html was not copied from bundle"
)
# Check if the config.js was created with correct content
config_path = os.path.join(tmp_ui_dir, "config.js")
assert os.path.exists(config_path), "config.js was not created"
async def test_component_dir_creation(server: LlamaIndexServer) -> None:
"""
Test if the component directory is created when specified and doesn't exist.
"""
import os
import shutil
with open(config_path, "r") as f:
config_content = f.read()
assert "window.LLAMAINDEX =" in config_content
config_json = json.loads(
config_content.replace("window.LLAMAINDEX = ", "").rstrip(";")
)
assert config_json["CHAT_API"] == "/api/chat"
assert config_json["STARTER_QUESTIONS"] == ["What's the weather like?"]
assert config_json["LLAMA_CLOUD_API"] is None
assert config_json["APP_TITLE"] == "Test UI"
test_component_dir = "./test_components"
# Verify directory was created
assert os.path.exists(tmp_component_dir), "Component directory was not created"
assert os.path.isdir(tmp_component_dir), "Component path is not a directory"
# Clean up any existing directory
if os.path.exists(test_component_dir):
shutil.rmtree(test_component_dir)
# Verify component route exists
component_route_exists = any(
route.path == "/api/components" # type: ignore
for route in ui_server.routes
)
assert component_route_exists, "Component API route not found in server routes"
# Create server with component directory
_ = LlamaIndexServer(
workflow_factory=_agent_workflow,
verbose=True,
ui_config={
"component_dir": test_component_dir,
"include_ui": True,
},
)
# Check if the UI is mounted and accessible
async with AsyncClient(
transport=ASGITransport(app=ui_server), base_url="http://test"
) as ac:
response = await ac.get("/")
assert response.status_code == 200
assert "text/html" in response.headers["content-type"]
# Verify directory was created
assert os.path.exists(test_component_dir), "Component directory was not created"
assert os.path.isdir(test_component_dir), "Component path is not a directory"
# Clean up after test
shutil.rmtree(test_component_dir)
# Clean up after test
shutil.rmtree(tmp_ui_dir)
shutil.rmtree(tmp_component_dir)
@pytest.mark.asyncio()
async def test_component_router_addition(server: LlamaIndexServer, tmp_path) -> None:
    """
    Check that a server configured with a component directory exposes the
    `/api/components` route.
    """
    components_path = tmp_path / "test_components"
    ui_settings = {
        "component_dir": str(components_path),
        "include_ui": True,
    }

    # Build a dedicated server that has the component directory configured
    component_server = LlamaIndexServer(
        workflow_factory=_agent_workflow,
        verbose=True,
        ui_config=ui_settings,
    )

    # Stop at the first matching route, like `any()` would
    component_route_exists = False
    for route in component_server.routes:
        if route.path == "/api/components":
            component_route_exists = True
            break
    assert component_route_exists, "Component API route not found in server routes"
@pytest.mark.asyncio()
async def test_ui_config_includes_components_api(
    server: LlamaIndexServer, tmp_path
) -> None:
    """
    Check that the generated UI config content mentions `COMPONENTS_API`
    when a component directory is configured.
    """
    components_path = tmp_path / "test_components"
    ui_settings = {
        "component_dir": str(components_path),
        "include_ui": True,
    }

    # Build a dedicated server that has the component directory configured
    component_server = LlamaIndexServer(
        workflow_factory=_agent_workflow,
        verbose=True,
        ui_config=ui_settings,
    )

    # The rendered config content must reference the components API
    rendered_config = component_server.ui_config.get_config_content()
    assert "COMPONENTS_API" in rendered_config, (
        "Components API not found in UI config"
    )
@pytest.mark.asyncio()
async def test_component_router_requires_component_dir(
server: LlamaIndexServer,
) -> None:
async def test_component_router_requires_component_dir() -> None:
"""
Test that adding components router without component_dir raises an error.
"""
tmp_ui_dir = tempfile.mkdtemp()
server_without_component_dir = LlamaIndexServer(
workflow_factory=_agent_workflow,
verbose=True,
ui_config={
"include_ui": True,
},
ui_config=UIConfig(enabled=True, ui_path=tmp_ui_dir),
)
with pytest.raises(
+1 -1
View File
@@ -1897,7 +1897,7 @@ wheels = [
[[package]]
name = "llama-index-server"
version = "0.1.15"
version = "0.1.16"
source = { editable = "." }
dependencies = [
{ name = "cachetools" },