mirror of
https://github.com/run-llama/llamacloud_streamlit.git
synced 2026-07-01 21:44:46 -04:00
add src code + docs
This commit is contained in:
@@ -169,3 +169,6 @@ cython_debug/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Streamlit specific
|
||||
.streamlit/secrets.toml
|
||||
@@ -1,2 +1,27 @@
|
||||
# llamacloud_streamlit
|
||||
Streamlit App Showcasing LlamaCloud's Capabilities
|
||||
This repo uses Streamlit to create a user-facing demo application that showcases various capabilities of LlamaCloud.
|
||||
|
||||
You can use the application now by visiting https://example.com
|
||||
|
||||
You will first need access to LlamaCloud so that you can create an API key to use within the app.
|
||||
|
||||
## Why did we create this?
|
||||
|
||||
We wanted to create a demo that showcases some of the Agentic RAG capabilities that LlamaCloud enables through an interactive UX.
|
||||
Additionally, by open-sourcing the codebase for this, we hope that developers can use this code as a reference for setting up their own applications that rely on the LlamaCloud API.
|
||||
|
||||
## Development Setup
|
||||
|
||||
Here are the steps for setting up your development environment to run this project locally:
|
||||
|
||||
1. Clone this repo e.g. `gh repo clone run-llama/llamacloud_streamlit`
|
||||
1. [Install `poetry`](https://python-poetry.org/docs/#installation) if you haven't already
|
||||
1. Install the poetry dependencies by running `poetry shell` and then `poetry install` within this project's root directory.
|
||||
1. Add a `secrets.toml` file in the `.streamlit` folder and add a value for `openai_key` to it
|
||||
- `touch .streamlit/secrets.toml`
|
||||
- Add a line within the newly created `secrets.toml` that reads `openai_key = "YOUR OPENAI API KEY"`
|
||||
1. Run `make run` to run the streamlit app locally. You can then visit the application at `http://localhost:8501`
|
||||
- Please note that you will first need to set up the LlamaCloud API key the app will use, on the API Keys tab in the UI.
|
||||
|
||||
|
||||
|
||||
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
import asyncio
|
||||
from collections import OrderedDict
|
||||
from typing import Callable, Coroutine, Dict
|
||||
import streamlit as st
|
||||
from tabs.api_key import api_key_tab
|
||||
from tabs.indices import indices_tab
|
||||
from tabs.composite_retriever import composite_retriever_tab
|
||||
from tabs.chat import chat_tab
|
||||
|
||||
# Tab label -> async function that renders that tab. main() builds the tab bar
# from these keys, so the insertion order below is the on-screen order.
TABS_DICT: Dict[str, Callable[..., Coroutine]] = OrderedDict(
    (
        ("API Key", api_key_tab),
        ("Indices", indices_tab),
        ("Composite Retriever", composite_retriever_tab),
        ("Chat", chat_tab),
    )
)
|
||||
|
||||
|
||||
async def main():
    """Configure the page, create one tab per ``TABS_DICT`` entry and render it.

    Each tab's coroutine is awaited in turn, inside that tab's container, so
    the tabs are rendered sequentially in ``TABS_DICT`` order.
    """
    st.set_page_config(
        page_title="LlamaCloud App",
        page_icon="🦙",
        layout="centered",
        initial_sidebar_state="auto",
        menu_items=None,
    )

    tab_containers = st.tabs(TABS_DICT.keys())

    # The containers come back in the same order as the labels that were passed in,
    # so pairing them with the dict items matches each renderer to its tab.
    for (_label, render_tab), container in zip(TABS_DICT.items(), tab_containers):
        with container:
            await render_tab()


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,18 @@
|
||||
import streamlit as st
|
||||
|
||||
async def api_key_tab():
    """Render the form that collects the LlamaCloud base URL and API key.

    Both inputs are bound to ``st.session_state`` through their widget keys
    (``llx_base_url`` and ``llx_api_key``). When the form is submitted the
    values are checked and either an error or a confirmation toast is shown.
    """
    default_base_url = "https://api.staging.llamaindex.ai"

    st.write("Enter your API key for LlamaCloud:")

    with st.form(key='api_key_form'):
        # Seed the session value before the widget is created so the field
        # starts out pre-filled with the default URL.
        if st.session_state.get("llx_base_url") is None:
            st.session_state.llx_base_url = default_base_url
        base_url = st.text_input("Base URL", key="llx_base_url", placeholder=default_base_url)
        api_key = st.text_input("API Key", type="password", key="llx_api_key", placeholder="llx-...")
        submit_button = st.form_submit_button(label='Submit')

    if not submit_button:
        return

    # NOTE(review): the checks below only report problems; the submitted values
    # are already stored under their widget keys regardless of the outcome.
    if not api_key.startswith("llx-"):
        st.error("Invalid API key. Please try again.")
        return

    if not base_url.startswith("http"):
        st.error("Base URL must start with http. Please try again")
        return

    st.toast("API Key submitted!")
|
||||
@@ -0,0 +1,102 @@
|
||||
import streamlit as st
|
||||
from typing import List
|
||||
import openai
|
||||
from llama_index.llms.openai import OpenAI
|
||||
from llama_index.core.schema import TextNode, QueryBundle, NodeWithScore
|
||||
from llama_index.core import Settings
|
||||
from llama_index.core.retrievers import BaseRetriever
|
||||
from llama_index.core.chat_engine.types import BaseChatEngine
|
||||
from llama_index.core.chat_engine import CondensePlusContextChatEngine
|
||||
from llama_index.core.memory import ChatMemoryBuffer
|
||||
from llama_cloud.client import AsyncLlamaCloud
|
||||
from llama_cloud.types import Retriever
|
||||
from utils import get_llamacloud_client, get_project_selector
|
||||
|
||||
class LlamaCloudCompositeRetriever(BaseRetriever):
    """Retriever that delegates to a LlamaCloud composite retriever.

    Only the asynchronous path is implemented; the synchronous hook raises.
    """

    def __init__(self, client: AsyncLlamaCloud, retriever: Retriever) -> None:
        """Store the API client and the composite retriever to query."""
        super().__init__()
        self.client = client
        self.retriever = retriever

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Synchronous retrieval is not supported.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("Use aretrieve instead")

    async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Query the composite retriever and wrap each returned node.

        Args:
            query_bundle: The query; only its ``query_str`` is sent.

        Returns:
            One ``NodeWithScore`` per node in the response, each carrying the
            node's id, text and metadata.
        """
        results = await self.client.retrievers.retrieve(
            retriever_id=self.retriever.id,
            query=query_bundle.query_str,
        )

        scored_nodes: List[NodeWithScore] = []
        for hit in results.nodes:
            text_node = TextNode(id_=hit.id, text=hit.text, metadata=hit.metadata)
            # Every node is given the same fixed score of 1.0.
            scored_nodes.append(NodeWithScore(node=text_node, score=1.0))
        return scored_nodes
|
||||
|
||||
async def chat_tab():
    """Render the chat tab: pick a composite retriever and chat against it.

    Requires the LlamaCloud client from the API Key tab and an ``openai_key``
    entry in Streamlit secrets. The message history and the chat engine are
    kept in ``st.session_state`` so they survive Streamlit reruns.
    """
    client = get_llamacloud_client()
    if client is None:
        st.write("Fill the form on the API Key tab first.")
        return

    openai.api_key = st.secrets.openai_key
    Settings.llm = OpenAI(
        model="gpt-4o-mini",
        temperature=0.2,
        system_prompt="You are a friendly Q&A Chatbot",
        api_key=st.secrets.openai_key,
    )

    st.title("Chat with a Composite Retriever")
    selected_project = await get_project_selector(client, "chat")
    retrievers = await client.retrievers.list_retrievers(project_id=selected_project.id)
    if not retrievers:
        st.write(f"No retrievers found under '{selected_project.name}' project. Create a composite retriever first on the 'Composite Retriever' tab.")
        return
    retrievers = sorted(retrievers, key=lambda r: r.name)
    selected_retriever = st.selectbox("Select Retriever", retrievers, format_func=lambda r: r.name, key="retriever_selector")

    if "messages" not in st.session_state:
        st.session_state.messages = [
            {
                "role": "assistant",
                "content": "Ask me a question about the data ingested by the selected indices!",
            }
        ]

    # Build the engine only when it is missing or was built for a different
    # retriever. Constructing it unconditionally would redo the work on every
    # rerun, and reusing a stale engine would ignore the selectbox above.
    engine_is_stale = (
        "chat_engine" not in st.session_state
        or st.session_state.get("chat_engine_retriever_id") != selected_retriever.id
    )
    if engine_is_stale:
        st.session_state.chat_engine = CondensePlusContextChatEngine.from_defaults(
            retriever=LlamaCloudCompositeRetriever(
                client=client,
                retriever=selected_retriever,
            ),
            chat_history=st.session_state.messages,
            memory=ChatMemoryBuffer(token_limit=3000),
            llm=Settings.llm,
            verbose=True,
        )
        st.session_state.chat_engine_retriever_id = selected_retriever.id
    chat_engine: BaseChatEngine = st.session_state.chat_engine

    # Prompt for user input and save it to the chat history.
    if prompt := st.chat_input("Ask a question"):
        st.session_state.messages.append({"role": "user", "content": prompt})

    # Write the message history to the UI.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    # If the last message is not from the assistant, generate a new response.
    # The question is read from the history rather than from `prompt`, because
    # `prompt` is empty on reruns that were not triggered by the chat input
    # (e.g. a rerun after an earlier failed attempt to answer).
    pending_message = st.session_state.messages[-1]
    if pending_message["role"] != "assistant":
        with st.chat_message("assistant"):
            chat_response = await chat_engine.achat(pending_message["content"])
            st.write(chat_response.response)
        # Add the response to the message history.
        st.session_state.messages.append(
            {"role": "assistant", "content": chat_response.response}
        )
|
||||
@@ -0,0 +1,86 @@
|
||||
import pandas as pd
|
||||
import streamlit as st
|
||||
from utils import get_llamacloud_client, get_project_selector
|
||||
from llama_cloud.types import RetrieverCreate, RetrieverPipeline
|
||||
|
||||
|
||||
|
||||
def _is_blank_cell(value) -> bool:
    """Return True when a data-editor cell is missing (None/NaN) or whitespace-only."""
    return pd.isna(value) or not str(value).strip()


async def composite_retriever_tab():
    """Render the tab for creating, listing and deleting composite retrievers.

    The form collects a retriever name plus an editable table of sub-indices
    (name, description, index). On submit the rows are validated and the
    retriever is upserted. Below the form every retriever of the selected
    project is listed with its sub-indices and a delete button.
    """
    client = get_llamacloud_client()
    if client is None:
        st.write("Fill the form on the API Key tab first.")
        return
    selected_project = await get_project_selector(client, "composite_retriever")

    project_container = st.container(border=True, key="project_container_composite_retriever")
    project_container.header(selected_project.name)

    pipelines = await client.pipelines.search_pipelines(project_id=selected_project.id)
    pipelines = sorted(pipelines, key=lambda p: p.name)
    pipeline_name_to_pipeline = {p.name: p for p in pipelines}
    pipeline_id_to_pipeline = {p.id: p for p in pipelines}

    with project_container.form(key="create_composite_retriever_form"):
        composite_retriever_name = st.text_input("Composite Retriever Name", key="composite_retriever_name")
        retriever_pipelines_df = st.data_editor(
            pd.DataFrame([{"name": None, "description": None, "pipeline_name": ""}]),
            column_config={
                "name": st.column_config.TextColumn("Name", required=True),
                "description": st.column_config.TextColumn("Description", required=False),
                "pipeline_name": st.column_config.SelectboxColumn("Index", options=[p.name for p in pipelines], required=True),
            },
            num_rows="dynamic",
            hide_index=False,
            key="retriever_pipelines_df",
        )
        create_composite_retriever_button = st.form_submit_button(label="Upsert Composite Retriever")

    if create_composite_retriever_button:
        rows = list(retriever_pipelines_df.itertuples())
        # The table starts with a placeholder row whose name and index are unset;
        # looking such a row up by pipeline name would raise KeyError, so reject
        # incomplete rows up front with a readable message instead.
        incomplete_rows = [
            str(row.Index)
            for row in rows
            if _is_blank_cell(row.name) or row.pipeline_name not in pipeline_name_to_pipeline
        ]
        if not composite_retriever_name:
            project_container.error("Composite Retriever name cannot be empty.")
        elif incomplete_rows:
            project_container.error(
                "Every sub-index row needs a name and an index. "
                f"Incomplete row(s): {', '.join(incomplete_rows)}"
            )
        else:
            retriever_pipelines = [
                RetrieverPipeline(
                    name=row.name,
                    description=row.description,
                    pipeline_id=pipeline_name_to_pipeline[row.pipeline_name].id,
                )
                for row in rows
            ]
            retriever_create_payload = RetrieverCreate(
                name=composite_retriever_name,
                pipelines=retriever_pipelines,
            )
            await client.retrievers.upsert_retriever(project_id=selected_project.id, request=retriever_create_payload)
            project_container.success(f"Composite Retriever {composite_retriever_name} upserted!")

    retrievers = await client.retrievers.list_retrievers(project_id=selected_project.id)
    if not retrievers:
        project_container.write("No existing Composite Retrievers found.")
        return

    project_container.write(f"Composite Retrievers:")
    for retriever in retrievers:
        retriever_container = project_container.container(border=True, key="retriever_" + retriever.id)
        retriever_container.subheader(retriever.name)
        retriever_container.write(f"Retriever ID: {retriever.id}")
        retriever_container.write(f"Sub-Indices in Composite Retriever: {len(retriever.pipelines)}")
        for position, sub_index in enumerate(retriever.pipelines):
            # Element keys must be unique across the page. Scope the key by the
            # retriever id and row position so that sub-indices sharing a name
            # (within or across retrievers) do not collide.
            sub_index_container = retriever_container.container(
                border=True,
                key=f"sub_index_{retriever.id}_{position}",
            )
            # A sub-index may reference a pipeline that is not in this project's
            # current listing; fall back to showing its id rather than failing.
            sub_index_pipeline = pipeline_id_to_pipeline.get(sub_index.pipeline_id)
            if sub_index_pipeline is not None:
                pipeline_label = sub_index_pipeline.name
            else:
                pipeline_label = f"(not found: {sub_index.pipeline_id})"
            sub_index_container.subheader(f"Sub-Index: {sub_index.name}")
            sub_index_container.write(f"Description: {sub_index.description}")
            sub_index_container.write(f"Pipeline: {pipeline_label}")

        delete_button = retriever_container.button("Delete 🗑️", key="delete_retriever_" + retriever.id)
        if delete_button:
            await client.retrievers.delete_retriever(retriever_id=retriever.id)
            # The list above was rendered from the pre-delete fetch, so the
            # deleted retriever's card stays visible until the next rerun.
            project_container.success(f"Composite Retriever {retriever.name} deleted!")
|
||||
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
from typing import List
|
||||
import streamlit as st
|
||||
from utils import get_llamacloud_client, get_project_selector
|
||||
from llama_cloud.types import (
|
||||
PipelineCreate,
|
||||
PipelineFileCreate,
|
||||
File,
|
||||
PipelineTransformConfig_Auto,
|
||||
PipelineCreateEmbeddingConfig_OpenaiEmbedding,
|
||||
OpenAiEmbedding
|
||||
)
|
||||
|
||||
|
||||
async def indices_tab():
    """Render the tab for creating indices (pipelines) and adding files to them.

    Shows a form to create an index in the selected project, then one card
    per existing index with an upload form. Submitted files are first uploaded
    to the project and then attached to that index.
    """
    client = get_llamacloud_client()
    if client is None:
        st.write("Fill the form on the API Key tab first.")
        return
    supported_extensions = await client.parsing.get_supported_file_extensions()
    selected_project = await get_project_selector(client, "indices")

    project_container = st.container(border=True, key="project_container_indices")
    project_container.header(selected_project.name)

    with project_container.form(key="create_pipeline_form"):
        pipeline_name = st.text_input("Index Name", key="pipeline_name")
        create_pipeline_button = st.form_submit_button(label="Create Index")

    if create_pipeline_button:
        if not pipeline_name:
            project_container.error("Pipeline name cannot be empty.")
        else:
            openai_embedding = OpenAiEmbedding(api_key=st.secrets.openai_key)
            embedding_config = PipelineCreateEmbeddingConfig_OpenaiEmbedding(type="OPENAI_EMBEDDING", component=openai_embedding)
            pipeline_payload = PipelineCreate(
                name=pipeline_name,
                transform_config=PipelineTransformConfig_Auto(mode="auto"),
                embedding_config=embedding_config,
            )
            await client.pipelines.upsert_pipeline(project_id=selected_project.id, request=pipeline_payload)
            project_container.success(f"Pipeline {pipeline_name} created!")

    # Fetched after the optional upsert above so a newly created index is listed.
    pipelines = await client.pipelines.search_pipelines(project_id=selected_project.id)
    project_container.write(f"Add files to indices:")
    file_types = [supported_extension.lower() for supported_extension in supported_extensions]

    for pipeline in pipelines:
        pipeline_files = await client.pipelines.list_pipeline_files(pipeline_id=pipeline.id)
        pipeline_container = project_container.container(border=True, key="add_files_pipeline_" + pipeline.id)
        pipeline_container.subheader(pipeline.name)
        pipeline_container.write(f"Index ID: {pipeline.id}")
        pipeline_container.write(f"Files in index: {len(pipeline_files)}")
        with pipeline_container.form(key=f"add_files_form_pipeline_{pipeline.id}"):
            uploaded_files = st.file_uploader("Upload Files", type=file_types, key="files_" + pipeline.id, accept_multiple_files=True)
            add_files_button = st.form_submit_button(label="Add Files")

        if not add_files_button:
            continue

        # Submitting the form without selecting anything would otherwise send an
        # empty request and report "0 Files added"; tell the user instead.
        if not uploaded_files:
            pipeline_container.warning("Select at least one file before adding files.")
            continue

        project_files: List[File] = []
        for idx, uploaded_file in enumerate(uploaded_files):
            project_file = await client.files.upload_file(project_id=pipeline.project_id, upload_file=uploaded_file)
            project_files.append(project_file)
            st.toast(f"{idx + 1}/{len(uploaded_files)}: File {uploaded_file.name} uploaded to project {selected_project.name}!")

        await client.pipelines.add_files_to_pipeline(
            pipeline_id=pipeline.id,
            request=[
                PipelineFileCreate(file_id=project_file.id)
                for project_file in project_files
            ],
        )
        st.toast(f"{len(uploaded_files)} Files added to pipeline {pipeline.name}!")
|
||||
@@ -0,0 +1,29 @@
|
||||
from typing import Sequence, Optional, List
|
||||
from llama_cloud.client import AsyncLlamaCloud
|
||||
from llama_cloud.types import ProjectCreate, Project
|
||||
import streamlit as st
|
||||
|
||||
DEFAULT_STREAMLIT_PROJECT_NAME = "Streamlit Project"
|
||||
|
||||
def check_session_state_keys_populated(required_state_keys: Sequence[str]) -> bool:
    """Return True only if every given session-state key holds a truthy value.

    Args:
        required_state_keys: Names to look up in ``st.session_state``.

    Returns:
        False as soon as one key is missing or falsy, otherwise True
        (including when the sequence is empty).
    """
    for state_key in required_state_keys:
        if not st.session_state.get(state_key):
            return False
    return True
|
||||
|
||||
|
||||
def get_llamacloud_client() -> Optional[AsyncLlamaCloud]:
    """Build a LlamaCloud client from the credentials held in session state.

    Returns:
        An ``AsyncLlamaCloud`` configured with the stored ``llx_base_url`` and
        ``llx_api_key``, or None when either value is missing or empty.
    """
    credentials_ready = check_session_state_keys_populated(["llx_base_url", "llx_api_key"])
    if credentials_ready:
        return AsyncLlamaCloud(
            base_url=st.session_state.llx_base_url,
            token=st.session_state.llx_api_key,
        )
    return None
|
||||
|
||||
async def get_project_selector(client: AsyncLlamaCloud, key_suffix: str, default_project_name: str = DEFAULT_STREAMLIT_PROJECT_NAME) -> Project:
    """Render a project selectbox and return the chosen project.

    Upserts the default project, lists the projects of that project's
    organization sorted by name, and pre-selects the project id remembered in
    ``st.session_state.project_id`` (falling back to the first entry).

    Args:
        client: LlamaCloud API client.
        key_suffix: Appended to the selectbox widget key so that each caller
            gets its own widget.
        default_project_name: Name of the project that is upserted.

    Returns:
        The project picked in the selectbox; its id is also written back to
        ``st.session_state.project_id``.
    """
    default_project = await client.projects.upsert_project(request=ProjectCreate(name=default_project_name))
    if "project_id" not in st.session_state:
        st.session_state.project_id = default_project.id

    all_projects: List[Project] = await client.projects.list_projects(organization_id=default_project.organization_id)
    all_projects = sorted(all_projects, key=lambda p: p.name)

    # Find the position of the remembered project; stay on 0 when it is absent.
    remembered_id = st.session_state.project_id
    default_project_idx = 0
    for position, project in enumerate(all_projects):
        if project.id == remembered_id:
            default_project_idx = position
            break

    selected_project = st.selectbox(
        "Select Project",
        all_projects,
        key=f"project_selector_{key_suffix}",
        index=default_project_idx,
        format_func=lambda p: p.name,
    )
    st.session_state.project_id = selected_project.id
    return selected_project
|
||||
Generated
+3036
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,23 @@
|
||||
[tool.poetry]
|
||||
name = "llamacloud-streamlit"
|
||||
version = "0.1.0"
|
||||
description = "Streamlit App Showcasing LlamaCloud's Capabilities"
|
||||
authors = ["Sourabh Desai <sourabh@runllama.ai>"]
|
||||
license = "MIT"
|
||||
readme = "README.md"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.11"
|
||||
llama-index = "^0.12.8"
|
||||
llama-index-embeddings-openai = "^0.3.1"
|
||||
llama-index-indices-managed-llama-cloud = "^0.6.3"
|
||||
streamlit = "^1.41.1"
|
||||
llama-cloud = "^0.1.7"
|
||||
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
watchdog = "^6.0.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
Reference in New Issue
Block a user