add src code + docs

This commit is contained in:
Sourabh Desai
2024-12-27 09:57:19 -08:00
parent dece8cd1d7
commit 3a4fc857dc
13 changed files with 3423 additions and 1 deletions
+3
View File
@@ -169,3 +169,6 @@ cython_debug/
# PyPI configuration file
.pypirc
# Streamlit specific
.streamlit/secrets.toml
+3
View File
@@ -0,0 +1,3 @@
run:
poetry run streamlit run app/main.py
+26 -1
View File
@@ -1,2 +1,27 @@
# llamacloud_streamlit
Streamlit App Showcasing LlamaCloud's Capabilities
This repo uses Streamlit to create a user-facing demo application that showcases various capabilities of LlamaCloud.
You can use the application now by visiting https://example.com
You will first need access to LlamaCloud in order to create an API key to use within the app.
## Why did we create this?
We wanted to create a demo that showcases some of the Agentic RAG capabilities that LlamaCloud enables through an interactive UX.
Additionally, by open-sourcing the codebase for this, we hope that developers can use this code as a reference for setting up their own applications that rely on the LlamaCloud API.
## Development Setup
Here are the steps for setting up your development environment to run this project locally:
1. Clone this repo e.g. `gh repo clone run-llama/llamacloud_streamlit`
1. [Install `poetry`](https://python-poetry.org/docs/#installation) if you haven't already
1. Install the poetry dependencies by running `poetry shell` and then `poetry install` within this project's root directory.
1. Add a `secrets.toml` file in the `.streamlit` folder and add a value for `openai_key` to it
- `touch .streamlit/secrets.toml`
- Add a line within the newly created `secrets.toml` that reads `openai_key = "YOUR OPENAI API KEY"`
1. Run `make run` to run the streamlit app locally. You can then visit the application at `http://localhost:8501`
- Please note that you will first need to set up the LlamaCloud API key that the app will use, on the "API Key" tab in the UI.
View File
+28
View File
@@ -0,0 +1,28 @@
import asyncio
from collections import OrderedDict
from typing import Callable, Coroutine, Dict
import streamlit as st
from tabs.api_key import api_key_tab
from tabs.indices import indices_tab
from tabs.composite_retriever import composite_retriever_tab
from tabs.chat import chat_tab
# Registry of the app's tabs: display label -> coroutine function that renders
# the tab body. Insertion order is the order in which main() creates the tabs.
TABS_DICT: Dict[str, Callable[..., Coroutine]] = OrderedDict()
TABS_DICT["API Key"] = api_key_tab
TABS_DICT["Indices"] = indices_tab
TABS_DICT["Composite Retriever"] = composite_retriever_tab
TABS_DICT["Chat"] = chat_tab
async def main():
    """Render the app: configure the page, then draw every registered tab.

    Each renderer registered in ``TABS_DICT`` is awaited inside its own
    Streamlit tab container, in registration order.
    """
    # Page configuration is issued before any other Streamlit command here.
    st.set_page_config(
        page_title="LlamaCloud App",
        page_icon="🦙",
        layout="centered",
        initial_sidebar_state="auto",
        menu_items=None,
    )
    # st.tabs is documented to take a sequence of labels, so hand it a real
    # list rather than a dict keys view.
    tab_containers = st.tabs(list(TABS_DICT))
    # Pair each container with its renderer directly instead of looking every
    # label up in the dict again.
    for tab_container, render_tab in zip(tab_containers, TABS_DICT.values()):
        with tab_container:
            await render_tab()
# Script entry point: run the async main() to completion when this file is
# executed as a script (as opposed to being imported).
if __name__ == "__main__":
    asyncio.run(main())
View File
+18
View File
@@ -0,0 +1,18 @@
import streamlit as st
async def api_key_tab():
    """Render the form that collects the LlamaCloud base URL and API key.

    Both inputs are bound to ``st.session_state`` through their widget keys
    (``llx_base_url`` and ``llx_api_key``). On submit the values are
    sanity-checked and either an error or a confirmation toast is shown.
    """
    default_base_url = "https://api.staging.llamaindex.ai"

    st.write("Enter your API key for LlamaCloud:")
    with st.form(key='api_key_form'):
        # Seed the widget's session-state slot so the field starts pre-filled.
        if st.session_state.get("llx_base_url") is None:
            st.session_state.llx_base_url = default_base_url
        base_url = st.text_input("Base URL", key="llx_base_url", placeholder=default_base_url)
        api_key = st.text_input("API Key", type="password", key="llx_api_key", placeholder="llx-...")
        submit_button = st.form_submit_button(label='Submit')

        if submit_button:
            if not api_key.startswith("llx-"):
                st.error("Invalid API key. Please try again.")
            # Require a full scheme: a bare "http" prefix would also accept
            # values such as "httpfoo" that are not URLs at all.
            elif not base_url.startswith(("http://", "https://")):
                st.error("Base URL must start with http:// or https://. Please try again")
            else:
                st.toast("API Key submitted!")
+102
View File
@@ -0,0 +1,102 @@
import streamlit as st
from typing import List
import openai
from llama_index.llms.openai import OpenAI
from llama_index.core.schema import TextNode, QueryBundle, NodeWithScore
from llama_index.core import Settings
from llama_index.core.retrievers import BaseRetriever
from llama_index.core.chat_engine.types import BaseChatEngine
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_cloud.client import AsyncLlamaCloud
from llama_cloud.types import Retriever
from utils import get_llamacloud_client, get_project_selector
class LlamaCloudCompositeRetriever(BaseRetriever):
    """Retriever that delegates to a LlamaCloud composite retriever.

    Only the async path is implemented; the sync ``_retrieve`` always raises.
    """

    def __init__(self, client: AsyncLlamaCloud, retriever: Retriever) -> None:
        """Store the API client and the retriever whose id is queried."""
        super().__init__()
        self.retriever = retriever
        self.client = client

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Unsupported synchronous entry point."""
        raise NotImplementedError("Use aretrieve instead")

    async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Query the retriever and wrap each returned node as a ``NodeWithScore``.

        Every node is given a fixed score of 1.0.
        """
        response = await self.client.retrievers.retrieve(
            retriever_id=self.retriever.id,
            query=query_bundle.query_str,
        )
        scored_nodes: List[NodeWithScore] = []
        for hit in response.nodes:
            text_node = TextNode(id_=hit.id, text=hit.text, metadata=hit.metadata)
            scored_nodes.append(NodeWithScore(node=text_node, score=1.0))
        return scored_nodes
async def chat_tab():
    """Render the chat tab: pick a composite retriever and chat against it.

    Requires the API key form to have been filled in; otherwise a hint is
    shown and nothing else is rendered. The message list and the chat engine
    are kept in ``st.session_state`` (keys ``messages``, ``chat_engine`` and
    ``chat_engine_retriever_id``) so they survive Streamlit reruns.
    """
    client = get_llamacloud_client()
    if client is None:
        st.write("Fill the form on the API Key tab first.")
        return

    openai.api_key = st.secrets.openai_key
    Settings.llm = OpenAI(
        model="gpt-4o-mini",
        temperature=0.2,
        system_prompt="You are a friendly Q&A Chatbot",
        api_key=st.secrets.openai_key,
    )

    st.title("Chat with a Composite Retriever")
    selected_project = await get_project_selector(client, "chat")
    retrievers = await client.retrievers.list_retrievers(project_id=selected_project.id)
    if not retrievers:
        st.write(f"No retrievers found under '{selected_project.name}' project. Create a composite retriever first on the 'Composite Retriever' tab.")
        return
    retrievers = sorted(retrievers, key=lambda r: r.name)
    selected_retriever = st.selectbox("Select Retriever", retrievers, format_func=lambda r: r.name, key="retriever_selector")

    if "messages" not in st.session_state:
        st.session_state.messages = [
            {
                "role": "assistant",
                "content": "Ask me a question about the data ingested by the selected indices!",
            }
        ]

    # Reuse the cached engine only while it still targets the selected
    # retriever. Previously the first engine ever built was reused forever, so
    # switching the selectbox had no effect on what was queried, and a
    # throwaway engine was constructed on every rerun as the .get() default.
    chat_engine: BaseChatEngine = st.session_state.get("chat_engine")
    engine_is_stale = st.session_state.get("chat_engine_retriever_id") != selected_retriever.id
    if chat_engine is None or engine_is_stale:
        chat_engine = CondensePlusContextChatEngine.from_defaults(
            retriever=LlamaCloudCompositeRetriever(
                client=client,
                retriever=selected_retriever,
            ),
            chat_history=st.session_state.messages,
            memory=ChatMemoryBuffer(token_limit=3000),
            llm=Settings.llm,
            verbose=True,
        )
        st.session_state.chat_engine = chat_engine
        st.session_state.chat_engine_retriever_id = selected_retriever.id

    if prompt := st.chat_input(
        "Ask a question"
    ):  # Prompt for user input and save to chat history
        st.session_state.messages.append({"role": "user", "content": prompt})

    for message in st.session_state.messages:  # Write message history to UI
        with st.chat_message(message["role"]):
            st.write(message["content"])

    # If last message is not from assistant, generate a new response.
    last_message = st.session_state.messages[-1]
    if last_message["role"] != "assistant":
        with st.chat_message("assistant"):
            # Answer the stored user message rather than `prompt`: on a rerun
            # that starts with an unanswered user message, `prompt` is None.
            response = await chat_engine.achat(last_message["content"])
            st.write(response.response)
            # Add response to message history
            st.session_state.messages.append(
                {"role": "assistant", "content": response.response}
            )
+86
View File
@@ -0,0 +1,86 @@
import pandas as pd
import streamlit as st
from utils import get_llamacloud_client, get_project_selector
from llama_cloud.types import RetrieverCreate, RetrieverPipeline
async def composite_retriever_tab():
    """Render the tab for creating, listing and deleting composite retrievers.

    Requires the API key form to have been filled in; otherwise a hint is
    shown and nothing else is rendered. The upper form upserts a composite
    retriever built from the rows of a data editor (one row per sub-index);
    below it, every retriever of the selected project is listed with its
    sub-indices and a delete button.
    """
    client = get_llamacloud_client()
    if client is None:
        st.write("Fill the form on the API Key tab first.")
        return

    selected_project = await get_project_selector(client, "composite_retriever")
    project_container = st.container(border=True, key="project_container_composite_retriever")
    project_container.header(selected_project.name)

    pipelines = await client.pipelines.search_pipelines(project_id=selected_project.id)
    pipelines = sorted(pipelines, key=lambda p: p.name)
    pipeline_name_to_pipeline = {p.name: p for p in pipelines}

    with project_container.form(key="create_composite_retriever_form"):
        composite_retriever_name = st.text_input("Composite Retriever Name", key="composite_retriever_name")
        retriever_pipelines_df = st.data_editor(
            pd.DataFrame([{"name": None, "description": None, "pipeline_name": ""}]),
            column_config={
                "name": st.column_config.TextColumn("Name", required=True),
                "description": st.column_config.TextColumn("Description", required=False),
                "pipeline_name": st.column_config.SelectboxColumn("Index", options=[p.name for p in pipelines], required=True),
            },
            num_rows="dynamic",
            hide_index=False,
            key="retriever_pipelines_df",
        )
        create_composite_retriever_button = st.form_submit_button(label="Upsert Composite Retriever")

    if create_composite_retriever_button:
        rows = list(retriever_pipelines_df.itertuples())
        # The editor starts with a blank row (name None, index ""), and cells
        # can be left empty; such rows used to crash the dict lookup below
        # with a KeyError. Collect them so the user gets an error instead.
        invalid_rows = [
            row.Index
            for row in rows
            if not isinstance(row.name, str)
            or not row.name.strip()
            or row.pipeline_name not in pipeline_name_to_pipeline
        ]
        if not composite_retriever_name:
            project_container.error("Composite Retriever name cannot be empty.")
        elif invalid_rows:
            row_labels = ", ".join(str(row_index) for row_index in invalid_rows)
            project_container.error(
                f"Every sub-index needs a name and an existing index. Please fix row(s): {row_labels}."
            )
        else:
            retriever_pipelines = [
                RetrieverPipeline(
                    name=row.name,
                    # An empty description cell may not be a string; send None then.
                    description=row.description if isinstance(row.description, str) else None,
                    pipeline_id=pipeline_name_to_pipeline[row.pipeline_name].id,
                )
                for row in rows
            ]
            retriever_create_payload = RetrieverCreate(
                name=composite_retriever_name,
                pipelines=retriever_pipelines,
            )
            await client.retrievers.upsert_retriever(project_id=selected_project.id, request=retriever_create_payload)
            project_container.success(f"Composite Retriever {composite_retriever_name} upserted!")

    retrievers = await client.retrievers.list_retrievers(project_id=selected_project.id)
    if not retrievers:
        project_container.write("No existing Composite Retrievers found.")
        return

    pipeline_id_to_pipeline = {p.id: p for p in pipelines}
    project_container.write("Composite Retrievers:")
    for retriever in retrievers:
        retriever_container = project_container.container(border=True, key="retriever_" + retriever.id)
        retriever_container.subheader(retriever.name)
        retriever_container.write(f"Retriever ID: {retriever.id}")
        retriever_container.write(f"Sub-Indices in Composite Retriever: {len(retriever.pipelines)}")
        for position, sub_index in enumerate(retriever.pipelines):
            # Element keys must be unique, and two retrievers may each have a
            # sub-index with the same name, so qualify the key with the
            # retriever id and the sub-index position.
            sub_index_container = retriever_container.container(
                border=True,
                key=f"sub_index_{retriever.id}_{position}_{sub_index.name}",
            )
            # The referenced pipeline may be absent from this project's
            # listing; show its raw id instead of raising KeyError.
            sub_index_pipeline = pipeline_id_to_pipeline.get(sub_index.pipeline_id)
            if sub_index_pipeline is not None:
                pipeline_label = sub_index_pipeline.name
            else:
                pipeline_label = f"{sub_index.pipeline_id} (not found in this project)"
            sub_index_container.subheader(f"Sub-Index: {sub_index.name}")
            sub_index_container.write(f"Description: {sub_index.description}")
            sub_index_container.write(f"Pipeline: {pipeline_label}")
        delete_button = retriever_container.button("Delete 🗑️", key="delete_retriever_" + retriever.id)
        if delete_button:
            await client.retrievers.delete_retriever(retriever_id=retriever.id)
            # No re-fetch here: the loop iterates the list fetched above, so
            # rebinding `retrievers` mid-loop had no effect on what is shown.
            project_container.success(f"Composite Retriever {retriever.name} deleted!")
+69
View File
@@ -0,0 +1,69 @@
from typing import List
import streamlit as st
from utils import get_llamacloud_client, get_project_selector
from llama_cloud.types import (
PipelineCreate,
PipelineFileCreate,
File,
PipelineTransformConfig_Auto,
PipelineCreateEmbeddingConfig_OpenaiEmbedding,
OpenAiEmbedding
)
async def indices_tab():
    """Render the tab for creating indices (pipelines) and adding files to them.

    Requires the API key form to have been filled in; otherwise a hint is
    shown and nothing else is rendered. The upper form upserts a pipeline
    with an OpenAI embedding config; below it, every pipeline of the selected
    project gets its own upload form.
    """
    client = get_llamacloud_client()
    if client is None:
        st.write("Fill the form on the API Key tab first.")
        return

    supported_extensions = await client.parsing.get_supported_file_extensions()
    selected_project = await get_project_selector(client, "indices")
    project_container = st.container(border=True, key="project_container_indices")
    project_container.header(selected_project.name)

    with project_container.form(key="create_pipeline_form"):
        pipeline_name = st.text_input("Index Name", key="pipeline_name")
        create_pipeline_button = st.form_submit_button(label="Create Index")

    if create_pipeline_button:
        if not pipeline_name:
            project_container.error("Pipeline name cannot be empty.")
        else:
            openai_embedding = OpenAiEmbedding(api_key=st.secrets.openai_key)
            embedding_config = PipelineCreateEmbeddingConfig_OpenaiEmbedding(type="OPENAI_EMBEDDING", component=openai_embedding)
            pipeline_payload = PipelineCreate(
                name=pipeline_name,
                transform_config=PipelineTransformConfig_Auto(mode="auto"),
                embedding_config=embedding_config,
            )
            await client.pipelines.upsert_pipeline(project_id=selected_project.id, request=pipeline_payload)
            project_container.success(f"Pipeline {pipeline_name} created!")

    pipelines = await client.pipelines.search_pipelines(project_id=selected_project.id)
    project_container.write("Add files to indices:")
    file_types = [extension.lower() for extension in supported_extensions]
    for pipeline in pipelines:
        pipeline_files = await client.pipelines.list_pipeline_files(pipeline_id=pipeline.id)
        pipeline_container = project_container.container(border=True, key="add_files_pipeline_" + pipeline.id)
        pipeline_container.subheader(pipeline.name)
        pipeline_container.write(f"Index ID: {pipeline.id}")
        pipeline_container.write(f"Files in index: {len(pipeline_files)}")
        with pipeline_container.form(key=f"add_files_form_pipeline_{pipeline.id}"):
            uploaded_files = st.file_uploader("Upload Files", type=file_types, key="files_" + pipeline.id, accept_multiple_files=True)
            add_files_button = st.form_submit_button(label="Add Files")

        if not add_files_button:
            continue
        # Submitting with nothing selected used to send an empty request and
        # toast "0 Files added"; tell the user what is missing instead.
        if not uploaded_files:
            pipeline_container.warning("Select at least one file before clicking 'Add Files'.")
            continue

        project_files: List[File] = []
        for idx, uploaded_file in enumerate(uploaded_files):
            project_file = await client.files.upload_file(project_id=pipeline.project_id, upload_file=uploaded_file)
            project_files.append(project_file)
            st.toast(f"{idx + 1}/{len(uploaded_files)}: File {uploaded_file.name} uploaded to project {selected_project.name}!")
        await client.pipelines.add_files_to_pipeline(
            pipeline_id=pipeline.id,
            request=[
                PipelineFileCreate(file_id=project_file.id)
                for project_file in project_files
            ],
        )
        st.toast(f"{len(uploaded_files)} Files added to pipeline {pipeline.name}!")
+29
View File
@@ -0,0 +1,29 @@
from typing import Sequence, Optional, List
from llama_cloud.client import AsyncLlamaCloud
from llama_cloud.types import ProjectCreate, Project
import streamlit as st
DEFAULT_STREAMLIT_PROJECT_NAME = "Streamlit Project"
def check_session_state_keys_populated(required_state_keys: Sequence[str]) -> bool:
    """Return True only if every given key has a truthy value in ``st.session_state``.

    Missing keys and falsy values (``None``, empty string, ...) both count as
    not populated. An empty ``required_state_keys`` yields True.
    """
    for state_key in required_state_keys:
        if not st.session_state.get(state_key):
            return False
    return True
def get_llamacloud_client() -> Optional[AsyncLlamaCloud]:
    """Build an async LlamaCloud client from the values stored in session state.

    Returns None when ``llx_base_url`` or ``llx_api_key`` is missing or empty
    in ``st.session_state``.
    """
    credentials_ready = check_session_state_keys_populated(["llx_base_url", "llx_api_key"])
    if credentials_ready:
        state = st.session_state
        return AsyncLlamaCloud(base_url=state.llx_base_url, token=state.llx_api_key)
    return None
async def get_project_selector(client: AsyncLlamaCloud, key_suffix: str, default_project_name: str = DEFAULT_STREAMLIT_PROJECT_NAME) -> Project:
    """Render a project selectbox and return the project the user picked.

    The default project is upserted first, and its organization is used to
    list the projects offered in the selectbox (sorted by name). The chosen
    project's id is written to ``st.session_state.project_id``.

    Args:
        client: LlamaCloud client used for the project API calls.
        key_suffix: Appended to the selectbox widget key so that each caller
            gets its own widget.
        default_project_name: Name of the project to upsert and to preselect
            when no project id is stored in session state yet.
    """
    fallback_project = await client.projects.upsert_project(
        request=ProjectCreate(name=default_project_name)
    )
    if "project_id" not in st.session_state:
        st.session_state.project_id = fallback_project.id

    projects: List[Project] = await client.projects.list_projects(
        organization_id=fallback_project.organization_id
    )
    projects = sorted(projects, key=lambda project: project.name)

    # Preselect the remembered project; fall back to the first entry when it
    # is not among the listed projects.
    preselected_idx = 0
    for position, project in enumerate(projects):
        if project.id == st.session_state.project_id:
            preselected_idx = position
            break

    selected_project = st.selectbox(
        "Select Project",
        projects,
        key=f"project_selector_{key_suffix}",
        index=preselected_idx,
        format_func=lambda project: project.name,
    )
    st.session_state.project_id = selected_project.id
    return selected_project
Generated
+3036
View File
File diff suppressed because it is too large Load Diff
+23
View File
@@ -0,0 +1,23 @@
[tool.poetry]
name = "llamacloud-streamlit"
version = "0.1.0"
description = "Streamlit App Showcasing LlamaCloud's Capabilities"
authors = ["Sourabh Desai <sourabh@runllama.ai>"]
license = "MIT"
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.11"
llama-index = "^0.12.8"
llama-index-embeddings-openai = "^0.3.1"
llama-index-indices-managed-llama-cloud = "^0.6.3"
streamlit = "^1.41.1"
llama-cloud = "^0.1.7"
[tool.poetry.group.dev.dependencies]
watchdog = "^6.0.0"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"