add src code + docs

2026-07-01 21:44:46 -04:00 · 2024-12-27 09:57:19 -08:00
parent dece8cd1d7
commit 3a4fc857dc
13 changed files with 3423 additions and 1 deletions
@@ -169,3 +169,6 @@ cython_debug/

 # PyPI configuration file
 .pypirc
+
+# Streamlit specific
+.streamlit/secrets.toml
@@ -0,0 +1,3 @@
+
+# Launch the Streamlit app through the poetry-managed environment.
+# `run` produces no file, so declare it phony to keep make from ever
+# treating a file or directory named `run` as an up-to-date target.
+.PHONY: run
+run:
+	poetry run streamlit run app/main.py
@@ -1,2 +1,27 @@
 # llamacloud_streamlit
-Streamlit App Showcasing LlamaCloud's Capabilities
+This repo uses Streamlit to create a user-facing demo application that showcases various capabilities of LlamaCloud.
+
+You can use the application now by visiting https://example.com
+
+You will first need access to LlamaCloud so that you can create an API key to use within the app.
+
+## Why did we create this?
+
+We wanted to create a demo that showcases some of the Agentic RAG capabilities that LlamaCloud enables through an interactive UX.
+Additionally, by open-sourcing the codebase for this, we hope that developers can use this code as a reference for setting up their own applications that rely on the LlamaCloud API.
+
+## Development Setup
+
+Here are the steps for setting up your development environment to run this project locally:
+
+1. Clone this repo, e.g. `gh repo clone run-llama/llamacloud_streamlit`
+1. [Install `poetry`](https://python-poetry.org/docs/#installation) if you haven't already
+1. Install the poetry dependencies by running `poetry shell` and then `poetry install` within this project's root directory.
+1. Add a `secrets.toml` file in the `.streamlit` folder and add a value for `openai_key` to it
+    - `touch .streamlit/secrets.toml`
+    - Add a line within the newly created `secrets.toml` that reads `openai_key = "YOUR OPENAI API KEY"`
+1. Run `make run` to run the streamlit app locally. You can then visit the application at `http://localhost:8501`
+    - Please note that you will first need to set up the LlamaCloud API key the app will use on the API Key tab in the UI.
+
+
+
@@ -0,0 +1,28 @@
+import asyncio
+from collections import OrderedDict
+from typing import Callable, Coroutine, Dict
+import streamlit as st
+from tabs.api_key import api_key_tab
+from tabs.indices import indices_tab
+from tabs.composite_retriever import composite_retriever_tab
+from tabs.chat import chat_tab
+
+TABS_DICT: Dict[str, Callable[..., Coroutine]] = OrderedDict([
+    ("API Key", api_key_tab),
+    ("Indices", indices_tab),
+    ("Composite Retriever", composite_retriever_tab),
+    ("Chat", chat_tab),
+])
+
+
+async def main():
+    st.set_page_config(page_title="LlamaCloud App", page_icon="🦙", layout="centered", initial_sidebar_state="auto", menu_items=None)
+
+    tabs = st.tabs(TABS_DICT.keys())
+
+    for tab_name, tab in zip(TABS_DICT.keys(), tabs):
+        with tab:
+            await TABS_DICT[tab_name]()
+
+# Script entry point: drive the async ``main`` to completion on an event
+# loop created by ``asyncio.run``.
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -0,0 +1,18 @@
+import streamlit as st
+
+async def api_key_tab():
+    st.write("Enter your API key for LlamaCloud:")
+    with st.form(key='api_key_form'):
+        if st.session_state.get("llx_base_url") is None:
+            st.session_state.llx_base_url = "https://api.staging.llamaindex.ai"
+        base_url = st.text_input("Base URL", key="llx_base_url", placeholder="https://api.staging.llamaindex.ai")
+        api_key = st.text_input("API Key", type="password", key="llx_api_key", placeholder="llx-...")
+        submit_button = st.form_submit_button(label='Submit')
+
+        if submit_button:
+            if not api_key.startswith("llx-"):
+                st.error("Invalid API key. Please try again.")
+            elif not base_url.startswith("http"):
+                st.error("Base URL must start with http. Please try again")
+            else:
+                st.toast("API Key submitted!")
@@ -0,0 +1,102 @@
+import streamlit as st
+from typing import List
+import openai
+from llama_index.llms.openai import OpenAI
+from llama_index.core.schema import TextNode, QueryBundle, NodeWithScore
+from llama_index.core import Settings
+from llama_index.core.retrievers import BaseRetriever
+from llama_index.core.chat_engine.types import BaseChatEngine
+from llama_index.core.chat_engine import CondensePlusContextChatEngine
+from llama_index.core.memory import ChatMemoryBuffer
+from llama_cloud.client import AsyncLlamaCloud
+from llama_cloud.types import Retriever
+from utils import get_llamacloud_client, get_project_selector
+
+class LlamaCloudCompositeRetriever(BaseRetriever):
+
+    def __init__(self, client: AsyncLlamaCloud, retriever: Retriever) -> None:
+        super().__init__()
+        self.client = client
+        self.retriever = retriever
+
+    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
+        raise NotImplementedError("Use aretrieve instead")
+
+    async def _aretrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
+        results = await self.client.retrievers.retrieve(retriever_id=self.retriever.id, query=query_bundle.query_str)
+        return [
+            NodeWithScore(
+                node=TextNode(
+                    id_=retrieved_node.id,
+                    text=retrieved_node.text,
+                    metadata=retrieved_node.metadata
+                ),
+                score=1.0,
+            )
+            for retrieved_node in results.nodes
+        ]
+
+async def chat_tab():
+    client = get_llamacloud_client()
+    if client is None:
+        st.write("Fill the form on the API Key tab first.")
+        return
+    openai.api_key = st.secrets.openai_key
+    Settings.llm = OpenAI(
+        model="gpt-4o-mini",
+        temperature=0.2,
+        system_prompt="You are a friendly Q&A Chatbot",
+        api_key=st.secrets.openai_key,
+    )
+    st.title("Chat with a Composite Retriever")
+    selected_project = await get_project_selector(client, "chat")
+    retrievers = await client.retrievers.list_retrievers(project_id=selected_project.id)
+    if not retrievers:
+        st.write(f"No retrievers found under '{selected_project.name}' project. Create a composite retriever first on the 'Composite Retriever' tab.")
+        return
+    retrievers = sorted(retrievers, key=lambda r: r.name)
+    selected_retriever = st.selectbox("Select Retriever", retrievers, format_func=lambda r: r.name, key="retriever_selector")
+
+
+    st.session_state.messages = st.session_state.get(
+        "messages",
+        [
+            {
+                "role": "assistant",
+                "content": "Ask me a question about the data ingested by the selected indices!",
+            }
+        ]
+    )
+
+    chat_engine: BaseChatEngine = st.session_state.get(
+        "chat_engine",
+        CondensePlusContextChatEngine.from_defaults(
+            retriever=LlamaCloudCompositeRetriever(
+                client=client,
+                retriever=selected_retriever
+            ),
+            chat_history=st.session_state.messages,
+            memory=ChatMemoryBuffer(token_limit=3000),
+            llm=Settings.llm,
+            verbose=True,
+        ),
+    )
+    st.session_state.chat_engine = chat_engine
+
+    if prompt := st.chat_input(
+        "Ask a question"
+    ):  # Prompt for user input and save to chat history
+        st.session_state.messages.append({"role": "user", "content": prompt})
+
+    for message in st.session_state.messages:  # Write message history to UI
+        with st.chat_message(message["role"]):
+            st.write(message["content"])
+
+    # If last message is not from assistant, generate a new response
+    if st.session_state.messages[-1]["role"] != "assistant":
+        with st.chat_message("assistant"):
+            response_stream = await chat_engine.achat(prompt)
+            st.write(response_stream.response)
+            message = {"role": "assistant", "content": response_stream.response}
+            # Add response to message history
+            st.session_state.messages.append(message)
@@ -0,0 +1,86 @@
+import pandas as pd
+import streamlit as st
+from utils import get_llamacloud_client, get_project_selector
+from llama_cloud.types import RetrieverCreate, RetrieverPipeline
+
+
+
+async def composite_retriever_tab():
+    client = get_llamacloud_client()
+    if client is None:
+        st.write("Fill the form on the API Key tab first.")
+        return
+    selected_project = await get_project_selector(client, "composite_retriever")
+    
+    project_container = st.container(border=True, key="project_container_composite_retriever")
+    project_container.header(selected_project.name)
+
+    pipelines = await client.pipelines.search_pipelines(project_id=selected_project.id)
+    pipelines = sorted(pipelines, key=lambda p: p.name)
+    pipeline_name_to_pipeline = {p.name: p for p in pipelines}
+    with project_container.form(key="create_composite_retriever_form"):
+        composite_retriever_name = st.text_input("Composite Retriever Name", key="composite_retriever_name")
+        # sub_indices: List[RetrieverPipeline] = []
+        # for idx, sub_index in enumerate(sub_indices):
+        #     sub_index_container = st.container(border=True, key=f"sub_index_{idx}")
+        #     sub_index_container.write(f"Sub-Index {idx}")
+        #     default_pipeline_idx = next((i for i, p in enumerate(pipelines) if p.id == sub_index.pipeline_id), 0)
+        #     selected_pipeline = st.selectbox("Select Sub-Index", pipelines, key=f"sub_index_{idx}_selectbox", index=default_pipeline_idx, format_func=lambda p: p.name)
+        #     sub_index.pipeline_id = selected_pipeline.id
+        # add_sub_index = st.button("Add Sub-Index")
+        retriever_pipelines_df = st.data_editor(
+            pd.DataFrame([{"name": None, "description": None, "pipeline_name": ""}]),
+            column_config={
+                "name": st.column_config.TextColumn("Name", required=True),
+                "description": st.column_config.TextColumn("Description", required=False),
+                "pipeline_name": st.column_config.SelectboxColumn("Index", options=[p.name for p in pipelines], required=True),
+            },
+            num_rows="dynamic",
+            hide_index=False,
+            key="retriever_pipelines_df"
+        )
+        create_composite_retriever_button = st.form_submit_button(label="Upsert Composite Retriever")
+            
+        if create_composite_retriever_button:
+            if not composite_retriever_name:
+                project_container.error("Composite Retriever name cannot be empty.")
+            else:
+                retriever_pipelines = [
+                    RetrieverPipeline(
+                        name=row.name,
+                        description=row.description,
+                        pipeline_id=pipeline_name_to_pipeline[row.pipeline_name].id
+                    )
+                    for row in retriever_pipelines_df.itertuples()
+                ]
+                retriever_create_payload = RetrieverCreate(
+                    name=composite_retriever_name,
+                    pipelines=retriever_pipelines
+                )
+                await client.retrievers.upsert_retriever(project_id=selected_project.id, request=retriever_create_payload)
+                project_container.success(f"Composite Retriever {composite_retriever_name} upserted!")
+    
+    retrievers = await client.retrievers.list_retrievers(project_id=selected_project.id)
+    if not retrievers:
+        project_container.write("No existing Composite Retrievers found.")
+        return
+    pipeline_id_to_pipeline = {p.id: p for p in pipelines}
+    project_container.write(f"Composite Retrievers:")
+    for retriever in retrievers:
+        retriever_container = project_container.container(border=True, key="retriever_" + retriever.id)
+        retriever_container.subheader(retriever.name)
+        retriever_container.write(f"Retriever ID: {retriever.id}")
+        retriever_container.write(f"Sub-Indices in Composite Retriever: {len(retriever.pipelines)}")
+        for sub_index in retriever.pipelines:
+            sub_index_container = retriever_container.container(border=True, key=f"sub_index_{sub_index.name}")
+            sub_index_pipeline = pipeline_id_to_pipeline[sub_index.pipeline_id]
+            sub_index_container.subheader(f"Sub-Index: {sub_index.name}")
+            sub_index_container.write(f"Description: {sub_index.description}")
+            sub_index_container.write(f"Pipeline: {sub_index_pipeline.name}")
+        delete_button = retriever_container.button("Delete 🗑️", key="delete_retriever_" + retriever.id)
+        if delete_button:
+            await client.retrievers.delete_retriever(retriever_id=retriever.id)
+            retrievers = await client.retrievers.list_retrievers(project_id=selected_project.id)
+            project_container.success(f"Composite Retriever {retriever.name} deleted!")
+
+    
@@ -0,0 +1,69 @@
+from typing import List
+import streamlit as st
+from utils import get_llamacloud_client, get_project_selector
+from llama_cloud.types import (
+    PipelineCreate,
+    PipelineFileCreate,
+    File,
+    PipelineTransformConfig_Auto,
+    PipelineCreateEmbeddingConfig_OpenaiEmbedding,
+    OpenAiEmbedding
+)
+
+
+async def indices_tab():
+    client = get_llamacloud_client()
+    if client is None:
+        st.write("Fill the form on the API Key tab first.")
+        return
+    supported_extensions = await client.parsing.get_supported_file_extensions()
+    selected_project = await get_project_selector(client, "indices")
+
+    project_container = st.container(border=True, key="project_container_indices")
+    project_container.header(selected_project.name)
+
+    with project_container.form(key="create_pipeline_form"):
+        pipeline_name = st.text_input("Index Name", key="pipeline_name")
+        create_pipeline_button = st.form_submit_button(label="Create Index")
+            
+        if create_pipeline_button:
+            if not pipeline_name:
+                project_container.error("Pipeline name cannot be empty.")
+            else:
+                openai_embedding = OpenAiEmbedding(api_key=st.secrets.openai_key)
+                embedding_config = PipelineCreateEmbeddingConfig_OpenaiEmbedding(type="OPENAI_EMBEDDING", component=openai_embedding)
+                pipeline_payload = PipelineCreate(
+                    name=pipeline_name,
+                    transform_config=PipelineTransformConfig_Auto(mode="auto"),
+                    embedding_config=embedding_config,
+                )
+                await client.pipelines.upsert_pipeline(project_id=selected_project.id, request=pipeline_payload)
+                project_container.success(f"Pipeline {pipeline_name} created!")
+    
+    pipelines = await client.pipelines.search_pipelines(project_id=selected_project.id)
+    project_container.write(f"Add files to indices:")
+    file_types = [supported_extension.lower() for supported_extension in supported_extensions]
+    for pipeline in pipelines:
+        pipeline_files = await client.pipelines.list_pipeline_files(pipeline_id=pipeline.id)
+        pipeline_container = project_container.container(border=True, key="add_files_pipeline_" + pipeline.id)
+        pipeline_container.subheader(pipeline.name)
+        pipeline_container.write(f"Index ID: {pipeline.id}")
+        pipeline_container.write(f"Files in index: {len(pipeline_files)}")
+        with pipeline_container.form(key=f"add_files_form_pipeline_{pipeline.id}"):
+            uploaded_files = st.file_uploader("Upload Files", type=file_types, key="files_" + pipeline.id, accept_multiple_files=True)
+            add_files_button = st.form_submit_button(label="Add Files")
+            if not add_files_button:
+                continue
+            project_files: List[File] = []
+            for idx, uploaded_file in enumerate(uploaded_files):
+                project_file = await client.files.upload_file(project_id=pipeline.project_id, upload_file=uploaded_file)
+                project_files.append(project_file)
+                st.toast(f"{idx + 1}/{len(uploaded_files)}: File {uploaded_file.name} uploaded to project {selected_project.name}!")
+            await client.pipelines.add_files_to_pipeline(
+                pipeline_id=pipeline.id,
+                request=[
+                    PipelineFileCreate(file_id=project_file.id)
+                    for project_file in project_files
+                ]
+            )
+            st.toast(f"{len(uploaded_files)} Files added to pipeline {pipeline.name}!")
@@ -0,0 +1,29 @@
+from typing import Sequence, Optional, List
+from llama_cloud.client import AsyncLlamaCloud
+from llama_cloud.types import ProjectCreate, Project
+import streamlit as st
+
+# Name of the project that `get_project_selector` upserts when the caller
+# does not pass its own `default_project_name`.
+DEFAULT_STREAMLIT_PROJECT_NAME = "Streamlit Project"
+
+def check_session_state_keys_populated(required_state_keys: Sequence[str]) -> bool:
+    return all(st.session_state.get(key) for key in required_state_keys)
+
+
+def get_llamacloud_client() -> Optional[AsyncLlamaCloud]:
+    if not check_session_state_keys_populated(["llx_base_url", "llx_api_key"]):
+        return None
+    return AsyncLlamaCloud(base_url=st.session_state.llx_base_url, token=st.session_state.llx_api_key)
+
+async def get_project_selector(client: AsyncLlamaCloud, key_suffix: str, default_project_name: str = DEFAULT_STREAMLIT_PROJECT_NAME) -> Project:
+    default_project = await client.projects.upsert_project(request=ProjectCreate(name=default_project_name))
+    st.session_state.project_id = st.session_state.get("project_id", default_project.id)
+    projects: List[Project] = await client.projects.list_projects(organization_id=default_project.organization_id)
+    projects = sorted(projects, key=lambda p: p.name)
+    default_project_idx = next((i for i, p in enumerate(projects) if p.id == st.session_state.project_id), 0)
+    selected_project = st.selectbox("Select Project",
+                                    projects,
+                                    key=f"project_selector_{key_suffix}",
+                                    index=default_project_idx,
+                                    format_func=lambda p: p.name)
+    st.session_state.project_id = selected_project.id
+    return selected_project
@@ -0,0 +1,23 @@
+[tool.poetry]
+name = "llamacloud-streamlit"
+version = "0.1.0"
+description = "Streamlit App Showcasing LlamaCloud's Capabilities"
+authors = ["Sourabh Desai <sourabh@runllama.ai>"]
+license = "MIT"
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+llama-index = "^0.12.8"
+llama-index-embeddings-openai = "^0.3.1"
+llama-index-indices-managed-llama-cloud = "^0.6.3"
+streamlit = "^1.41.1"
+llama-cloud = "^0.1.7"
+
+
+[tool.poetry.group.dev.dependencies]
+watchdog = "^6.0.0"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"