mirror of
https://github.com/run-llama/notebookllama.git
synced 2026-06-30 22:17:57 -04:00
Adding document chat and moving to a multi-page app
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
OPENAI_API_KEY="sk-***"
|
||||
LLAMACLOUD_API_KEY="llx-***"
|
||||
ELEVENLABS_API_KEY="sk_***"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
The MIT License
|
||||
|
||||
Copyright (c) Clelia Astra Bertelli
|
||||
Copyright (c) Jerry Liu
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
@@ -1,3 +1,67 @@
|
||||
# NotebookLM clone
|
||||
# NotebookLlaMa🦙
|
||||
|
||||
This project is aimed at producing a fully open-source, LlamaCloud-backed alternative to NotebookLM.
|
||||
## A fluffy and open-source alternative to NotebookLM!
|
||||
|
||||
This project is aimed at producing a fully open-source, [**LlamaCloud**](https://cloud.llamaindex.ai)-backed alternative to NotebookLM.
|
||||
|
||||
### Get it up and running!
|
||||
|
||||
Get the GitHub repository:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/run-llama/notebooklm-clone
|
||||
```
|
||||
|
||||
Install dependencies:
|
||||
|
||||
```bash
|
||||
cd notebooklm-clone/
|
||||
uv sync
|
||||
```
|
||||
|
||||
Modify the `.env.example` file with your API keys:
|
||||
|
||||
- `OPENAI_API_KEY`: find it [on OpenAI Platform](https://platform.openai.com/api-keys)
|
||||
- `ELEVENLABS_API_KEY`: find it [on ElevenLabs Settings](https://elevenlabs.io/app/settings/api-keys)
|
||||
- `LLAMACLOUD_API_KEY`: find it [on LlamaCloud Dashboard](https://cloud.llamaindex.ai/)
|
||||
|
||||
Rename the file to `.env`:
|
||||
|
||||
```bash
|
||||
mv .env.example .env
|
||||
```
|
||||
|
||||
Now, you will have to execute the following scripts:
|
||||
|
||||
```bash
|
||||
uv run tools/create_llama_extract_agent.py
|
||||
uv run tools/create_llama_cloud_index.py
|
||||
```
|
||||
|
||||
And you're ready to set up the app!
|
||||
|
||||
Run the **MCP** server:
|
||||
|
||||
```bash
|
||||
uv run src/notebooklm_clone/server.py
|
||||
```
|
||||
|
||||
Now, launch the Streamlit app:
|
||||
|
||||
```bash
|
||||
streamlit run src/notebooklm_clone/Home.py
|
||||
```
|
||||
|
||||
> [!IMPORTANT]
|
||||
>
|
||||
> _You might need to install `ffmpeg` if you do not have it installed already_
|
||||
|
||||
And start exploring the app at `http://localhost:8501/`.
|
||||
|
||||
### Contributing
|
||||
|
||||
Contribute to this project following the [guidelines](./CONTRIBUTING.md).
|
||||
|
||||
### License
|
||||
|
||||
This project is provided under an [MIT License](LICENSE).
|
||||
|
||||
@@ -53,8 +53,8 @@ def sync_create_podcast(file_content: str):
|
||||
|
||||
# Display the network
|
||||
st.set_page_config(
|
||||
page_title="NotebookLlaMa",
|
||||
page_icon="🦙",
|
||||
page_title="NotebookLlaMa - Home",
|
||||
page_icon="🏠",
|
||||
layout="wide",
|
||||
menu_items={
|
||||
"Get Help": "https://github.com/run-llama/notebooklm-clone/discussions/categories/general",
|
||||
@@ -62,7 +62,10 @@ st.set_page_config(
|
||||
"About": "An OSS alternative to NotebookLM that runs with the power of a flully Llama!",
|
||||
},
|
||||
)
|
||||
st.title("NotebookLlaMa🦙")
|
||||
st.sidebar.header("Home🏠")
|
||||
st.sidebar.info("To switch to the Document Chat, select it from above!🔺")
|
||||
st.markdown("---")
|
||||
st.markdown("## NotebookLlaMa - Home🦙")
|
||||
|
||||
file_input = st.file_uploader(
|
||||
label="Upload your source PDF file!", accept_multiple_files=False
|
||||
@@ -57,10 +57,11 @@ class MultiTurnConversation(BaseModel):
|
||||
|
||||
|
||||
class PodcastGenerator(BaseModel):
|
||||
model_config: ConfigDict = ConfigDict(arbitrary_types_allowed=True)
|
||||
llm: StructuredLLM
|
||||
client: AsyncElevenLabs
|
||||
|
||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||
|
||||
@model_validator(mode="after")
|
||||
def validate_podcast(self) -> Self:
|
||||
try:
|
||||
@@ -69,6 +70,7 @@ class PodcastGenerator(BaseModel):
|
||||
raise ValueError(
|
||||
f"The output class of the structured LLM must be {MultiTurnConversation.__qualname__}, your LLM has output class: {self.llm.output_cls.__qualname__}"
|
||||
)
|
||||
return self
|
||||
|
||||
async def _conversation_script(self, file_transcript: str) -> MultiTurnConversation:
|
||||
response = await self.llm.achat(
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
import streamlit as st
|
||||
import asyncio
|
||||
|
||||
from llama_index.tools.mcp import BasicMCPClient
|
||||
|
||||
MCP_CLIENT = BasicMCPClient(command_or_url="http://localhost:8000/mcp")
|
||||
|
||||
|
||||
async def chat(inpt: str):
    """Ask the MCP server's ``query_index_tool`` a question and return the answer text.

    Args:
        inpt: The user's question, forwarded as the tool's ``question`` argument.

    Returns:
        The ``text`` attribute of the first content item in the tool result.

    Raises:
        ValueError: If the tool result contains no content items.
    """
    result = await MCP_CLIENT.call_tool(
        tool_name="query_index_tool", arguments={"question": inpt}
    )
    # Fail with a readable message instead of a bare "list index out of range"
    # when the tool produced nothing.
    # NOTE(review): presumably this happens when the server-side tool returns
    # None -- confirm against the MCP server implementation.
    if not result.content:
        raise ValueError("The document index returned no answer for this question.")
    return result.content[0].text
|
||||
|
||||
|
||||
def sync_chat(inpt: str):
    """Blocking wrapper around :func:`chat` for use from synchronous code.

    Args:
        inpt: The user's question.

    Returns:
        Whatever ``chat(inpt)`` resolves to.
    """
    pending_answer = chat(inpt)
    # asyncio.run drives the coroutine to completion on a fresh event loop.
    return asyncio.run(pending_answer)
|
||||
|
||||
|
||||
# Chat Interface
st.set_page_config(page_title="NotebookLlaMa - Document Chat", page_icon="🗣")

st.sidebar.header("Document Chat🗣")
st.sidebar.info("To switch to the Home page, select it from above!🔺")
st.markdown("---")
st.markdown("## NotebookLlaMa - Document Chat🗣")

# Start with an empty transcript the first time this session opens the page
if "messages" not in st.session_state:
    st.session_state["messages"] = []

# Replay the stored transcript so earlier turns stay visible on app rerun
for past_message in st.session_state["messages"]:
    speaker = past_message["role"]
    with st.chat_message(speaker):
        # The message text is rendered for every entry
        st.markdown(past_message["content"])
        # Assistant entries that stored sources also get a collapsible section
        if speaker == "assistant" and "sources" in past_message:
            with st.expander("Sources"):
                st.markdown(past_message["sources"])
|
||||
|
||||
# React to user input
if prompt := st.chat_input("Ask a question about your document"):
    # Echo the question in the chat and record it in the history
    st.chat_message("user").markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Get bot response
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                response = sync_chat(prompt)

                # An answer may be followed by a "## Sources" section; split it
                # off so it can be shown in a collapsible expander.
                answer_part, marker, sources_part = response.partition("## Sources")
                if marker:
                    main_response = answer_part.strip()
                    # Keep a blank line between the heading and the list.
                    # Gluing them together ("## Sources- ...") would render
                    # the first source as part of the heading.
                    sources = "## Sources\n\n" + sources_part.strip()
                else:
                    main_response = response
                    sources = None

                st.markdown(main_response)

                # Build the history entry once; attach sources only if present
                assistant_message = {"role": "assistant", "content": main_response}
                if sources:
                    with st.expander("Sources"):
                        st.markdown(sources)
                    assistant_message["sources"] = sources
                st.session_state.messages.append(assistant_message)

            except Exception as e:
                # Top-level UI boundary: show the failure in the chat and keep
                # it in the history rather than crashing the page.
                error_msg = f"Error: {str(e)}"
                st.markdown(error_msg)
                st.session_state.messages.append(
                    {"role": "assistant", "content": error_msg}
                )
|
||||
@@ -6,8 +6,6 @@ import warnings
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
from llama_index.core.llms import ChatMessage
|
||||
from llama_index.core.query_engine.multistep_query_engine import MultiStepQueryEngine
|
||||
from llama_index.core.indices.query.query_transform import StepDecomposeQueryTransform
|
||||
from llama_cloud_services import LlamaExtract, LlamaParse
|
||||
from llama_cloud_services.extract import SourceText
|
||||
from llama_cloud.client import AsyncLlamaCloud
|
||||
@@ -90,11 +88,9 @@ if (
|
||||
)
|
||||
PARSER = LlamaParse(api_key=os.getenv("LLAMACLOUD_API_KEY"), result_type="markdown")
|
||||
PIPELINE_ID = os.getenv("LLAMACLOUD_PIPELINE_ID")
|
||||
qe = LlamaCloudIndex(
|
||||
QE = LlamaCloudIndex(
|
||||
api_key=os.getenv("LLAMACLOUD_API_KEY"), pipeline_id=PIPELINE_ID
|
||||
).as_query_engine(llm=LLM)
|
||||
step_decompose = StepDecomposeQueryTransform(llm=LLM)
|
||||
MS_QE = MultiStepQueryEngine(query_engine=qe, query_transform=step_decompose)
|
||||
LLM_STRUCT = LLM.as_structured_llm(MindMap)
|
||||
|
||||
|
||||
@@ -157,13 +153,13 @@ async def get_mind_map(summary: str, highlights: List[str]) -> Union[str, None]:
|
||||
|
||||
|
||||
async def query_index(question: str) -> Union[str, None]:
    """Answer a question with the query engine ``QE`` and list the supporting sources.

    Args:
        question: Natural-language question passed to ``QE.aquery``.

    Returns:
        A markdown string with an "## Answer" section followed by a
        "## Sources" bullet list built from the text of the response's
        source nodes, or ``None`` when the response text is empty.
    """
    # Query exactly once, through QE.
    response = await QE.aquery(question)
    if not response.response:
        return None
    sources = [node.text for node in response.source_nodes]
    # A single heading, written "## Sources" with the space after the hashes
    # so it is a valid markdown heading.
    return (
        "## Answer\n\n"
        + response.response
        + "\n\n## Sources\n\n- "
        + "\n- ".join(sources)
    )
|
||||
|
||||
Reference in New Issue
Block a user