Update with simplified app

2026-07-01 20:14:34 -04:00 · 2023-07-25 15:34:44 -07:00
parent b3e83890f7
commit 9d2b6e5074
5 changed files with 72 additions and 142 deletions
@@ -1,25 +1,22 @@
 ## Web Wanderer

-This is a lightweight app for the LangChain [Web Research Retriever](https://github.com/langchain-ai/langchain/pull/8102).
+This is a lightweight app using the [Web Research Retriever](https://github.com/langchain-ai/langchain/pull/8102).

 You only need to supply a few things.

+In the `settings()` function, supply:
+
 ### Search
-Supply search functionality e.g., Google - 
-```
-export GOOGLE_CSE_ID=xxx
-export GOOGLE_API_KEY=xxx
-search = GoogleSearchAPIWrapper() 
-```
+Select the search tool you want to use (e.g., GoogleSearchAPIWrapper).

-### Public API
-Supply API key(s) e.g., OpenAI -
-```
-export OPENAI_API_KEY=sk-xxx
-```
+### Vectorstore
+Select the vectorstore you want to use (e.g., Chroma).

-### Private
-Follow [setup](https://python.langchain.com/docs/use_cases/question_answering/local_retrieval_qa) for local LLMs and supply path. 
+### LLM
+Select the LLM you want to use (e.g., ChatOpenAI).

-### Run
-streamlit run web_wanderer.py
+Then, run:
+
+```
+streamlit run web_explorer.py
+```
@@ -0,0 +1,59 @@
+import os
+import streamlit as st
+from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.retrievers.web_research import WebResearchRetriever
+
+@st.cache_resource
+def settings():
+    """Build the web retriever and LLM used by the app.
+
+    This is the single place to choose the vectorstore, the LLM and the
+    search tool. It is wrapped in ``st.cache_resource`` so Streamlit can
+    reuse the built objects instead of rebuilding them on every rerun.
+
+    Returns:
+        tuple: ``(web_retriever, llm)`` -- the ``WebResearchRetriever`` and
+        the chat model it was built with.
+    """
+
+    # Vectorstore
+    from langchain.vectorstores import Chroma
+    from langchain.embeddings.openai import OpenAIEmbeddings
+    vectorstore_public = Chroma(embedding_function=OpenAIEmbeddings())
+
+    # LLM
+    from langchain.chat_models import ChatOpenAI
+    llm = ChatOpenAI(temperature=0)
+
+    # Search
+    from langchain.utilities import GoogleSearchAPIWrapper
+    # Use setdefault so credentials already exported in the environment are
+    # kept; the "xxx" placeholders are only a fallback to be replaced here.
+    # (Presumably GoogleSearchAPIWrapper reads these variables -- confirm.)
+    os.environ.setdefault("GOOGLE_CSE_ID", "xxx")
+    os.environ.setdefault("GOOGLE_API_KEY", "xxx")
+    search = GoogleSearchAPIWrapper()
+
+    # Initialize
+    web_retriever = WebResearchRetriever.from_llm(
+        vectorstore=vectorstore_public,
+        llm=llm,
+        search=search,
+    )
+
+    return web_retriever, llm
+
+# Page chrome: sidebar image, title and a short description of the app
+st.sidebar.image("img/ai.png")
+st.header("`Interweb Explorer`")
+# Adjacent string literals are joined with no separator, so the first piece
+# must end with a space to keep the two sentences apart.
+st.info("`I am an AI that can answer questions by exploring, reading, and summarizing web pages. "
+    "I can be configured to use different modes: public API or private (no data sharing).`")
+
+# Make retriever and llm
+web_retriever, llm = settings()
+
+# User input
+question = st.text_input("`Ask a question:`")
+
+if question:
+
+    # Generate answer (w/ citations)
+    import logging
+    logging.basicConfig()
+    # Raise the web-research retriever's logger to INFO for this run
+    logging.getLogger("langchain.retrievers.web_research").setLevel(logging.INFO)
+    qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm,
+                                                           retriever=web_retriever)
+
+    # Write answer and sources
+    result = qa_chain({"question": question})
+    st.info('`Answer:`')
+    st.info(result['answer'])
+    st.info('`Source:`')
+    st.info(result['sources'])
+        
@@ -1,126 +0,0 @@
-import os
-import streamlit as st
-from langchain.schema import ChatMessage
-from langchain.callbacks.base import BaseCallbackHandler
-from langchain.chains import RetrievalQAWithSourcesChain
-from langchain.retrievers.web_research import WebResearchRetriever
-
-@st.cache_resource
-def public_settings(_stream_handler):
-
-    """ LLM settings for public mode """
-
-    # Vectorstore
-    from langchain.vectorstores import Chroma
-    from langchain.embeddings.openai import OpenAIEmbeddings
-    vectorstore = Chroma(embedding_function=OpenAIEmbeddings(),persist_directory="./chroma_db_oai")
-
-    # LLM
-    from langchain.chat_models import ChatOpenAI
-    # TO DO: Stream formatting isn't great  
-    llm = ChatOpenAI(temperature=0,streaming=True, callbacks=[stream_handler])
-
-    return vectorstore, llm
-
-@st.cache_resource
-def private_settings(_stream_handler):
-
-    """ LLM settings for privagte mode """
-
-    # Vectorstore
-    from langchain.vectorstores import Chroma
-    from langchain.embeddings import GPT4AllEmbeddings
-    vectorstore = Chroma(embedding_function=GPT4AllEmbeddings(),persist_directory="./chroma_db_llama")
-
-    # LLM
-    from langchain.llms import LlamaCpp
-    from langchain.callbacks.manager import CallbackManager
-    from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler 
-    n_gpu_layers = 1  # Metal set to 1 is enough.
-    n_batch = 512  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.
-    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
-    llm = LlamaCpp(
-        model_path="/Users/rlm/Desktop/Code/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin",
-        n_gpu_layers=n_gpu_layers,
-        n_batch=n_batch,
-        n_ctx=4096,  # Context window
-        max_tokens=1000,  # Max tokens to generate
-        f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
-        # TO DO: Make this stream
-        # callbacks=[stream_handler]? 
-        callback_manager=callback_manager,
-        verbose=True,
-    )
-
-    return vectorstore, llm
-
-@st.cache_resource
-def make_web_retriever(_vectorstore, _llm):
-
-    """ Make web retriever """
-
-    # Search 
-    from langchain.utilities import GoogleSearchAPIWrapper
-    search = GoogleSearchAPIWrapper()   
-
-    # Initialize 
-    web_research_retriever = WebResearchRetriever.from_llm(
-    vectorstore=vectorstore,
-    llm=llm, 
-    search=search, 
-    )
-
-    return web_research_retriever
-
-class StreamHandler(BaseCallbackHandler):
-    def __init__(self, container, initial_text=""):
-        self.container = container
-        self.text = initial_text
-
-    def on_llm_new_token(self, token: str, **kwargs) -> None:
-        self.text += token
-        self.container.markdown(self.text)
-
-st.sidebar.image("img/bot.png")
-
-with st.sidebar.form("user_input"):
-
-    # Pinecone params 
-    mode = st.radio("`Operating mode:`",
-                          ("Public",
-                           "Private"),
-                          index=0)
-    
-    submitted = st.form_submit_button("Set mode")
-
-# Info 
-st.header("`Web Wanderer`")
-st.info("`I am a research assistant to answer questions by exploring, reading, and summarizing web pages."
-    "I can be easily configured to use different moddes, public API or private (no data sharing).`")
-
-# LLM
-stream_handler = StreamHandler(st.empty())
-if mode == "Public":
-    vectorstore, llm = public_settings(stream_handler)
-elif mode == "Private":
-    vectorstore, llm = private_settings(stream_handler)
-    
-# Retriever
-web_retriever = make_web_retriever(vectorstore, llm)
-
-# User input 
-question = st.text_input("`Ask a question:`")
-
-if question:
-
-    # Generate answer (w/ citations)
-    import logging
-    logging.basicConfig()
-    logging.getLogger("langchain.retrievers.web_research").setLevel(logging.INFO)    
-    qa_chain = RetrievalQAWithSourcesChain.from_chain_type(llm,retriever=web_retriever)
-    result = qa_chain({"question": question})
-    st.info('Distilled answer:')
-    st.info(result['answer'])
-    st.info('Sources:')
-
-