Wikipedia RAG app

2026-07-01 10:04:31 -04:00 · 2024-01-08 10:51:51 -08:00
commit 6d1b0f35f7
7 changed files with 2010 additions and 0 deletions
@@ -0,0 +1,19 @@
+# Container image for the LangServe app defined in app/server.py.
+FROM python:3.11-slim
+
+# Install a pinned Poetry release to manage the project's dependencies.
+RUN pip install poetry==1.6.1
+
+# Install packages into the image's interpreter rather than a Poetry-created virtualenv.
+RUN poetry config virtualenvs.create false
+
+WORKDIR /code
+
+# Copy only the project manifests before the application code; the trailing `*`
+# on poetry.lock is a glob, so the lock file is picked up when present.
+COPY ./pyproject.toml ./README.md ./poetry.lock* ./
+
+# First pass: install dependencies only. --no-root skips the project package,
+# whose source (./app) has not been copied yet.
+RUN poetry install  --no-interaction --no-ansi --no-root
+
+COPY ./app ./app
+
+# Second pass: install the project package itself now that ./app is present.
+RUN poetry install --no-interaction --no-ansi
+
+# Keep in sync with the --port passed to uvicorn below.
+EXPOSE 8080
+
+# Shell-form CMD; `exec` replaces the shell with the uvicorn process.
+CMD exec uvicorn app.server:app --host 0.0.0.0 --port 8080
@@ -0,0 +1,86 @@
+## Pinecone-Wikipedia
+
+Wikipedia is a rich source of information well-suited for semantic search.
+
+Recent efforts have indexed Wikipedia using Cohere embeddings [here](https://huggingface.co/datasets/Cohere/wikipedia-22-12) and [here](https://huggingface.co/datasets/Cohere/wikipedia-22-12-en-embeddings?row=6).
+
+< To add context here >
+
+### Index
+
+< To add context here >
+
+### API keys
+
+Ensure these are set:
+
+* PINECONE_API_KEY
+* PINECONE_ENVIRONMENT
+* PINECONE_INDEX_NAME 
+* COHERE_API_KEY 
+* OPENAI_API_KEY
+
+### Deployment
+
+This repo was created by following these steps:
+
+**(1) Create a LangChain app.**
+
+Run:
+```
+langchain app new .  
+```
+
+This creates two folders:
+```
+app: This is where LangServe code will live
+packages: This is where your chains or agents will live
+```
+
+It also creates:
+```
+Dockerfile: App configurations
+pyproject.toml: Project configurations
+```
+
+We won't need `packages`:
+```
+rm -rf packages
+```
+
+Modify the Dockerfile to remove `COPY ./packages ./packages`.
+
+**(2) Add your runnable (RAG app)**
+
+Create a file, `chain.py`, with a runnable named `chain` that you want to execute. This is our RAG logic.
+
+Add `chain.py` to the `app` directory.
+
+Import the runnable in `server.py`:
+```
+from app.chain import chain as pinecone_wiki_chain
+add_routes(app, pinecone_wiki_chain, path="/pinecone-wikipedia")
+```
+
+Add your app dependencies to `pyproject.toml` and `poetry.lock`:
+```
+poetry add pinecone-client
+poetry add cohere
+poetry add openai
+```
+
+Update the environment based on the updated lock file:
+```
+poetry install
+```
+
+Run locally:
+```
+poetry run langchain serve
+```
+
+**(3) Deploy it with hosted LangServe**
+
+Go to your LangSmith console and select `New Deployment`.
+
+Specify the GitHub URL along with the API keys listed above.
@@ -0,0 +1,38 @@
+import os
+import pinecone
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_community.chat_models import ChatOpenAI
+from langchain_community.vectorstores import Pinecone
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_community.embeddings import CohereEmbeddings
+from langchain_core.runnables import RunnableParallel, RunnablePassthrough
+
+# Keys
+PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
+PINECONE_ENVIRONMENT = os.environ["PINECONE_ENVIRONMENT"]
+PINECONE_INDEX_NAME = os.environ["PINECONE_INDEX_NAME"]
+
+# Vectorstore
+pinecone.init(api_key=PINECONE_API_KEY,
+              environment=PINECONE_ENVIRONMENT)
+embeddings = CohereEmbeddings(model="multilingual-22-12")
+vectorstore = Pinecone.from_existing_index(PINECONE_INDEX_NAME, 
+                                           embeddings)
+retriever = vectorstore.as_retriever()
+
+# RAG prompt
+template = """Answer the question based only on the following context:
+{context}
+Question: {question}
+"""
+prompt = ChatPromptTemplate.from_template(template)
+
+# RAG
+model = ChatOpenAI()
+chain = (
+    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
+    | prompt
+    | model
+    | StrOutputParser()
+)
@@ -0,0 +1,20 @@
+from fastapi import FastAPI
+from fastapi.responses import RedirectResponse
+from langserve import add_routes
+from app.chain import chain as pinecone_wiki_chain
+
+app = FastAPI()
+
+
+@app.get("/")
+async def redirect_root_to_docs():
+    return RedirectResponse("/docs")
+
+
+# Edit this to add the chain you want to add
+add_routes(app, pinecone_wiki_chain, path="/pinecone-wikipedia")
+
+if __name__ == "__main__":
+    import uvicorn
+
+    uvicorn.run(app, host="0.0.0.0", port=8000)
@@ -0,0 +1,26 @@
+# Poetry project definition for the Pinecone-Wikipedia LangServe app.
+[tool.poetry]
+name = "pinecone-wikipedia"
+version = "0.1.0"
+description = ""
+authors = ["Your Name <you@example.com>"]
+readme = "README.md"
+# Package the `app` directory, which holds the server and chain modules.
+packages = [
+    { include = "app" },
+]
+
+[tool.poetry.dependencies]
+python = "^3.11"
+uvicorn = "^0.23.2"
+langserve = {extras = ["server"], version = ">=0.0.30"}
+# NOTE(review): held below v2, presumably for compatibility with the
+# pydantic-v1 models used by LangChain/LangServe; confirm before lifting.
+pydantic = "<2"
+# Client libraries added with `poetry add` for the chain's external services.
+# The chain calls `pinecone.init(...)`; the caret range keeps pinecone-client
+# on the 2.x series. TODO confirm that API is still available before upgrading.
+pinecone-client = "^2.2.4"
+cohere = "^4.40"
+openai = "^1.6.1"
+
+
+[tool.poetry.group.dev.dependencies]
+# Development only; presumably supplies the `langchain` command used for
+# `langchain app new` and `langchain serve`.
+langchain-cli = ">=0.0.15"
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"