wip

2026-07-01 20:44:18 -04:00 · 2023-04-17 20:26:17 -07:00
parent 9cee32317c
commit 143cfee9ce
11 changed files with 166 additions and 299 deletions
@@ -34,4 +34,4 @@ try:
 		"user_id": "u-1234abcd5678efgh", \
 		"conversation_id": "c-jklm9012nopq3456", \
 		"message_id": "2" \
-	}' -N
+	}' -N
@@ -1,26 +1,29 @@
 # Poe Knowledge Bot with LlamaIndex

-A knowledge-augmented Poe bot powered by 
-[LlamaIndex](https://gpt-index.readthedocs.io/en/latest/)
-and FastAPI.
+A knowledge-augmented Poe bot powered by
+[LlamaIndex](https://gpt-index.readthedocs.io/en/latest/) and FastAPI.

 Easily ingest and chat with your own data as a knowledge base!

 ## Quick Start

 Follow these steps to quickly setup and run the LlamaIndex bot for Poe:
+
 ### Setup Environment
+
 1. Install poetry: `pip install poetry`
 2. Install app dependencies: `poetry install`
 3. Setup environment variables

-| Name             | Required | Description                                                                                                                                                                                |
-| ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `POE_API_KEY`   | Yes      | This is a secret token that you need to authenticate Poe requests to the API. You can generate this from https://poe.com/create_bot?api=1.                |
+| Name             | Required | Description                                                                                                                                                |
+| ---------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `POE_API_KEY`    | Yes      | This is a secret token that you need to authenticate Poe requests to the API. You can generate this from https://poe.com/create_bot?api=1.                 |
 | `OPENAI_API_KEY` | Yes      | This is your OpenAI API key that LlamaIndex needs to call OpenAI services. You can get an API key by creating an account on [OpenAI](https://openai.com/). |

 ### Run API Server
-* Run the API locally: `poetry run start`
+
+- Run the API locally: `poetry run start`
+
 ```console
 INFO:poe_api.llama_handler:Creating new index
 INFO:poe_api.llama_handler:Loading data from data/
@@ -28,50 +31,60 @@ INFO:llama_index.token_counter.token_counter:> [insert] Total LLM token usage: 0
 INFO:llama_index.token_counter.token_counter:> [insert] Total embedding token usage: 19274 tokens
 2023-04-17 15:24:05,159 - INFO - Application startup complete.
 ```
-* Make the API publicly available with [ngrok](https://ngrok.com/): in a different terminal, run `ngrok http 8080`
+
+- Make the API publicly available with [ngrok](https://ngrok.com/): in a different
+  terminal, run `ngrok http 8080`

 ### Connect Poe to your Bot
-* Create your bot at https://poe.com/create_bot?api=1 
-* Interact with your bot at https://poe.com/

+- Create your bot at https://poe.com/create_bot?api=1
+- Interact with your bot at https://poe.com/

 ## Test Your LlamaIndex Bot
-To quickly verify if your bot is up and running, go to 
-the Swagger UI at http://localhost:8080/docs, authenticate with your `POE_API_KEY` and issue a query (satisfying the [Poe Protocol](https://github.com/poe-platform/poe-protocol/blob/main/docs/spec.md))

+To quickly verify that your bot is up and running, go to the Swagger UI at
+http://localhost:8080/docs, authenticate with your `POE_API_KEY`, and issue a query
+(satisfying the
+[Poe Protocol](https://github.com/poe-platform/poe-protocol/blob/main/docs/spec.md)).
+
+Alternatively, to use a sample query, replace `<add your POE_API_KEY here>` in
+`Makefile` with your `POE_API_KEY`, then run:

-Alternatively, to use a sample query, replace `<add your POE_API_KEY here>` in `Makefile` with your `POE_API_KEY`,
-then run:
 ```console
 make try
 ```

 ## Customize Your LlamaIndex Bot
-By default, we ingest documents under `data/` and index them with a `GPTSimpleVectorIndex`.
+
+By default, we ingest documents under `data/` and index them with a
+`GPTSimpleVectorIndex`.

 You can configure the default behavior via environment variables:

-| Name             | Required | Description                                                                                                                                                                                |
-| ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `LLAMA_LOAD_DATA`   | Optional      | Whether to ingest documents in `DATA_DIR`.Defaults to `True`                |
-| `LLAMA_DATA_DIR` | Optional      | Directory to ingest initial documents from. Defaults to `data/` |
-| `LLAMA_INDEX_TYPE` | Optional      | Index type (see below for details). Defaults to `simple_dict`  |
-| `INDEX_JSON_PATH` | Optional      |  Path to saved Index json file. `save/index.json`|
+| Name               | Required | Description                                                     |
+| ------------------ | -------- | --------------------------------------------------------------- |
+| `LLAMA_LOAD_DATA`  | Optional | Whether to ingest documents in `DATA_DIR`. Defaults to `True`   |
+| `LLAMA_DATA_DIR`   | Optional | Directory to ingest initial documents from. Defaults to `data/` |
+| `LLAMA_INDEX_TYPE` | Optional | Index type (see below for details). Defaults to `simple_dict`   |
+| `INDEX_JSON_PATH`  | Optional | Path to saved index JSON file. Defaults to `save/index.json`    |

-**Different Index Types**
-By default, we use a `GPTSimpleVectorIndex` to store document chunks in memory, 
-and retrieve top-k nodes by embedding similarity.
-Different index types are optimized for different data and query use-cases.
-See this guide on [How Each Index Works](https://gpt-index.readthedocs.io/en/latest/guides/primer/index_guide.html) to learn more.
-You can configure the index type via the `LLAMA_INDEX_TYPE`, see [here](https://gpt-index.readthedocs.io/en/latest/reference/indices/composability_query.html#gpt_index.data_structs.struct_type.IndexStructType) for the full list of accepted index type identifiers.
+**Different Index Types** By default, we use a `GPTSimpleVectorIndex` to store document
+chunks in memory, and retrieve top-k nodes by embedding similarity. Different index
+types are optimized for different data and query use-cases. See this guide on
+[How Each Index Works](https://gpt-index.readthedocs.io/en/latest/guides/primer/index_guide.html)
+to learn more. You can configure the index type via `LLAMA_INDEX_TYPE`; see
+[here](https://gpt-index.readthedocs.io/en/latest/reference/indices/composability_query.html#gpt_index.data_structs.struct_type.IndexStructType)
+for the full list of accepted index type identifiers.

-
-Read more details on [readthedocs](https://gpt-index.readthedocs.io/en/latest/), 
-and engage with the community on [discord](https://discord.com/invite/dGcwcsnxhU).
+Read more details on [readthedocs](https://gpt-index.readthedocs.io/en/latest/), and
+engage with the community on [discord](https://discord.com/invite/dGcwcsnxhU).

 ## Ingesting Data
-LlamaIndex bot for Poe also exposes an API for ingesting additional data by `POST` to `/add_document` endpoint.
+
+The LlamaIndex bot for Poe also exposes an API for ingesting additional data: send a
+`POST` request to the `/add_document` endpoint.

 You can use the Swagger UI to quickly experiment with ingesting additional documents:
-* Locally: `http://localhost:8080/docs`
-* Publiclly via `ngrok`: `https://<instance-id>.ngrok-free.app/docs`
+
+- Locally: `http://localhost:8080/docs`
+- Publicly via `ngrok`: `https://<instance-id>.ngrok-free.app/docs`
@@ -1,22 +1,28 @@
 # Integrations into LLM Applications

-LlamaIndex modules provide plug and play data loaders, data structures, and query interfaces. They can be used in your downstream LLM Application. Some of these applications are described below.
+LlamaIndex modules provide plug and play data loaders, data structures, and query
+interfaces. They can be used in your downstream LLM Application. Some of these
+applications are described below.

 ### Chatbots

-Chatbots are an incredibly popular use case for LLM's. LlamaIndex gives you the tools to build Knowledge-augmented chatbots and agents.
+Chatbots are an incredibly popular use case for LLMs. LlamaIndex gives you the tools to
+build Knowledge-augmented chatbots and agents.

 Relevant Resources:
+
 - [Building a Chatbot](/guides/tutorials/building_a_chatbot.md)
 - [Using with a LangChain Agent](/how_to/integrations/using_with_langchain.md)

 ### Full-Stack Web Application

-LlamaIndex can be integrated into a downstream full-stack web application. It can be used in a backend server (such as Flask), packaged into a Docker container, and/or directly used in a framework such as Streamlit.
+LlamaIndex can be integrated into a downstream full-stack web application. It can be
+used in a backend server (such as Flask), packaged into a Docker container, and/or
+directly used in a framework such as Streamlit.

 We provide tutorials and resources to help you get started in this area.

 Relevant Resources:
+
 - [Fullstack Application Guide](/guides/tutorials/fullstack_app_guide.md)
 - [LlamaIndex Starter Pack](https://github.com/logan-markewich/llama_index_starter_pack)
-
@@ -1,4 +1,4 @@
-		
+

 What I Worked On

@@ -351,6 +351,3 @@ But if so there's no reason to suppose that this is the limit of the language th


 Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.
-
-
-
@@ -1,16 +1,17 @@
 # Queries over your Data

-At a high-level, LlamaIndex gives you the ability to query your data for any downstream LLM use case,
-whether it's question-answering, summarization, or a component in a chatbot.
+At a high level, LlamaIndex gives you the ability to query your data for any downstream
+LLM use case, whether it's question-answering, summarization, or a component in a
+chatbot.

-This section describes the different ways you can query your data with LlamaIndex, roughly in order
-of simplest (top-k semantic search), to more advanced capabilities.
+This section describes the different ways you can query your data with LlamaIndex,
+roughly in order from simplest (top-k semantic search) to more advanced capabilities.

-### Semantic Search 
+### Semantic Search

-The most basic example usage of LlamaIndex is through semantic search. We provide
-a simple in-memory vector store for you to get started, but you can also choose
-to use any one of our [vector store integrations](/how_to/integrations/vector_stores.md):
+The most basic example usage of LlamaIndex is through semantic search. We provide a
+simple in-memory vector store for you to get started, but you can also choose to use any
+one of our [vector store integrations](/how_to/integrations/vector_stores.md):

 ```python
 from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader
@@ -22,20 +23,24 @@ print(response)
 ```

 Relevant Resources:
+
 - [Quickstart](/getting_started/starter_example.md)
 - [Example notebook](https://github.com/jerryjliu/llama_index/tree/main/examples/vector_indices)

-
 ### Summarization

-A summarization query requires the LLM to iterate through many if not most documents in order to synthesize an answer.
-For instance, a summarization query could look like one of the following: 
+A summarization query requires the LLM to iterate through many if not most documents in
+order to synthesize an answer. For instance, a summarization query could look like one
+of the following:
+
 - "What is a summary of this collection of text?"
 - "Give me a summary of person X's experience with the company."

-In general, a list index would be suited for this use case. A list index by default goes through all the data.
+In general, a list index would be suited for this use case. A list index by default goes
+through all the data.

-Empirically, setting `response_mode="tree_summarize"` also leads to better summarization results.
+Empirically, setting `response_mode="tree_summarize"` also leads to better summarization
+results.

 ```python
 index = GPTListIndex.from_documents(documents)
@@ -45,21 +50,23 @@ response = index.query("<summarization_query>", response_mode="tree_summarize")

 ### Queries over Structured Data

-LlamaIndex supports queries over structured data, whether that's a Pandas DataFrame or a SQL Database.
+LlamaIndex supports queries over structured data, whether that's a Pandas DataFrame or a
+SQL Database.

 Here are some relevant resources:
+
 - [Guide on Text-to-SQL](/guides/tutorials/sql_guide.md)
 - [SQL Demo Notebook 1](https://github.com/jerryjliu/llama_index/blob/main/examples/struct_indices/SQLIndexDemo.ipynb)
 - [SQL Demo Notebook 2 (Context)](https://github.com/jerryjliu/llama_index/blob/main/examples/struct_indices/SQLIndexDemo-Context.ipynb)
 - [SQL Demo Notebook 3 (Big tables)](https://github.com/jerryjliu/llama_index/blob/main/examples/struct_indices/SQLIndexDemo-ManyTables.ipynb)
 - [Pandas Demo Notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/struct_indices/PandasIndexDemo.ipynb).

-
 ### Synthesis over Heterogenous Data

-LlamaIndex supports synthesizing across heterogenous data sources. This can be done by composing a graph over your existing data.
-Specifically, compose a list index over your subindices. A list index inherently combines information for each node; therefore
-it can synthesize information across your heteregenous data sources.
+LlamaIndex supports synthesizing across heterogeneous data sources. This can be done by
+composing a graph over your existing data. Specifically, compose a list index over your
+subindices. A list index inherently combines information for each node; therefore it can
+synthesize information across your heterogeneous data sources.

 ```python
 from llama_index import GPTSimpleVectorIndex, GPTListIndex
@@ -74,21 +81,20 @@ response = graph.query("<query_str>", mode="recursive", query_configs=...)
 ```

 Here are some relevant resources:
+
 - [Composability](/how_to/index_structs/composability.md)
 - [City Analysis Demo](https://github.com/jerryjliu/llama_index/blob/main/examples/composable_indices/city_analysis/PineconeDemo-CityAnalysis.ipynb).

-
-
 ### Routing over Heterogenous Data

-LlamaIndex also supports routing over heteregenous data sources - for instance, if you want to "route" a query to an 
-underlying Document or a subindex.
-Here you have three options: `GPTTreeIndex`, `GPTKeywordTableIndex`, or a
+LlamaIndex also supports routing over heterogeneous data sources - for instance, if you
+want to "route" a query to an underlying Document or a subindex. Here you have three
+options: `GPTTreeIndex`, `GPTKeywordTableIndex`, or a
 [Vector Store Index](vector-store-index).

-A `GPTTreeIndex` uses the LLM to select the child node(s) to send the query down to.
-A `GPTKeywordTableIndex` uses keyword matching, and a `GPTVectorStoreIndex` uses
-embedding cosine similarity.
+A `GPTTreeIndex` uses the LLM to select the child node(s) to send the query down to. A
+`GPTKeywordTableIndex` uses keyword matching, and a `GPTVectorStoreIndex` uses embedding
+cosine similarity.

 ```python
 from llama_index import GPTTreeIndex, GPTSimpleVectorIndex
@@ -102,7 +108,7 @@ index2 = GPTSimpleVectorIndex.from_documents(slack_docs)

 # tree index for routing
 tree_index = ComposableGraph.from_indices(
-    GPTTreeIndex, 
+    GPTTreeIndex,
    [index1, index2],
    index_summaries=["summary1", "summary2"]
 )
@@ -116,25 +122,25 @@ response = tree_index.query(
 ```

 Here are some relevant resources:
+
 - [Composability](/how_to/index_structs/composability.md)
 - [Composable Keyword Table Graph](https://github.com/jerryjliu/llama_index/blob/main/examples/composable_indices/ComposableIndices.ipynb).

-
-
 ### Compare/Contrast Queries

-LlamaIndex can support compare/contrast queries as well. It can do this in the following fashion:
+LlamaIndex can support compare/contrast queries as well. It can do this in the following
+fashion:
+
 - Composing a graph over your data
 - Adding in query transformations.

-
 You can perform compare/contrast queries by just composing a graph over your data.

 Here are some relevant resources:
+
 - [Composability](/how_to/index_structs/composability.md)
 - [SEC 10-k Analysis Example notebook](https://colab.research.google.com/drive/1uL1TdMbR4kqa0Ksrd_Of_jWSxWt1ia7o?usp=sharing).

-
 You can also perform compare/contrast queries with a **query transformation** module.

 ```python
@@ -144,25 +150,25 @@ decompose_transform = DecomposeQueryTransform(
 )
 ```

-This module will help break down a complex query into a simpler one over your existing index structure.
+This module will help break down a complex query into a simpler one over your existing
+index structure.

 Here are some relevant resources:
+
 - [Query Transformations](/how_to/query/query_transformations.md)
 - [City Analysis Example Notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/composable_indices/city_analysis/City_Analysis-Decompose.ipynb)

-
 ### Multi-Step Queries

-LlamaIndex can also support multi-step queries. Given a complex query, break it down into subquestions.
+LlamaIndex can also support multi-step queries. Given a complex query, break it down
+into subquestions.

-For instance, given a question "Who was in the first batch of the accelerator program the author started?",
-the module will first decompose the query into a simpler initial question "What was the accelerator program the author started?",
-query the index, and then ask followup questions.
+For instance, given a question "Who was in the first batch of the accelerator program
+the author started?", the module will first decompose the query into a simpler initial
+question "What was the accelerator program the author started?", query the index, and
+then ask follow-up questions.

 Here are some relevant resources:
+
 - [Query Transformations](/how_to/query/query_transformations.md)
 - [Multi-Step Query Decomposition Notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/vector_indices/SimpleIndexDemo-multistep.ipynb)
-
-
-
-
@@ -1,118 +0,0 @@
-import json
-import logging
-from typing import AsyncIterable, Dict, Optional, Union
-
-from fastapi.responses import JSONResponse
-from sse_starlette.sse import ServerSentEvent
-
-from poe_api.types import (AddDocumentsRequest, ContentType, QueryRequest,
-                           ReportErrorRequest, ReportFeedbackRequest,
-                           SettingsRequest, SettingsResponse)
-
-logger = logging.getLogger("uvicorn.default")
-
-
-class PoeHandler:
-    # Override these for your bot
-
-    async def get_response(self, query: QueryRequest) -> AsyncIterable[ServerSentEvent]:
-        """Override this to return a response to user queries."""
-        yield self.text_event("hello")
-
-    async def get_settings(self, setting: SettingsRequest) -> SettingsResponse:
-        """Override this to return non-standard settings."""
-        return SettingsResponse()
-
-    async def on_feedback(self, feedback_request: ReportFeedbackRequest) -> None:
-        """Override this to record feedback from the user."""
-        pass
-
-    async def on_error(self, error_request: ReportErrorRequest) -> None:
-        """Override this to record errors from the Poe server."""
-        logger.error(f"Error from Poe server: {error_request}")
-
-    async def add_documents(self, add_documents_request: AddDocumentsRequest) -> None:
-        """Override this to record errors from the Poe server."""
-        pass
-
-    def shutdown(self) -> None:
-        pass
-
-    # Helpers for generating responses
-
-    @staticmethod
-    def text_event(text: str) -> ServerSentEvent:
-        return ServerSentEvent(data=json.dumps({"text": text}), event="text")
-
-    @staticmethod
-    def replace_response_event(text: str) -> ServerSentEvent:
-        return ServerSentEvent(
-            data=json.dumps({"text": text}), event="replace_response"
-        )
-
-    @staticmethod
-    def done_event() -> ServerSentEvent:
-        return ServerSentEvent(data="{}", event="done")
-
-    @staticmethod
-    def suggested_reply_event(text: str) -> ServerSentEvent:
-        return ServerSentEvent(data=json.dumps({"text": text}), event="suggested_reply")
-
-    @staticmethod
-    def meta_event(
-        *,
-        content_type: ContentType = "text/markdown",
-        refetch_settings: bool = False,
-        linkify: bool = True,
-        suggested_replies: bool = True,
-    ) -> ServerSentEvent:
-        return ServerSentEvent(
-            data=json.dumps(
-                {
-                    "content_type": content_type,
-                    "refetch_settings": refetch_settings,
-                    "linkify": linkify,
-                    "suggested_replies": suggested_replies,
-                }
-            ),
-            event="meta",
-        )
-
-    @staticmethod
-    def error_event(
-        text: Optional[str] = None, *, allow_retry: bool = True
-    ) -> ServerSentEvent:
-        data: Dict[str, Union[bool, str]] = {"allow_retry": allow_retry}
-        if text is not None:
-            data["text"] = text
-        return ServerSentEvent(data=json.dumps(data), event="error")
-
-    # Internal handlers
-
-    async def handle_report_feedback(
-        self, feedback_request: ReportFeedbackRequest
-    ) -> JSONResponse:
-        await self.on_feedback(feedback_request)
-        return JSONResponse({})
-
-    async def handle_report_error(
-        self, error_request: ReportErrorRequest
-    ) -> JSONResponse:
-        await self.on_error(error_request)
-        return JSONResponse({})
-
-    async def handle_settings(self, settings_request: SettingsRequest) -> JSONResponse:
-        settings = await self.get_settings(settings_request)
-        return JSONResponse(settings.dict())
-
-    async def handle_query(self, query: QueryRequest) -> AsyncIterable[ServerSentEvent]:
-        async for event in self.get_response(query):
-            yield event
-        yield self.done_event()
-
-    async def handle_add_documents(self, request: AddDocumentsRequest) -> JSONResponse:
-        await self.add_documents(request)
-        return JSONResponse({})
-
-    def handle_shutdown(self) -> None:
-        self.shutdown()
@@ -1,28 +1,29 @@
 """
-
-Demo bot: catbot.
-
+LlamaIndex Bot.
 """
 from __future__ import annotations

 import logging
 import os
-from typing import AsyncIterable, List, Optional, Sequence, Tuple, Type
+from typing import AsyncIterable, Sequence

+from fastapi.responses import JSONResponse
 from langchain import LLMChain, OpenAI
-from langchain.chains.conversational_retrieval.prompts import \
-    CONDENSE_QUESTION_PROMPT
-from llama_index import Document as LlamaDocument
-from llama_index import IndexStructType
+from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
+from llama_index import Document as LlamaDocument, IndexStructType
 from llama_index.indices.base import BaseGPTIndex
 from llama_index.indices.registry import INDEX_STRUCT_TYPE_TO_INDEX_CLASS
 from llama_index.readers import SimpleDirectoryReader
+from poe_api.types import AddDocumentsRequest, Document
 from sse_starlette.sse import ServerSentEvent

-from poe_api.base_handler import PoeHandler
-from poe_api.types import (AddDocumentsRequest, Document, QueryRequest,
-                           ReportFeedbackRequest, SettingsRequest,
-                           SettingsResponse)
+from fastapi_poe.base import PoeHandler
+from fastapi_poe.types import (
+    QueryRequest,
+    ReportFeedbackRequest,
+    SettingsRequest,
+    SettingsResponse,
+)

 LOAD_DATA = os.environ.get("LLAMA_LOAD_DATA", True)
 DATA_DIR = os.environ.get("LLAMA_DATA_DIR", "data/")
@@ -48,14 +49,14 @@ SETTINGS = SettingsResponse(
 logger = logging.getLogger(__name__)


-def _to_llama_documents(docs: Sequence[Document]) -> List[LlamaDocument]:
+def _to_llama_documents(docs: Sequence[Document]) -> list[LlamaDocument]:
    return [LlamaDocument(text=doc.text, doc_id=doc.doc_id) for doc in docs]


 def _create_or_load_index(
-    index_type_str: Optional[str] = None,
-    index_json_path: Optional[str] = None,
-    index_type_to_index_cls: Optional[dict[str, Type[BaseGPTIndex]]] = None,
+    index_type_str: str | None = None,
+    index_json_path: str | None = None,
+    index_type_to_index_cls: dict[str, type[BaseGPTIndex]] | None = None,
 ) -> BaseGPTIndex:
    """Create or load index from json path."""
    index_json_path = index_json_path or INDEX_JSON_PATH
@@ -78,10 +79,10 @@ def _create_or_load_index(
        index = index_cls.load_from_disk(index_json_path)
        logger.info(f"Loading index from {index_json_path}")
        return index
-    except IOError:
+    except OSError:
        # Create empty index
        index = index_cls(nodes=[])
-        logger.info(f"Creating new index")
+        logger.info("Creating new index")

        if LOAD_DATA:
            logger.info(f"Loading data from {DATA_DIR}")
@@ -95,7 +96,7 @@ def _create_or_load_index(
        return index


-def _get_chat_history(chat_history: List[Tuple[str, str]]) -> str:
+def _get_chat_history(chat_history: list[tuple[str, str]]) -> str:
    buffer = ""
    for human_s, ai_s in chat_history:
        human = "Human: " + human_s
@@ -124,8 +125,7 @@ class LlamaBotHandler(PoeHandler):
        # Generate standalone question from conversation context and last message
        question_gen_model = OpenAI(temperature=0)
        question_generator = LLMChain(
-            llm=question_gen_model,
-            prompt=CONDENSE_QUESTION_PROMPT,
+            llm=question_gen_model, prompt=CONDENSE_QUESTION_PROMPT
        )

        chat_history_str = _get_chat_history(chat_history)
@@ -157,7 +157,7 @@ class LlamaBotHandler(PoeHandler):
        """Return the settings for this bot."""
        return SETTINGS

-    async def add_documents(self, request: AddDocumentsRequest) -> SettingsResponse:
+    async def add_documents(self, request: AddDocumentsRequest) -> None:
        """Add documents."""
        llama_docs = _to_llama_documents(request.documents)
        nodes = self._index.service_context.node_parser.get_nodes_from_documents(
@@ -165,5 +165,10 @@ class LlamaBotHandler(PoeHandler):
        )
        self._index.insert_nodes(nodes)

-    def shutdown(self) -> None:
+    async def handle_add_documents(self, request: AddDocumentsRequest) -> JSONResponse:
+        await self.add_documents(request)
+        return JSONResponse({})
+
+    def handle_shutdown(self) -> None:
+        """Save index upon shutdown."""
        self._index.save_to_disk(INDEX_JSON_PATH)
@@ -3,17 +3,22 @@ import logging
 import os
 from typing import Any, Dict

+import uvicorn.config
 from fastapi import Depends, FastAPI, HTTPException, Request, Response
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import HTMLResponse
 from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from poe_api import llama_handler
+from poe_api.types import AddDocumentsRequest
+from poe_api.utils import LoggingMiddleware
 from sse_starlette.sse import EventSourceResponse

-from poe_api import llama_handler
-from poe_api.types import (AddDocumentsRequest, QueryRequest,
-                           ReportErrorRequest, ReportFeedbackRequest,
-                           SettingsRequest)
-from poe_api.utils import LoggingMiddleware
+from fastapi_poe.types import (
+    QueryRequest,
+    ReportErrorRequest,
+    ReportFeedbackRequest,
+    SettingsRequest,
+)

 global logger
 logger = logging.getLogger("uvicorn.default")
@@ -45,7 +50,6 @@ app.add_exception_handler(RequestValidationError, exception_handler)
 # Uncomment this line to print out request and response
 app.add_middleware(LoggingMiddleware)
 logger.info("Starting")
-import uvicorn.config

 log_config = copy.deepcopy(uvicorn.config.LOGGING_CONFIG)
 log_config["formatters"]["default"]["fmt"] = "%(asctime)s - %(levelname)s - %(message)s"
@@ -1,71 +1,6 @@
-from typing import Any, Dict, List, Optional
+from typing import List

 from pydantic import BaseModel
-from typing_extensions import Literal, TypeAlias
-
-Identifier: TypeAlias = str
-FeedbackType: TypeAlias = Literal["like", "dislike"]
-ContentType: TypeAlias = Literal["text/markdown", "text/plain"]
-
-
-class MessageFeedback(BaseModel):
-    """Feedback for a message as used in the Poe protocol."""
-
-    type: FeedbackType
-    reason: Optional[str]
-
-
-class ProtocolMessage(BaseModel):
-    """A message as used in the Poe protocol."""
-
-    role: Literal["system", "user", "bot"]
-    content: str
-    content_type: ContentType
-    timestamp: int
-    message_id: str
-    message_type: Optional[str]
-    feedback: List[MessageFeedback]
-
-
-class BaseRequest(BaseModel):
-    """Common data for all requests."""
-
-    version: str
-    type: Literal["query", "settings", "report_feedback", "report_error"]
-
-
-class QueryRequest(BaseRequest):
-    """Request parameters for a query request."""
-
-    query: List[ProtocolMessage]
-    user_id: Identifier
-    conversation_id: Identifier
-    message_id: Identifier
-
-
-class SettingsRequest(BaseRequest):
-    """Request parameters for a settings request."""
-
-
-class ReportFeedbackRequest(BaseRequest):
-    """Request parameters for a report_feedback request."""
-
-    message_id: Identifier
-    user_id: Identifier
-    conversation_id: Identifier
-    feedback_type: FeedbackType
-
-
-class ReportErrorRequest(BaseRequest):
-    """Request parameters for a report_error request."""
-
-    message: str
-    metadata: Dict[str, Any]
-
-
-class SettingsResponse(BaseModel):
-    context_clear_window_secs: Optional[int] = None
-    allow_user_context_clear: bool = True


 class Document(BaseModel):
@@ -404,6 +404,24 @@ dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (>
 doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"]
 test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"]

+[[package]]
+name = "fastapi-poe"
+version = "0.0.7"
+description = "A demonstration of the Poe protocol using FastAPI"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "fastapi_poe-0.0.7-py3-none-any.whl", hash = "sha256:02172470656e66376df1f37fea48ffe56f221261deee2d53d3036575fd914048"},
+    {file = "fastapi_poe-0.0.7.tar.gz", hash = "sha256:08a8ab2b0c1c6e24473dcd57d0f38b97df3dab36e7fb61995f9f0735b3b45925"},
+]
+
+[package.dependencies]
+fastapi = "*"
+sse-starlette = "*"
+typing-extensions = "*"
+uvicorn = "*"
+
 [[package]]
 name = "frozenlist"
 version = "1.3.3"
@@ -1605,4 +1623,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "02661840da977bfc12f1f401f2eb429e4703260ca35247baf449f0625259a5b6"
+content-hash = "7b33dce7bfa06277f4893cdd5e19f5a8cd525a2033cb6c80a63623ea406aa585"
@@ -20,8 +20,9 @@ fastapi = "^0.95.1"
 sse-starlette = "^1.3.3"
 typing-extensions = "^4.5.0"
 uvicorn = "^0.21.1"
+fastapi-poe = "^0.0.7"


 [build-system]
 requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+build-backend = "poetry.core.masonry.api"