mirror of
https://github.com/run-llama/llama-api.git
synced 2026-07-01 20:44:18 -04:00
wip
This commit is contained in:
+1
-1
@@ -34,4 +34,4 @@ try:
|
||||
"user_id": "u-1234abcd5678efgh", \
|
||||
"conversation_id": "c-jklm9012nopq3456", \
|
||||
"message_id": "2" \
|
||||
}' -N
|
||||
}' -N
|
||||
|
||||
+46
-33
@@ -1,26 +1,29 @@
|
||||
# Poe Knowledge Bot with LlamaIndex
|
||||
|
||||
A knowledge-augmented Poe bot powered by
|
||||
[LlamaIndex](https://gpt-index.readthedocs.io/en/latest/)
|
||||
and FastAPI.
|
||||
A knowledge-augmented Poe bot powered by
|
||||
[LlamaIndex](https://gpt-index.readthedocs.io/en/latest/) and FastAPI.
|
||||
|
||||
Easily ingest and chat with your own data as a knowledge base!
|
||||
|
||||
## Quick Start
|
||||
|
||||
Follow these steps to quickly set up and run the LlamaIndex bot for Poe:
|
||||
|
||||
### Setup Environment
|
||||
|
||||
1. Install poetry: `pip install poetry`
|
||||
2. Install app dependencies: `poetry install`
|
||||
3. Set up environment variables
|
||||
|
||||
| Name | Required | Description |
|
||||
| ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `POE_API_KEY` | Yes | This is a secret token that you need to authenticate Poe requests to the API. You can generate this from https://poe.com/create_bot?api=1. |
|
||||
| Name | Required | Description |
|
||||
| ---------------- | -------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `POE_API_KEY` | Yes | This is a secret token that you need to authenticate Poe requests to the API. You can generate this from https://poe.com/create_bot?api=1. |
|
||||
| `OPENAI_API_KEY` | Yes | This is your OpenAI API key that LlamaIndex needs to call OpenAI services. You can get an API key by creating an account on [OpenAI](https://openai.com/). |
|
||||
|
||||
### Run API Server
|
||||
* Run the API locally: `poetry run start`
|
||||
|
||||
- Run the API locally: `poetry run start`
|
||||
|
||||
```console
|
||||
INFO:poe_api.llama_handler:Creating new index
|
||||
INFO:poe_api.llama_handler:Loading data from data/
|
||||
@@ -28,50 +31,60 @@ INFO:llama_index.token_counter.token_counter:> [insert] Total LLM token usage: 0
|
||||
INFO:llama_index.token_counter.token_counter:> [insert] Total embedding token usage: 19274 tokens
|
||||
2023-04-17 15:24:05,159 - INFO - Application startup complete.
|
||||
```
|
||||
* Make the API publicly available with [ngrok](https://ngrok.com/): in a different terminal, run `ngrok http 8080`
|
||||
|
||||
- Make the API publicly available with [ngrok](https://ngrok.com/): in a different
|
||||
terminal, run `ngrok http 8080`
|
||||
|
||||
### Connect Poe to your Bot
|
||||
* Create your bot at https://poe.com/create_bot?api=1
|
||||
* Interact with your bot at https://poe.com/
|
||||
|
||||
- Create your bot at https://poe.com/create_bot?api=1
|
||||
- Interact with your bot at https://poe.com/
|
||||
|
||||
## Test Your LlamaIndex Bot
|
||||
To quickly verify if your bot is up and running, go to
|
||||
the Swagger UI at http://localhost:8080/docs, authenticate with your `POE_API_KEY` and issue a query (satisfying the [Poe Protocol](https://github.com/poe-platform/poe-protocol/blob/main/docs/spec.md))
|
||||
|
||||
To quickly verify if your bot is up and running, go to the Swagger UI at
|
||||
http://localhost:8080/docs, authenticate with your `POE_API_KEY` and issue a query
|
||||
(satisfying the
|
||||
[Poe Protocol](https://github.com/poe-platform/poe-protocol/blob/main/docs/spec.md)).
|
||||
|
||||
Alternatively, to use a sample query, replace `<add your POE_API_KEY here>` in
|
||||
`Makefile` with your `POE_API_KEY`, then run:
|
||||
|
||||
Alternatively, to use a sample query, replace `<add your POE_API_KEY here>` in `Makefile` with your `POE_API_KEY`,
|
||||
then run:
|
||||
```console
|
||||
make try
|
||||
```
|
||||
|
||||
## Customize Your LlamaIndex Bot
|
||||
By default, we ingest documents under `data/` and index them with a `GPTSimpleVectorIndex`.
|
||||
|
||||
By default, we ingest documents under `data/` and index them with a
|
||||
`GPTSimpleVectorIndex`.
|
||||
|
||||
You can configure the default behavior via environment variables:
|
||||
|
||||
| Name | Required | Description |
|
||||
| ---------------- | -------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `LLAMA_LOAD_DATA` | Optional | Whether to ingest documents in `LLAMA_DATA_DIR`. Defaults to `True` |
|
||||
| `LLAMA_DATA_DIR` | Optional | Directory to ingest initial documents from. Defaults to `data/` |
|
||||
| `LLAMA_INDEX_TYPE` | Optional | Index type (see below for details). Defaults to `simple_dict` |
|
||||
| `INDEX_JSON_PATH` | Optional | Path to saved Index json file. `save/index.json`|
|
||||
| Name | Required | Description |
|
||||
| ------------------ | -------- | --------------------------------------------------------------- |
|
||||
| `LLAMA_LOAD_DATA` | Optional | Whether to ingest documents in `LLAMA_DATA_DIR`. Defaults to `True` |
|
||||
| `LLAMA_DATA_DIR` | Optional | Directory to ingest initial documents from. Defaults to `data/` |
|
||||
| `LLAMA_INDEX_TYPE` | Optional | Index type (see below for details). Defaults to `simple_dict` |
|
||||
| `INDEX_JSON_PATH` | Optional | Path to saved Index json file. `save/index.json` |
|
||||
|
||||
**Different Index Types**
|
||||
By default, we use a `GPTSimpleVectorIndex` to store document chunks in memory,
|
||||
and retrieve top-k nodes by embedding similarity.
|
||||
Different index types are optimized for different data and query use-cases.
|
||||
See this guide on [How Each Index Works](https://gpt-index.readthedocs.io/en/latest/guides/primer/index_guide.html) to learn more.
|
||||
You can configure the index type via the `LLAMA_INDEX_TYPE`, see [here](https://gpt-index.readthedocs.io/en/latest/reference/indices/composability_query.html#gpt_index.data_structs.struct_type.IndexStructType) for the full list of accepted index type identifiers.
|
||||
**Different Index Types** By default, we use a `GPTSimpleVectorIndex` to store document
|
||||
chunks in memory, and retrieve top-k nodes by embedding similarity. Different index
|
||||
types are optimized for different data and query use-cases. See this guide on
|
||||
[How Each Index Works](https://gpt-index.readthedocs.io/en/latest/guides/primer/index_guide.html)
|
||||
to learn more. You can configure the index type via the `LLAMA_INDEX_TYPE`, see
|
||||
[here](https://gpt-index.readthedocs.io/en/latest/reference/indices/composability_query.html#gpt_index.data_structs.struct_type.IndexStructType)
|
||||
for the full list of accepted index type identifiers.
|
||||
|
||||
|
||||
Read more details on [readthedocs](https://gpt-index.readthedocs.io/en/latest/),
|
||||
and engage with the community on [discord](https://discord.com/invite/dGcwcsnxhU).
|
||||
Read more details on [readthedocs](https://gpt-index.readthedocs.io/en/latest/), and
|
||||
engage with the community on [discord](https://discord.com/invite/dGcwcsnxhU).
|
||||
|
||||
## Ingesting Data
|
||||
LlamaIndex bot for Poe also exposes an API for ingesting additional data by `POST` to `/add_document` endpoint.
|
||||
|
||||
LlamaIndex bot for Poe also exposes an API for ingesting additional data by `POST` to
|
||||
`/add_document` endpoint.
|
||||
|
||||
You can use the Swagger UI to quickly experiment with ingesting additional documents:
|
||||
* Locally: `http://localhost:8080/docs`
|
||||
* Publicly via `ngrok`: `https://<instance-id>.ngrok-free.app/docs`
|
||||
|
||||
- Locally: `http://localhost:8080/docs`
|
||||
- Publicly via `ngrok`: `https://<instance-id>.ngrok-free.app/docs`
|
||||
|
||||
+10
-4
@@ -1,22 +1,28 @@
|
||||
# Integrations into LLM Applications
|
||||
|
||||
LlamaIndex modules provide plug and play data loaders, data structures, and query interfaces. They can be used in your downstream LLM Application. Some of these applications are described below.
|
||||
LlamaIndex modules provide plug and play data loaders, data structures, and query
|
||||
interfaces. They can be used in your downstream LLM Application. Some of these
|
||||
applications are described below.
|
||||
|
||||
### Chatbots
|
||||
|
||||
Chatbots are an incredibly popular use case for LLMs. LlamaIndex gives you the tools to build knowledge-augmented chatbots and agents.
|
||||
Chatbots are an incredibly popular use case for LLMs. LlamaIndex gives you the tools to
|
||||
build Knowledge-augmented chatbots and agents.
|
||||
|
||||
Relevant Resources:
|
||||
|
||||
- [Building a Chatbot](/guides/tutorials/building_a_chatbot.md)
|
||||
- [Using with a LangChain Agent](/how_to/integrations/using_with_langchain.md)
|
||||
|
||||
### Full-Stack Web Application
|
||||
|
||||
LlamaIndex can be integrated into a downstream full-stack web application. It can be used in a backend server (such as Flask), packaged into a Docker container, and/or directly used in a framework such as Streamlit.
|
||||
LlamaIndex can be integrated into a downstream full-stack web application. It can be
|
||||
used in a backend server (such as Flask), packaged into a Docker container, and/or
|
||||
directly used in a framework such as Streamlit.
|
||||
|
||||
We provide tutorials and resources to help you get started in this area.
|
||||
|
||||
Relevant Resources:
|
||||
|
||||
- [Fullstack Application Guide](/guides/tutorials/fullstack_app_guide.md)
|
||||
- [LlamaIndex Starter Pack](https://github.com/logan-markewich/llama_index_starter_pack)
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
|
||||
|
||||
|
||||
What I Worked On
|
||||
|
||||
@@ -351,6 +351,3 @@ But if so there's no reason to suppose that this is the limit of the language th
|
||||
|
||||
|
||||
Thanks to Trevor Blackwell, John Collison, Patrick Collison, Daniel Gackle, Ralph Hazell, Jessica Livingston, Robert Morris, and Harj Taggar for reading drafts of this.
|
||||
|
||||
|
||||
|
||||
|
||||
+48
-42
@@ -1,16 +1,17 @@
|
||||
# Queries over your Data
|
||||
|
||||
At a high-level, LlamaIndex gives you the ability to query your data for any downstream LLM use case,
|
||||
whether it's question-answering, summarization, or a component in a chatbot.
|
||||
At a high level, LlamaIndex gives you the ability to query your data for any downstream
|
||||
LLM use case, whether it's question-answering, summarization, or a component in a
|
||||
chatbot.
|
||||
|
||||
This section describes the different ways you can query your data with LlamaIndex, roughly in order
|
||||
of simplest (top-k semantic search), to more advanced capabilities.
|
||||
This section describes the different ways you can query your data with LlamaIndex,
|
||||
roughly in order of simplest (top-k semantic search), to more advanced capabilities.
|
||||
|
||||
### Semantic Search
|
||||
### Semantic Search
|
||||
|
||||
The most basic example usage of LlamaIndex is through semantic search. We provide
|
||||
a simple in-memory vector store for you to get started, but you can also choose
|
||||
to use any one of our [vector store integrations](/how_to/integrations/vector_stores.md):
|
||||
The most basic example usage of LlamaIndex is through semantic search. We provide a
|
||||
simple in-memory vector store for you to get started, but you can also choose to use any
|
||||
one of our [vector store integrations](/how_to/integrations/vector_stores.md):
|
||||
|
||||
```python
|
||||
from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader
|
||||
@@ -22,20 +23,24 @@ print(response)
|
||||
```
|
||||
|
||||
Relevant Resources:
|
||||
|
||||
- [Quickstart](/getting_started/starter_example.md)
|
||||
- [Example notebook](https://github.com/jerryjliu/llama_index/tree/main/examples/vector_indices)
|
||||
|
||||
|
||||
### Summarization
|
||||
|
||||
A summarization query requires the LLM to iterate through many if not most documents in order to synthesize an answer.
|
||||
For instance, a summarization query could look like one of the following:
|
||||
A summarization query requires the LLM to iterate through many if not most documents in
|
||||
order to synthesize an answer. For instance, a summarization query could look like one
|
||||
of the following:
|
||||
|
||||
- "What is a summary of this collection of text?"
|
||||
- "Give me a summary of person X's experience with the company."
|
||||
|
||||
In general, a list index would be suited for this use case. A list index by default goes through all the data.
|
||||
In general, a list index would be suited for this use case. A list index by default goes
|
||||
through all the data.
|
||||
|
||||
Empirically, setting `response_mode="tree_summarize"` also leads to better summarization results.
|
||||
Empirically, setting `response_mode="tree_summarize"` also leads to better summarization
|
||||
results.
|
||||
|
||||
```python
|
||||
index = GPTListIndex.from_documents(documents)
|
||||
@@ -45,21 +50,23 @@ response = index.query("<summarization_query>", response_mode="tree_summarize")
|
||||
|
||||
### Queries over Structured Data
|
||||
|
||||
LlamaIndex supports queries over structured data, whether that's a Pandas DataFrame or a SQL Database.
|
||||
LlamaIndex supports queries over structured data, whether that's a Pandas DataFrame or a
|
||||
SQL Database.
|
||||
|
||||
Here are some relevant resources:
|
||||
|
||||
- [Guide on Text-to-SQL](/guides/tutorials/sql_guide.md)
|
||||
- [SQL Demo Notebook 1](https://github.com/jerryjliu/llama_index/blob/main/examples/struct_indices/SQLIndexDemo.ipynb)
|
||||
- [SQL Demo Notebook 2 (Context)](https://github.com/jerryjliu/llama_index/blob/main/examples/struct_indices/SQLIndexDemo-Context.ipynb)
|
||||
- [SQL Demo Notebook 3 (Big tables)](https://github.com/jerryjliu/llama_index/blob/main/examples/struct_indices/SQLIndexDemo-ManyTables.ipynb)
|
||||
- [Pandas Demo Notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/struct_indices/PandasIndexDemo.ipynb).
|
||||
|
||||
|
||||
### Synthesis over Heterogenous Data
|
||||
|
||||
LlamaIndex supports synthesizing across heterogenous data sources. This can be done by composing a graph over your existing data.
|
||||
Specifically, compose a list index over your subindices. A list index inherently combines information for each node; therefore
|
||||
it can synthesize information across your heterogeneous data sources.
|
||||
LlamaIndex supports synthesizing across heterogenous data sources. This can be done by
|
||||
composing a graph over your existing data. Specifically, compose a list index over your
|
||||
subindices. A list index inherently combines information for each node; therefore it can
|
||||
synthesize information across your heterogeneous data sources.
|
||||
|
||||
```python
|
||||
from llama_index import GPTSimpleVectorIndex, GPTListIndex
|
||||
@@ -74,21 +81,20 @@ response = graph.query("<query_str>", mode="recursive", query_configs=...)
|
||||
```
|
||||
|
||||
Here are some relevant resources:
|
||||
|
||||
- [Composability](/how_to/index_structs/composability.md)
|
||||
- [City Analysis Demo](https://github.com/jerryjliu/llama_index/blob/main/examples/composable_indices/city_analysis/PineconeDemo-CityAnalysis.ipynb).
|
||||
|
||||
|
||||
|
||||
### Routing over Heterogenous Data
|
||||
|
||||
LlamaIndex also supports routing over heterogeneous data sources - for instance, if you want to "route" a query to an
|
||||
underlying Document or a subindex.
|
||||
Here you have three options: `GPTTreeIndex`, `GPTKeywordTableIndex`, or a
|
||||
LlamaIndex also supports routing over heterogeneous data sources - for instance, if you
|
||||
want to "route" a query to an underlying Document or a subindex. Here you have three
|
||||
options: `GPTTreeIndex`, `GPTKeywordTableIndex`, or a
|
||||
[Vector Store Index](vector-store-index).
|
||||
|
||||
A `GPTTreeIndex` uses the LLM to select the child node(s) to send the query down to.
|
||||
A `GPTKeywordTableIndex` uses keyword matching, and a `GPTVectorStoreIndex` uses
|
||||
embedding cosine similarity.
|
||||
A `GPTTreeIndex` uses the LLM to select the child node(s) to send the query down to. A
|
||||
`GPTKeywordTableIndex` uses keyword matching, and a `GPTVectorStoreIndex` uses embedding
|
||||
cosine similarity.
|
||||
|
||||
```python
|
||||
from llama_index import GPTTreeIndex, GPTSimpleVectorIndex
|
||||
@@ -102,7 +108,7 @@ index2 = GPTSimpleVectorIndex.from_documents(slack_docs)
|
||||
|
||||
# tree index for routing
|
||||
tree_index = ComposableGraph.from_indices(
|
||||
GPTTreeIndex,
|
||||
GPTTreeIndex,
|
||||
[index1, index2],
|
||||
index_summaries=["summary1", "summary2"]
|
||||
)
|
||||
@@ -116,25 +122,25 @@ response = tree_index.query(
|
||||
```
|
||||
|
||||
Here are some relevant resources:
|
||||
|
||||
- [Composability](/how_to/index_structs/composability.md)
|
||||
- [Composable Keyword Table Graph](https://github.com/jerryjliu/llama_index/blob/main/examples/composable_indices/ComposableIndices.ipynb).
|
||||
|
||||
|
||||
|
||||
### Compare/Contrast Queries
|
||||
|
||||
LlamaIndex can support compare/contrast queries as well. It can do this in the following fashion:
|
||||
LlamaIndex can support compare/contrast queries as well. It can do this in the following
|
||||
fashion:
|
||||
|
||||
- Composing a graph over your data
|
||||
- Adding in query transformations.
|
||||
|
||||
|
||||
You can perform compare/contrast queries by just composing a graph over your data.
|
||||
|
||||
Here are some relevant resources:
|
||||
|
||||
- [Composability](/how_to/index_structs/composability.md)
|
||||
- [SEC 10-k Analysis Example notebook](https://colab.research.google.com/drive/1uL1TdMbR4kqa0Ksrd_Of_jWSxWt1ia7o?usp=sharing).
|
||||
|
||||
|
||||
You can also perform compare/contrast queries with a **query transformation** module.
|
||||
|
||||
```python
|
||||
@@ -144,25 +150,25 @@ decompose_transform = DecomposeQueryTransform(
|
||||
)
|
||||
```
|
||||
|
||||
This module will help break down a complex query into a simpler one over your existing index structure.
|
||||
This module will help break down a complex query into a simpler one over your existing
|
||||
index structure.
|
||||
|
||||
Here are some relevant resources:
|
||||
|
||||
- [Query Transformations](/how_to/query/query_transformations.md)
|
||||
- [City Analysis Example Notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/composable_indices/city_analysis/City_Analysis-Decompose.ipynb)
|
||||
|
||||
|
||||
### Multi-Step Queries
|
||||
|
||||
LlamaIndex can also support multi-step queries. Given a complex query, break it down into subquestions.
|
||||
LlamaIndex can also support multi-step queries. Given a complex query, break it down
|
||||
into subquestions.
|
||||
|
||||
For instance, given a question "Who was in the first batch of the accelerator program the author started?",
|
||||
the module will first decompose the query into a simpler initial question "What was the accelerator program the author started?",
|
||||
query the index, and then ask followup questions.
|
||||
For instance, given a question "Who was in the first batch of the accelerator program
|
||||
the author started?", the module will first decompose the query into a simpler initial
|
||||
question "What was the accelerator program the author started?", query the index, and
|
||||
then ask follow-up questions.
|
||||
|
||||
Here are some relevant resources:
|
||||
|
||||
- [Query Transformations](/how_to/query/query_transformations.md)
|
||||
- [Multi-Step Query Decomposition Notebook](https://github.com/jerryjliu/llama_index/blob/main/examples/vector_indices/SimpleIndexDemo-multistep.ipynb)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,118 +0,0 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import AsyncIterable, Dict, Optional, Union
|
||||
|
||||
from fastapi.responses import JSONResponse
|
||||
from sse_starlette.sse import ServerSentEvent
|
||||
|
||||
from poe_api.types import (AddDocumentsRequest, ContentType, QueryRequest,
|
||||
ReportErrorRequest, ReportFeedbackRequest,
|
||||
SettingsRequest, SettingsResponse)
|
||||
|
||||
logger = logging.getLogger("uvicorn.default")
|
||||
|
||||
|
||||
class PoeHandler:
|
||||
# Override these for your bot
|
||||
|
||||
async def get_response(self, query: QueryRequest) -> AsyncIterable[ServerSentEvent]:
|
||||
"""Override this to return a response to user queries."""
|
||||
yield self.text_event("hello")
|
||||
|
||||
async def get_settings(self, setting: SettingsRequest) -> SettingsResponse:
|
||||
"""Override this to return non-standard settings."""
|
||||
return SettingsResponse()
|
||||
|
||||
async def on_feedback(self, feedback_request: ReportFeedbackRequest) -> None:
|
||||
"""Override this to record feedback from the user."""
|
||||
pass
|
||||
|
||||
async def on_error(self, error_request: ReportErrorRequest) -> None:
|
||||
"""Override this to record errors from the Poe server."""
|
||||
logger.error(f"Error from Poe server: {error_request}")
|
||||
|
||||
async def add_documents(self, add_documents_request: AddDocumentsRequest) -> None:
|
||||
"""Override this to record errors from the Poe server."""
|
||||
pass
|
||||
|
||||
def shutdown(self) -> None:
|
||||
pass
|
||||
|
||||
# Helpers for generating responses
|
||||
|
||||
@staticmethod
|
||||
def text_event(text: str) -> ServerSentEvent:
|
||||
return ServerSentEvent(data=json.dumps({"text": text}), event="text")
|
||||
|
||||
@staticmethod
|
||||
def replace_response_event(text: str) -> ServerSentEvent:
|
||||
return ServerSentEvent(
|
||||
data=json.dumps({"text": text}), event="replace_response"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def done_event() -> ServerSentEvent:
|
||||
return ServerSentEvent(data="{}", event="done")
|
||||
|
||||
@staticmethod
|
||||
def suggested_reply_event(text: str) -> ServerSentEvent:
|
||||
return ServerSentEvent(data=json.dumps({"text": text}), event="suggested_reply")
|
||||
|
||||
@staticmethod
|
||||
def meta_event(
|
||||
*,
|
||||
content_type: ContentType = "text/markdown",
|
||||
refetch_settings: bool = False,
|
||||
linkify: bool = True,
|
||||
suggested_replies: bool = True,
|
||||
) -> ServerSentEvent:
|
||||
return ServerSentEvent(
|
||||
data=json.dumps(
|
||||
{
|
||||
"content_type": content_type,
|
||||
"refetch_settings": refetch_settings,
|
||||
"linkify": linkify,
|
||||
"suggested_replies": suggested_replies,
|
||||
}
|
||||
),
|
||||
event="meta",
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def error_event(
|
||||
text: Optional[str] = None, *, allow_retry: bool = True
|
||||
) -> ServerSentEvent:
|
||||
data: Dict[str, Union[bool, str]] = {"allow_retry": allow_retry}
|
||||
if text is not None:
|
||||
data["text"] = text
|
||||
return ServerSentEvent(data=json.dumps(data), event="error")
|
||||
|
||||
# Internal handlers
|
||||
|
||||
async def handle_report_feedback(
|
||||
self, feedback_request: ReportFeedbackRequest
|
||||
) -> JSONResponse:
|
||||
await self.on_feedback(feedback_request)
|
||||
return JSONResponse({})
|
||||
|
||||
async def handle_report_error(
|
||||
self, error_request: ReportErrorRequest
|
||||
) -> JSONResponse:
|
||||
await self.on_error(error_request)
|
||||
return JSONResponse({})
|
||||
|
||||
async def handle_settings(self, settings_request: SettingsRequest) -> JSONResponse:
|
||||
settings = await self.get_settings(settings_request)
|
||||
return JSONResponse(settings.dict())
|
||||
|
||||
async def handle_query(self, query: QueryRequest) -> AsyncIterable[ServerSentEvent]:
|
||||
async for event in self.get_response(query):
|
||||
yield event
|
||||
yield self.done_event()
|
||||
|
||||
async def handle_add_documents(self, request: AddDocumentsRequest) -> JSONResponse:
|
||||
await self.add_documents(request)
|
||||
return JSONResponse({})
|
||||
|
||||
def handle_shutdown(self) -> None:
|
||||
self.shutdown()
|
||||
@@ -1,28 +1,29 @@
|
||||
"""
|
||||
|
||||
Demo bot: catbot.
|
||||
|
||||
LlamaIndex Bot.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import AsyncIterable, List, Optional, Sequence, Tuple, Type
|
||||
from typing import AsyncIterable, Sequence
|
||||
|
||||
from fastapi.responses import JSONResponse
|
||||
from langchain import LLMChain, OpenAI
|
||||
from langchain.chains.conversational_retrieval.prompts import \
|
||||
CONDENSE_QUESTION_PROMPT
|
||||
from llama_index import Document as LlamaDocument
|
||||
from llama_index import IndexStructType
|
||||
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
|
||||
from llama_index import Document as LlamaDocument, IndexStructType
|
||||
from llama_index.indices.base import BaseGPTIndex
|
||||
from llama_index.indices.registry import INDEX_STRUCT_TYPE_TO_INDEX_CLASS
|
||||
from llama_index.readers import SimpleDirectoryReader
|
||||
from poe_api.types import AddDocumentsRequest, Document
|
||||
from sse_starlette.sse import ServerSentEvent
|
||||
|
||||
from poe_api.base_handler import PoeHandler
|
||||
from poe_api.types import (AddDocumentsRequest, Document, QueryRequest,
|
||||
ReportFeedbackRequest, SettingsRequest,
|
||||
SettingsResponse)
|
||||
from fastapi_poe.base import PoeHandler
|
||||
from fastapi_poe.types import (
|
||||
QueryRequest,
|
||||
ReportFeedbackRequest,
|
||||
SettingsRequest,
|
||||
SettingsResponse,
|
||||
)
|
||||
|
||||
LOAD_DATA = os.environ.get("LLAMA_LOAD_DATA", True)
|
||||
DATA_DIR = os.environ.get("LLAMA_DATA_DIR", "data/")
|
||||
@@ -48,14 +49,14 @@ SETTINGS = SettingsResponse(
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _to_llama_documents(docs: Sequence[Document]) -> List[LlamaDocument]:
|
||||
def _to_llama_documents(docs: Sequence[Document]) -> list[LlamaDocument]:
|
||||
return [LlamaDocument(text=doc.text, doc_id=doc.doc_id) for doc in docs]
|
||||
|
||||
|
||||
def _create_or_load_index(
|
||||
index_type_str: Optional[str] = None,
|
||||
index_json_path: Optional[str] = None,
|
||||
index_type_to_index_cls: Optional[dict[str, Type[BaseGPTIndex]]] = None,
|
||||
index_type_str: str | None = None,
|
||||
index_json_path: str | None = None,
|
||||
index_type_to_index_cls: dict[str, type[BaseGPTIndex]] | None = None,
|
||||
) -> BaseGPTIndex:
|
||||
"""Create or load index from json path."""
|
||||
index_json_path = index_json_path or INDEX_JSON_PATH
|
||||
@@ -78,10 +79,10 @@ def _create_or_load_index(
|
||||
index = index_cls.load_from_disk(index_json_path)
|
||||
logger.info(f"Loading index from {index_json_path}")
|
||||
return index
|
||||
except IOError:
|
||||
except OSError:
|
||||
# Create empty index
|
||||
index = index_cls(nodes=[])
|
||||
logger.info(f"Creating new index")
|
||||
logger.info("Creating new index")
|
||||
|
||||
if LOAD_DATA:
|
||||
logger.info(f"Loading data from {DATA_DIR}")
|
||||
@@ -95,7 +96,7 @@ def _create_or_load_index(
|
||||
return index
|
||||
|
||||
|
||||
def _get_chat_history(chat_history: List[Tuple[str, str]]) -> str:
|
||||
def _get_chat_history(chat_history: list[tuple[str, str]]) -> str:
|
||||
buffer = ""
|
||||
for human_s, ai_s in chat_history:
|
||||
human = "Human: " + human_s
|
||||
@@ -124,8 +125,7 @@ class LlamaBotHandler(PoeHandler):
|
||||
# Generate standalone question from conversation context and last message
|
||||
question_gen_model = OpenAI(temperature=0)
|
||||
question_generator = LLMChain(
|
||||
llm=question_gen_model,
|
||||
prompt=CONDENSE_QUESTION_PROMPT,
|
||||
llm=question_gen_model, prompt=CONDENSE_QUESTION_PROMPT
|
||||
)
|
||||
|
||||
chat_history_str = _get_chat_history(chat_history)
|
||||
@@ -157,7 +157,7 @@ class LlamaBotHandler(PoeHandler):
|
||||
"""Return the settings for this bot."""
|
||||
return SETTINGS
|
||||
|
||||
async def add_documents(self, request: AddDocumentsRequest) -> SettingsResponse:
|
||||
async def add_documents(self, request: AddDocumentsRequest) -> None:
|
||||
"""Add documents."""
|
||||
llama_docs = _to_llama_documents(request.documents)
|
||||
nodes = self._index.service_context.node_parser.get_nodes_from_documents(
|
||||
@@ -165,5 +165,10 @@ class LlamaBotHandler(PoeHandler):
|
||||
)
|
||||
self._index.insert_nodes(nodes)
|
||||
|
||||
def shutdown(self) -> None:
|
||||
async def handle_add_documents(self, request: AddDocumentsRequest) -> JSONResponse:
|
||||
await self.add_documents(request)
|
||||
return JSONResponse({})
|
||||
|
||||
def handle_shutdown(self) -> None:
|
||||
"""Save index upon shutdown."""
|
||||
self._index.save_to_disk(INDEX_JSON_PATH)
|
||||
|
||||
@@ -3,17 +3,22 @@ import logging
|
||||
import os
|
||||
from typing import Any, Dict
|
||||
|
||||
import uvicorn.config
|
||||
from fastapi import Depends, FastAPI, HTTPException, Request, Response
|
||||
from fastapi.exceptions import RequestValidationError
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from poe_api import llama_handler
|
||||
from poe_api.types import AddDocumentsRequest
|
||||
from poe_api.utils import LoggingMiddleware
|
||||
from sse_starlette.sse import EventSourceResponse
|
||||
|
||||
from poe_api import llama_handler
|
||||
from poe_api.types import (AddDocumentsRequest, QueryRequest,
|
||||
ReportErrorRequest, ReportFeedbackRequest,
|
||||
SettingsRequest)
|
||||
from poe_api.utils import LoggingMiddleware
|
||||
from fastapi_poe.types import (
|
||||
QueryRequest,
|
||||
ReportErrorRequest,
|
||||
ReportFeedbackRequest,
|
||||
SettingsRequest,
|
||||
)
|
||||
|
||||
global logger
|
||||
logger = logging.getLogger("uvicorn.default")
|
||||
@@ -45,7 +50,6 @@ app.add_exception_handler(RequestValidationError, exception_handler)
|
||||
# Uncomment this line to print out request and response
|
||||
app.add_middleware(LoggingMiddleware)
|
||||
logger.info("Starting")
|
||||
import uvicorn.config
|
||||
|
||||
log_config = copy.deepcopy(uvicorn.config.LOGGING_CONFIG)
|
||||
log_config["formatters"]["default"]["fmt"] = "%(asctime)s - %(levelname)s - %(message)s"
|
||||
|
||||
@@ -1,71 +1,6 @@
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import Literal, TypeAlias
|
||||
|
||||
Identifier: TypeAlias = str
|
||||
FeedbackType: TypeAlias = Literal["like", "dislike"]
|
||||
ContentType: TypeAlias = Literal["text/markdown", "text/plain"]
|
||||
|
||||
|
||||
class MessageFeedback(BaseModel):
|
||||
"""Feedback for a message as used in the Poe protocol."""
|
||||
|
||||
type: FeedbackType
|
||||
reason: Optional[str]
|
||||
|
||||
|
||||
class ProtocolMessage(BaseModel):
|
||||
"""A message as used in the Poe protocol."""
|
||||
|
||||
role: Literal["system", "user", "bot"]
|
||||
content: str
|
||||
content_type: ContentType
|
||||
timestamp: int
|
||||
message_id: str
|
||||
message_type: Optional[str]
|
||||
feedback: List[MessageFeedback]
|
||||
|
||||
|
||||
class BaseRequest(BaseModel):
|
||||
"""Common data for all requests."""
|
||||
|
||||
version: str
|
||||
type: Literal["query", "settings", "report_feedback", "report_error"]
|
||||
|
||||
|
||||
class QueryRequest(BaseRequest):
|
||||
"""Request parameters for a query request."""
|
||||
|
||||
query: List[ProtocolMessage]
|
||||
user_id: Identifier
|
||||
conversation_id: Identifier
|
||||
message_id: Identifier
|
||||
|
||||
|
||||
class SettingsRequest(BaseRequest):
|
||||
"""Request parameters for a settings request."""
|
||||
|
||||
|
||||
class ReportFeedbackRequest(BaseRequest):
|
||||
"""Request parameters for a report_feedback request."""
|
||||
|
||||
message_id: Identifier
|
||||
user_id: Identifier
|
||||
conversation_id: Identifier
|
||||
feedback_type: FeedbackType
|
||||
|
||||
|
||||
class ReportErrorRequest(BaseRequest):
|
||||
"""Request parameters for a report_error request."""
|
||||
|
||||
message: str
|
||||
metadata: Dict[str, Any]
|
||||
|
||||
|
||||
class SettingsResponse(BaseModel):
|
||||
context_clear_window_secs: Optional[int] = None
|
||||
allow_user_context_clear: bool = True
|
||||
|
||||
|
||||
class Document(BaseModel):
|
||||
|
||||
Generated
+19
-1
@@ -404,6 +404,24 @@ dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (>
|
||||
doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"]
|
||||
test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "fastapi-poe"
|
||||
version = "0.0.7"
|
||||
description = "A demonstration of the Poe protocol using FastAPI"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "fastapi_poe-0.0.7-py3-none-any.whl", hash = "sha256:02172470656e66376df1f37fea48ffe56f221261deee2d53d3036575fd914048"},
|
||||
{file = "fastapi_poe-0.0.7.tar.gz", hash = "sha256:08a8ab2b0c1c6e24473dcd57d0f38b97df3dab36e7fb61995f9f0735b3b45925"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
fastapi = "*"
|
||||
sse-starlette = "*"
|
||||
typing-extensions = "*"
|
||||
uvicorn = "*"
|
||||
|
||||
[[package]]
|
||||
name = "frozenlist"
|
||||
version = "1.3.3"
|
||||
@@ -1605,4 +1623,4 @@ multidict = ">=4.0"
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0"
|
||||
content-hash = "02661840da977bfc12f1f401f2eb429e4703260ca35247baf449f0625259a5b6"
|
||||
content-hash = "7b33dce7bfa06277f4893cdd5e19f5a8cd525a2033cb6c80a63623ea406aa585"
|
||||
|
||||
@@ -20,8 +20,9 @@ fastapi = "^0.95.1"
|
||||
sse-starlette = "^1.3.3"
|
||||
typing-extensions = "^4.5.0"
|
||||
uvicorn = "^0.21.1"
|
||||
fastapi-poe = "^0.0.7"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
Reference in New Issue
Block a user