Compare commits

..

71 Commits

Author SHA1 Message Date
leehuwuj 05748bdf10 refactor code 2024-05-27 14:53:01 +07:00
leehuwuj d60b3c5a96 refactor code and add changeset 2024-05-27 13:09:59 +07:00
leehuwuj c3e9ed3df4 feat: add support for FastAPI in code interpreter tool 2024-05-27 12:37:49 +07:00
github-actions[bot] 1fde1dc585 Release 0.1.8 (#97)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-05-23 22:15:51 +07:00
Thuc Pham cd50a33d43 feat: implement interpreter tool (#94)
---------
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2024-05-23 21:49:18 +07:00
github-actions[bot] ed114856d9 Release 0.1.7 (#93)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-05-22 18:30:49 +07:00
Marcus Schiesser 69c2e16c82 fix: streaming for express 2024-05-22 13:04:35 +02:00
Marcus Schiesser f5da6623cf fix: update llamaindex, use 127.0.0.1 for ollama as default 2024-05-22 12:42:34 +02:00
Marcus Schiesser 0950cb90f2 fix: global-agent types 2024-05-22 11:50:34 +02:00
Mohammad Amir bb53425b4b Proxy support added via global agent (#76) 2024-05-22 16:35:03 +07:00
Huu Le (Lee) bbd5b8ddd6 fix: Reuse PG vector store to avoid recreating sqlalchemy engine (#95) 2024-05-22 16:12:44 +07:00
Thuc Pham 260d37a3f1 feat(ts): add system prompt for chat engine (#92) 2024-05-20 16:12:19 +07:00
Huu Le (Lee) 7873bfb030 chore: Add Ollama API base URL environment variable (#91) 2024-05-17 17:01:06 +07:00
github-actions[bot] 0c7c41ee3b Release 0.1.6 (#90)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-05-16 19:08:40 +07:00
Thuc Pham 56537a1473 feat: host local files and add viewer for PDFs (#85) 2024-05-16 18:06:26 +07:00
github-actions[bot] d8dfc29edd Release 0.1.5 (#89)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-05-16 16:12:40 +07:00
Thuc Pham 84db798353 feat: support display latex in chat markdown (#88) 2024-05-16 15:25:53 +07:00
github-actions[bot] 67a062af14 Release 0.1.4 (#86)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-05-14 20:08:48 +07:00
Marcus Schiesser 0bc8e75c64 docs: add changeset for ingestion pipeline 2024-05-14 15:07:40 +02:00
Huu Le (Lee) 6bd5e7b77a using ingestion pipeline for chromadb (#87) 2024-05-14 20:02:47 +07:00
Huu Le (Lee) 38bc1d1350 Use ingestion pipeline for dedicated vector stores (#74) 2024-05-14 18:58:07 +07:00
Huu Le (Lee) cb1001de95 feat: add support for ChromaDB vector store (#82) 2024-05-14 15:42:01 +07:00
github-actions[bot] 78776ac51e Release 0.1.3 (#84)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-05-13 20:27:42 +07:00
Marcus Schiesser 416073db1d fix: use CJS for express (otherwise qdrant doesn't work) and upgrade to 0.3.9 2024-05-13 15:18:45 +02:00
Huu Le (Lee) 84929de8b2 chore: Update vector store imports in vectordbs components (#83) 2024-05-13 19:55:23 +07:00
Huu Le (Lee) 6fe240b854 Merge pull request #81 from sagech/fix/store-qdrant-init
fix: qdrant store init parameters
2024-05-13 16:52:53 +07:00
Sam Cheng Hung 8bb1024d0f fix: qdrant store init parameters 2024-05-12 04:10:47 +08:00
github-actions[bot] 988bfc2a60 Release 0.1.2 (#79)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-05-10 14:12:31 +07:00
Thuc Pham 056e376ee0 feat: add weather widget and weather tool (#72)
---------
Co-authored-by: leehuwuj <leehuwuj@gmail.com>
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2024-05-10 14:00:16 +07:00
Thuc Pham 819cccb11a feat: use 3.5 as default model (#77) 2024-05-09 15:48:25 +07:00
Huu Le (Lee) 8a5ece10c2 chores: update wrong example system prompt and fix missing switch breaking (#75) 2024-05-08 10:14:34 +07:00
github-actions[bot] 63bb0505d6 Release 0.1.1 (#60)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-05-03 10:38:01 +07:00
Huu Le (Lee) 2e80ef47ee Fix typo in settings.py (#73) 2024-05-03 10:36:12 +07:00
Marcus Schiesser a1feb524e9 Revert "Use ingestion pipeline in Python code (#61)"
This reverts commit c094b0c6bf.
2024-05-03 11:06:02 +08:00
Marcus Schiesser 06823da849 fix: stream type 2024-05-02 17:25:49 +08:00
Thuc Pham 7bd3ed551f feat: support anthropic and gemini model providers and update to LITS 0.3.3 (#63)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2024-05-02 16:13:31 +07:00
Huu Le (Lee) c981eb1423 Fix escaping double quotes in vercel_response.py (#68) 2024-04-26 16:24:03 +07:00
Huu Le (Lee) c094b0c6bf Use ingestion pipeline in Python code (#61)
---------
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2024-04-26 14:42:34 +07:00
Marcus Schiesser e2567ffc03 feat: use TOP_K env variable also for TS (#67) 2024-04-26 14:17:53 +07:00
Marcus Schiesser 5d8d752b16 fix: filter none events in Python (#66) 2024-04-26 13:03:17 +08:00
Marcus Schiesser a0b04be23c fix: hide events per default and optimize python messaging (#64) 2024-04-25 14:16:06 +07:00
Marcus Schiesser 94a2809ecd fix: changeset status 2024-04-25 11:42:35 +08:00
Marcus Schiesser e29ef92564 ci: fix pnpm 2024-04-25 11:41:09 +08:00
Marcus Schiesser 6bdd4ac69d ci: name changeset PRs with version 2024-04-25 11:39:45 +08:00
Thuc Pham 1ad25451a6 fix: allow onnxruntime in nextjs server side (#59) 2024-04-24 15:54:50 +07:00
Thuc Pham cfb5257a1e feat: display chat events (#52)
---------

Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
Co-authored-by: leehuwuj <leehuwuj@gmail.com>
2024-04-24 14:20:58 +07:00
github-actions[bot] 046ff06157 Release 0.1.0 (#44)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-04-24 14:06:37 +07:00
Marcus Schiesser 8b81b17984 fix: only render sources with metadata 2024-04-24 14:41:03 +08:00
Marcus Schiesser f1c3e8df69 Adding support for Llama 3 and Phi3 (via Ollama) (#53) 2024-04-24 10:13:45 +07:00
Marcus Schiesser 089916a148 chore: fix format and update to pnpm 9.0.5 2024-04-22 10:17:52 +08:00
Thuc Pham 3bb94da804 feat: show alert when getting chat error (#55) 2024-04-22 10:06:19 +08:00
Thuc Pham 418bf9ba8a refactor: use tsx instead of ts-node (#54) 2024-04-22 10:04:37 +08:00
Marcus Schiesser e5d20b66f6 ci: add npm publish to release script 2024-04-19 10:02:42 +08:00
Thuc Pham ae7b30106d feat: display sources in chat messages (#45)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2024-04-17 15:07:11 +07:00
Marcus Schiesser 5fb64b74ca fix: aws4 warning (#51) 2024-04-16 15:51:56 +07:00
Thuc Pham e4665b6c0d add npmrc file for express to fix long path name issue with node 20 on windows (#49) 2024-04-15 15:15:50 +07:00
Marcus Schiesser 5463d3bf4b fix: nextjs type checks 2024-04-12 17:16:25 +08:00
Marcus Schiesser 7225e916fd fix: use new ToolsFactory 2024-04-12 12:45:26 +08:00
Marcus Schiesser 897feb9914 ci: add github releases action 2024-04-11 15:30:15 +08:00
Marcus Schiesser 66b5f38eda docs: don't autocommit changesets 2024-04-11 15:00:26 +08:00
Marcus Schiesser 1c3d0b19ec feat: Use gpt-4-turbo model as default. Upgrade Python llama-index to 0.10.28 2024-04-10 11:12:25 +08:00
Marcus Schiesser a0dec80d88 docs(changeset): Use gpt-4-turbo model as default. Upgrade Python llama-index to 0.10.28 2024-04-10 11:11:41 +08:00
Anush 1be69a5aa4 feat: Qdrant support (#42) 2024-04-10 09:37:33 +07:00
Thuc Pham 6acccd2b04 feat: use poetry run generate (#41)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2024-04-09 13:45:12 +07:00
Thuc Pham 753229dfae feat: optionally ask to select AI models and use default models (#40)
---------
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2024-04-09 13:25:35 +07:00
Marcus Schiesser 9efcffe112 docs(changeset): Use Settings object for LlamaIndex configuration 2024-04-08 16:28:24 +08:00
Thuc Pham 0cf6386e17 feat: use setting config (#38) 2024-04-08 15:27:07 +07:00
Marcus Schiesser a6e76e1cf3 fix: release script 2024-04-08 11:37:41 +08:00
Marcus Schiesser 2aaf99e256 chore: add changeset release script 2024-04-08 11:34:59 +08:00
Marcus Schiesser 1d78202ef4 docs(changeset): Add observability for Python 2024-04-08 11:19:38 +08:00
Nir Gazit ee2cc66c3b feat: observability for Python with OpenTelemetry (#39) 2024-04-08 10:19:00 +07:00
144 changed files with 6818 additions and 3235 deletions
+1 -1
View File
@@ -1,7 +1,7 @@
{
"$schema": "https://unpkg.com/@changesets/config@3.0.0/schema.json",
"changelog": "@changesets/cli/changelog",
"commit": true,
"commit": false,
"fixed": [],
"linked": [],
"access": "public",
+5
View File
@@ -0,0 +1,5 @@
---
"create-llama": patch
---
Add support E2B code interpreter tool for FastAPI
+12 -2
View File
@@ -24,36 +24,46 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- name: Set up python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: ${{ env.POETRY_VERSION }}
- uses: pnpm/action-setup@v2
- uses: pnpm/action-setup@v3
- name: Setup Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
cache: "pnpm"
- name: Install dependencies
run: pnpm install
- name: Install Playwright Browsers
run: pnpm exec playwright install --with-deps
working-directory: .
- name: Build create-llama
run: pnpm run build
working-directory: .
- name: Install
run: pnpm run pack-install
working-directory: .
- name: Run Playwright tests
run: pnpm run e2e
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
working-directory: .
- uses: actions/upload-artifact@v3
if: always()
with:
+6 -3
View File
@@ -13,17 +13,20 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: pnpm/action-setup@v2
with:
version: latest
- uses: pnpm/action-setup@v3
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version-file: ".nvmrc"
cache: "pnpm"
- name: Install dependencies
run: pnpm install
- name: Run lint
run: pnpm run lint
- name: Run Prettier
run: pnpm run format
+36
View File
@@ -0,0 +1,36 @@
name: Publish to GitHub Releases
on:
push:
tags:
- "v*"
jobs:
build-and-publish:
runs-on: ubuntu-latest
steps:
- name: Checkout Repo
uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version-file: ".nvmrc"
cache: "pnpm"
- name: Install dependencies
run: pnpm install
- name: Build tarball
run: |
pnpm pack
- name: Create release
uses: ncipollo/release-action@v1
with:
artifacts: "create-llama-*.tgz"
name: Release ${{ github.ref }}
bodyFile: "CHANGELOG.md"
token: ${{ secrets.GITHUB_TOKEN }}
+55
View File
@@ -0,0 +1,55 @@
name: Release
on:
push:
branches:
- main
concurrency: ${{ github.workflow }}-${{ github.ref }}
jobs:
release:
name: Release
runs-on: ubuntu-latest
steps:
- name: Checkout Repo
uses: actions/checkout@v4
- uses: pnpm/action-setup@v3
- name: Setup Node.js
uses: actions/setup-node@v4
with:
node-version-file: ".nvmrc"
cache: "pnpm"
- name: Install dependencies
run: pnpm install
- name: Add auth token to .npmrc file
run: |
cat << EOF >> ".npmrc"
//registry.npmjs.org/:_authToken=$NPM_TOKEN
EOF
env:
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
- name: Get changeset status
id: get-changeset-status
run: |
pnpm changeset status --output .changeset/status.json
new_version=$(jq -r '.releases[0].newVersion' < .changeset/status.json)
rm -v .changeset/status.json
echo "new-version=${new_version}" >> "$GITHUB_OUTPUT"
- name: Create Release Pull Request or Publish to npm
id: changesets
uses: changesets/action@v1
with:
commit: Release ${{ steps.get-changeset-status.outputs.new-version }}
title: Release ${{ steps.get-changeset-status.outputs.new-version }}
# build package and call changeset publish
publish: pnpm release
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+71
View File
@@ -1,5 +1,76 @@
# create-llama
## 0.1.8
### Patch Changes
- cd50a33: Add interpreter tool for TS using e2b.dev
## 0.1.7
### Patch Changes
- 260d37a: Add system prompt env variable for TS
- bbd5b8d: Fix postgres connection leaking issue
- bb53425: Support HTTP proxies by setting the GLOBAL_AGENT_HTTP_PROXY env variable
- 69c2e16: Fix streaming for Express
- 7873bfb: Update Ollama provider to run with the base URL from the environment variable
## 0.1.6
### Patch Changes
- 56537a1: Display PDF files in source nodes
## 0.1.5
### Patch Changes
- 84db798: feat: support display latex in chat markdown
## 0.1.4
### Patch Changes
- 0bc8e75: Use ingestion pipeline for dedicated vector stores (Python only)
- cb1001d: Add ChromaDB vector store
## 0.1.3
### Patch Changes
- 416073d: Directly import vector stores to work with NextJS
## 0.1.2
### Patch Changes
- 056e376: Add support for displaying tool outputs (including weather widget as example)
## 0.1.1
### Patch Changes
- 7bd3ed5: Support Anthropic and Gemini as model providers
- 7bd3ed5: Support new agents from LITS 0.3
- cfb5257: Display events (e.g. retrieving nodes) per chat message
## 0.1.0
### Minor Changes
- f1c3e8d: Add Llama3 and Phi3 support using Ollama
### Patch Changes
- a0dec80: Use `gpt-4-turbo` model as default. Upgrade Python llama-index to 0.10.28
- 753229d: Remove asking for AI models and use defaults instead (OpenAIs GPT-4 Vision Preview and Embeddings v3). Use `--ask-models` CLI parameter to select models.
- 1d78202: Add observability for Python
- 6acccd2: Use poetry run generate to generate embeddings for FastAPI
- 9efcffe: Use Settings object for LlamaIndex configuration
- 418bf9b: refactor: use tsx instead of ts-node
- 1be69a5: Add Qdrant support
## 0.0.32
### Patch Changes
+1 -1
View File
@@ -7,7 +7,7 @@ Install NodeJS. Preferably v18 using nvm or n.
Inside the `create-llama` directory:
```
npm i -g pnpm ts-node
npm i -g pnpm
pnpm install
```
+32 -26
View File
@@ -18,25 +18,19 @@ to start the development server. You can then visit [http://localhost:3000](http
## What you'll get
- A Next.js-powered front-end. The app is set up as a chat interface that can answer questions about your data (see below)
- You can style it with HTML and CSS, or you can optionally use components from [shadcn/ui](https://ui.shadcn.com/)
- A Next.js-powered front-end using components from [shadcn/ui](https://ui.shadcn.com/). The app is set up as a chat interface that can answer questions about your data (see below)
- Your choice of 3 back-ends:
- **Next.js**: if you select this option, youll have a full stack Next.js application that you can deploy to a host like [Vercel](https://vercel.com/) in just a few clicks. This uses [LlamaIndex.TS](https://www.npmjs.com/package/llamaindex), our TypeScript library.
- **Next.js**: if you select this option, youll have a full-stack Next.js application that you can deploy to a host like [Vercel](https://vercel.com/) in just a few clicks. This uses [LlamaIndex.TS](https://www.npmjs.com/package/llamaindex), our TypeScript library.
- **Express**: if you want a more traditional Node.js application you can generate an Express backend. This also uses LlamaIndex.TS.
- **Python FastAPI**: if you select this option youll get a backend powered by the [llama-index python package](https://pypi.org/project/llama-index/), which you can deploy to a service like Render or fly.io.
- The back-end has a single endpoint that allows you to send the state of your chat and receive additional responses
- You can choose whether you want a streaming or non-streaming back-end (if you're not sure, we recommend streaming)
- You can choose whether you want to use `ContextChatEngine` or `SimpleChatEngine`
- `SimpleChatEngine` will just talk to the LLM directly without using your data
- `ContextChatEngine` will use your data to answer questions (see below).
- **Python FastAPI**: if you select this option, youll get a backend powered by the [llama-index python package](https://pypi.org/project/llama-index/), which you can deploy to a service like Render or fly.io.
- The back-end has two endpoints (one streaming, the other one non-streaming) that allow you to send the state of your chat and receive additional responses
- You add arbitrary data sources to your chat, like local files, websites, or data retrieved from a database.
- Turn your chat into an AI agent by adding tools (functions called by the LLM).
- The app uses OpenAI by default, so you'll need an OpenAI API key, or you can customize it to use any of the dozens of LLMs we support.
## Using your data
If you've enabled `ContextChatEngine`, you can supply your own data and the app will index it and answer questions. Your generated app will have a folder called `data`:
- With the Next.js backend this is `./data`
- With the Express or Python backend this is in `./backend/data`
You can supply your own data; the app will index it and answer questions. Your generated app will have a folder called `data` (If you're using Express or Python and generate a frontend, it will be `./backend/data`).
The app will ingest any supported files you put in this directory. Your Next.js and Express apps use LlamaIndex.TS so they will be able to ingest any PDF, text, CSV, Markdown, Word and HTML files. The Python backend can read even more types, including video and audio files.
@@ -46,22 +40,28 @@ Before you can use your data, you need to index it. If you're using the Next.js
npm run generate
```
Then re-start your app. Remember you'll need to re-run `generate` if you add new files to your `data` folder. If you're using the Python backend, you can trigger indexing of your data by deleting the `./storage` folder and re-starting the app.
Then re-start your app. Remember you'll need to re-run `generate` if you add new files to your `data` folder.
## Don't want a front-end?
If you're using the Python backend, you can trigger indexing of your data by calling:
It's optional! If you've selected the Python or Express back-ends, just delete the `frontend` folder and you'll get an API without any front-end code.
```bash
poetry run generate
```
## Customizing the LLM
## Want a front-end?
By default the app will use OpenAI's gpt-3.5-turbo model. If you want to use GPT-4, you can modify this by editing a file:
Optionally generate a frontend if you've selected the Python or Express back-ends. If you do so, `create-llama` will generate two folders: `frontend`, for your Next.js-based frontend code, and `backend` containing your API.
- In the Next.js backend, edit `./app/api/chat/route.ts` and replace `gpt-3.5-turbo` with `gpt-4`
- In the Express backend, edit `./backend/src/controllers/chat.controller.ts` and likewise replace `gpt-3.5-turbo` with `gpt-4`
- In the Python backend, edit `./backend/app/utils/index.py` and once again replace `gpt-3.5-turbo` with `gpt-4`
## Customizing the AI models
The app will default to OpenAI's `gpt-4-turbo` LLM and `text-embedding-3-large` embedding model.
If you want to use different OpenAI models, add the `--ask-models` CLI parameter.
You can also replace OpenAI with one of our [dozens of other supported LLMs](https://docs.llamaindex.ai/en/stable/module_guides/models/llms/modules.html).
To do so, you have to manually change the generated code (edit the `settings.ts` file for Typescript projects or the `settings.py` file for Python projects)
## Example
The simplest thing to do is run `create-llama` in interactive mode:
@@ -84,13 +84,19 @@ Need to install the following packages:
create-llama@latest
Ok to proceed? (y) y
✔ What is your project named? … my-app
✔ Which template would you like to use? Chat with streaming
✔ Which template would you like to use? Chat
✔ Which framework would you like to use? NextJS
✔ Which UI would you like to use? Just HTML
✔ Which chat engine would you like to use? ContextChatEngine
✔ Would you like to set up observability? No
✔ Please provide your OpenAI API key (leave blank to skip): …
✔ Would you like to use ESLint? … No / Yes
Creating a new LlamaIndex app in /home/my-app.
✔ Which data source would you like to use? Use an example PDF
✔ Would you like to add another data source? No
✔ Would you like to use LlamaParse (improved parser for RAG - requires API key)? … no / yes
✔ Would you like to use a vector database? No, just store the data in the file system
? How would you like to proceed? - Use arrow-keys. Return to submit.
Just generate code (~1 sec)
Start in VSCode (~1 sec)
Generate code and install dependencies (~2 min)
Generate code, install dependencies, and run the app (~2 min)
```
### Running non-interactively
+2 -6
View File
@@ -30,10 +30,8 @@ export async function createApp({
appPath,
packageManager,
frontend,
openAiKey,
modelConfig,
llamaCloudKey,
model,
embeddingModel,
communityProjectConfig,
llamapack,
vectorDb,
@@ -77,10 +75,8 @@ export async function createApp({
ui,
packageManager,
isOnline,
openAiKey,
modelConfig,
llamaCloudKey,
model,
embeddingModel,
communityProjectConfig,
llamapack,
vectorDb,
+1 -6
View File
@@ -12,8 +12,7 @@ import {
} from "../helpers";
export type AppType = "--frontend" | "--no-frontend" | "";
const MODEL = "gpt-3.5-turbo";
const EMBEDDING_MODEL = "text-embedding-ada-002";
export type CreateLlamaResult = {
projectName: string;
appProcess: ChildProcess;
@@ -96,10 +95,6 @@ export async function runCreateLlama(
templateUI,
"--vector-db",
vectorDb,
"--model",
MODEL,
"--embedding-model",
EMBEDDING_MODEL,
"--open-ai-key",
process.env.OPENAI_API_KEY,
appType,
+1
View File
@@ -52,6 +52,7 @@ export const copy = async (
export const assetRelocator = (name: string) => {
switch (name) {
case "gitignore":
case "npmrc":
case "eslintrc.json": {
return `.${name}`;
}
+222 -97
View File
@@ -1,12 +1,14 @@
import fs from "fs/promises";
import path from "path";
import { TOOL_SYSTEM_PROMPT_ENV_VAR, Tool } from "./tools";
import {
ModelConfig,
TemplateDataSource,
TemplateFramework,
TemplateVectorDB,
} from "./types";
type EnvVar = {
export type EnvVar = {
name?: string;
description?: string;
value?: string;
@@ -28,14 +30,20 @@ const renderEnvVar = (envVars: EnvVar[]): string => {
);
};
const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
const getVectorDBEnvs = (
vectorDb?: TemplateVectorDB,
framework?: TemplateFramework,
): EnvVar[] => {
if (!vectorDb || !framework) {
return [];
}
switch (vectorDb) {
case "mongo":
return [
{
name: "MONGO_URI",
name: "MONGODB_URI",
description:
"For generating a connection URI, see https://docs.timescale.com/use-timescale/latest/services/create-a-service\nThe MongoDB connection URI.",
"For generating a connection URI, see https://www.mongodb.com/docs/manual/reference/connection-string/ \nThe MongoDB connection URI.",
},
{
name: "MONGODB_DATABASE",
@@ -108,51 +116,141 @@ const getVectorDBEnvs = (vectorDb: TemplateVectorDB) => {
description: "The name of the collection in your Astra database",
},
];
case "qdrant":
return [
{
name: "QDRANT_URL",
description:
"The qualified REST URL of the Qdrant server. Eg: http://localhost:6333",
},
{
name: "QDRANT_COLLECTION",
description: "The name of Qdrant collection to use.",
},
{
name: "QDRANT_API_KEY",
description:
"Optional API key for authenticating requests to Qdrant.",
},
];
case "chroma":
const envs = [
{
name: "CHROMA_COLLECTION",
description: "The name of the collection in your Chroma database",
},
{
name: "CHROMA_HOST",
description: "The API endpoint for your Chroma database",
},
{
name: "CHROMA_PORT",
description: "The port for your Chroma database",
},
];
// TS Version doesn't support config local storage path
if (framework === "fastapi") {
envs.push({
name: "CHROMA_PATH",
description: `The local path to the Chroma database.
Specify this if you are using a local Chroma database.
Otherwise, use CHROMA_HOST and CHROMA_PORT config above`,
});
}
return envs;
default:
return [];
}
};
export const createBackendEnvFile = async (
root: string,
opts: {
openAiKey?: string;
llamaCloudKey?: string;
vectorDb?: TemplateVectorDB;
model?: string;
embeddingModel?: string;
framework?: TemplateFramework;
dataSources?: TemplateDataSource[];
port?: number;
},
) => {
// Init env values
const envFileName = ".env";
const defaultEnvs = [
const getModelEnvs = (modelConfig: ModelConfig): EnvVar[] => {
return [
{
name: "MODEL_PROVIDER",
description: "The provider for the AI models to use.",
value: modelConfig.provider,
},
{
render: true,
name: "MODEL",
description: "The name of LLM model to use.",
value: opts.model || "gpt-3.5-turbo",
value: modelConfig.model,
},
{
render: true,
name: "OPENAI_API_KEY",
description: "The OpenAI API key to use.",
value: opts.openAiKey,
name: "EMBEDDING_MODEL",
description: "Name of the embedding model to use.",
value: modelConfig.embeddingModel,
},
{
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
value: opts.llamaCloudKey,
name: "EMBEDDING_DIM",
description: "Dimension of the embedding model to use.",
value: modelConfig.dimensions.toString(),
},
// Add vector database environment variables
...(opts.vectorDb ? getVectorDBEnvs(opts.vectorDb) : []),
...(modelConfig.provider === "openai"
? [
{
name: "OPENAI_API_KEY",
description: "The OpenAI API key to use.",
value: modelConfig.apiKey,
},
{
name: "LLM_TEMPERATURE",
description: "Temperature for sampling from the model.",
},
{
name: "LLM_MAX_TOKENS",
description: "Maximum number of tokens to generate.",
},
]
: []),
...(modelConfig.provider === "anthropic"
? [
{
name: "ANTHROPIC_API_KEY",
description: "The Anthropic API key to use.",
value: modelConfig.apiKey,
},
]
: []),
...(modelConfig.provider === "gemini"
? [
{
name: "GOOGLE_API_KEY",
description: "The Google API key to use.",
value: modelConfig.apiKey,
},
]
: []),
...(modelConfig.provider === "ollama"
? [
{
name: "OLLAMA_BASE_URL",
description:
"The base URL for the Ollama API. Eg: http://127.0.0.1:11434",
},
]
: []),
];
let envVars: EnvVar[] = [];
if (opts.framework === "fastapi") {
envVars = [
...defaultEnvs,
};
const getFrameworkEnvs = (
framework: TemplateFramework,
port?: number,
): EnvVar[] => {
const sPort = port?.toString() || "8000";
const result: EnvVar[] = [
{
name: "FILESERVER_URL_PREFIX",
description:
"FILESERVER_URL_PREFIX is the URL prefix of the server storing the images generated by the interpreter.",
value:
framework === "nextjs"
? // FIXME: if we are using nextjs, port should be 3000
"http://localhost:3000/api/files"
: `http://localhost:${sPort}/api/files`,
},
];
if (framework === "fastapi") {
result.push(
...[
{
name: "APP_HOST",
@@ -162,60 +260,98 @@ export const createBackendEnvFile = async (
{
name: "APP_PORT",
description: "The port to start the backend app.",
value: opts.port?.toString() || "8000",
},
{
name: "EMBEDDING_MODEL",
description: "Name of the embedding model to use.",
value: opts.embeddingModel,
},
{
name: "EMBEDDING_DIM",
description: "Dimension of the embedding model to use.",
},
{
name: "LLM_TEMPERATURE",
description: "Temperature for sampling from the model.",
},
{
name: "LLM_MAX_TOKENS",
description: "Maximum number of tokens to generate.",
},
{
name: "TOP_K",
description:
"The number of similar embeddings to return when retrieving documents.",
value: "3",
},
{
name: "SYSTEM_PROMPT",
description: `Custom system prompt.
Example:
SYSTEM_PROMPT="
We have provided context information below.
---------------------
{context_str}
---------------------
Given this information, please answer the question: {query_str}
"`,
value: sPort,
},
],
];
} else {
envVars = [
...defaultEnvs,
...[
opts.framework === "nextjs"
? {
name: "NEXT_PUBLIC_MODEL",
description:
"The LLM model to use (hardcode to front-end artifact).",
value: opts.model || "gpt-3.5-turbo",
}
: {},
],
];
);
}
return result;
};
const getEngineEnvs = (): EnvVar[] => {
return [
{
name: "TOP_K",
description:
"The number of similar embeddings to return when retrieving documents.",
value: "3",
},
];
};
const getToolEnvs = (tools?: Tool[]): EnvVar[] => {
if (!tools?.length) return [];
const toolEnvs: EnvVar[] = [];
tools.forEach((tool) => {
if (tool.envVars?.length) {
toolEnvs.push(
// Don't include the system prompt env var here
// It should be handled separately by merging with the default system prompt
...tool.envVars.filter(
(env) => env.name !== TOOL_SYSTEM_PROMPT_ENV_VAR,
),
);
}
});
return toolEnvs;
};
const getSystemPromptEnv = (tools?: Tool[]): EnvVar => {
const defaultSystemPrompt =
"You are a helpful assistant who helps users with their questions.";
// build tool system prompt by merging all tool system prompts
let toolSystemPrompt = "";
tools?.forEach((tool) => {
const toolSystemPromptEnv = tool.envVars?.find(
(env) => env.name === TOOL_SYSTEM_PROMPT_ENV_VAR,
);
if (toolSystemPromptEnv) {
toolSystemPrompt += toolSystemPromptEnv.value + "\n";
}
});
const systemPrompt = toolSystemPrompt
? `\"${toolSystemPrompt}\"`
: defaultSystemPrompt;
return {
name: "SYSTEM_PROMPT",
description: "The system prompt for the AI model.",
value: systemPrompt,
};
};
export const createBackendEnvFile = async (
root: string,
opts: {
llamaCloudKey?: string;
vectorDb?: TemplateVectorDB;
modelConfig: ModelConfig;
framework: TemplateFramework;
dataSources?: TemplateDataSource[];
port?: number;
tools?: Tool[];
},
) => {
// Init env values
const envFileName = ".env";
const envVars: EnvVar[] = [
{
name: "LLAMA_CLOUD_API_KEY",
description: `The Llama Cloud API key.`,
value: opts.llamaCloudKey,
},
// Add model environment variables
...getModelEnvs(opts.modelConfig),
// Add engine environment variables
...getEngineEnvs(),
// Add vector database environment variables
...getVectorDBEnvs(opts.vectorDb, opts.framework),
...getFrameworkEnvs(opts.framework, opts.port),
...getToolEnvs(opts.tools),
getSystemPromptEnv(opts.tools),
];
// Render and write env file
const content = renderEnvVar(envVars);
await fs.writeFile(path.join(root, envFileName), content);
@@ -226,20 +362,9 @@ export const createFrontendEnvFile = async (
root: string,
opts: {
customApiPath?: string;
model?: string;
},
) => {
const defaultFrontendEnvs = [
{
name: "MODEL",
description: "The OpenAI model to use.",
value: opts.model,
},
{
name: "NEXT_PUBLIC_MODEL",
description: "The OpenAI model to use (hardcode to front-end artifact).",
value: opts.model,
},
{
name: "NEXT_PUBLIC_CHAT_API",
description: "The backend API for chat endpoint.",
+15 -11
View File
@@ -15,6 +15,7 @@ import { ConfigFileType, writeToolsConfig } from "./tools";
import {
FileSourceConfig,
InstallTemplateArgs,
ModelConfig,
TemplateDataSource,
TemplateFramework,
TemplateVectorDB,
@@ -24,8 +25,8 @@ import { installTSTemplate } from "./typescript";
// eslint-disable-next-line max-params
async function generateContextData(
framework: TemplateFramework,
modelConfig: ModelConfig,
packageManager?: PackageManager,
openAiKey?: string,
vectorDb?: TemplateVectorDB,
llamaCloudKey?: string,
useLlamaParse?: boolean,
@@ -33,20 +34,20 @@ async function generateContextData(
if (packageManager) {
const runGenerate = `${cyan(
framework === "fastapi"
? "poetry run python app/engine/generate.py"
? "poetry run generate"
: `${packageManager} run generate`,
)}`;
const openAiKeyConfigured = openAiKey || process.env["OPENAI_API_KEY"];
const modelConfigured = modelConfig.isConfigured();
const llamaCloudKeyConfigured = useLlamaParse
? llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
: true;
const hasVectorDb = vectorDb && vectorDb !== "none";
if (openAiKeyConfigured && llamaCloudKeyConfigured && !hasVectorDb) {
if (modelConfigured && llamaCloudKeyConfigured && !hasVectorDb) {
// If all the required environment variables are set, run the generate script
if (framework === "fastapi") {
if (isHavingPoetryLockFile()) {
console.log(`Running ${runGenerate} to generate the context data.`);
const result = tryPoetryRun("python app/engine/generate.py");
const result = tryPoetryRun("poetry run generate");
if (!result) {
console.log(`Failed to run ${runGenerate}.`);
process.exit(1);
@@ -63,7 +64,7 @@ async function generateContextData(
// generate the message of what to do to run the generate script manually
const settings = [];
if (!openAiKeyConfigured) settings.push("your OpenAI key");
if (!modelConfigured) settings.push("your model provider API key");
if (!llamaCloudKeyConfigured) settings.push("your Llama Cloud key");
if (hasVectorDb) settings.push("your Vector DB environment variables");
const settingsMessage =
@@ -141,14 +142,13 @@ export const installTemplate = async (
// Copy the environment file to the target directory.
await createBackendEnvFile(props.root, {
openAiKey: props.openAiKey,
modelConfig: props.modelConfig,
llamaCloudKey: props.llamaCloudKey,
vectorDb: props.vectorDb,
model: props.model,
embeddingModel: props.embeddingModel,
framework: props.framework,
dataSources: props.dataSources,
port: props.externalPort,
tools: props.tools,
});
if (props.dataSources.length > 0) {
@@ -163,18 +163,22 @@ export const installTemplate = async (
) {
await generateContextData(
props.framework,
props.modelConfig,
props.packageManager,
props.openAiKey,
props.vectorDb,
props.llamaCloudKey,
props.useLlamaParse,
);
}
}
// Create tool-output directory
if (props.tools && props.tools.length > 0) {
await fsExtra.mkdir(path.join(props.root, "tool-output"));
}
} else {
// this is a frontend for a full-stack app, create .env file with model information
await createFrontendEnvFile(props.root, {
model: props.model,
customApiPath: props.customApiPath,
});
}
+106
View File
@@ -0,0 +1,106 @@
import ciInfo from "ci-info";
import prompts from "prompts";
import { ModelConfigParams } from ".";
import { questionHandlers, toChoice } from "../../questions";
const MODELS = [
"claude-3-opus",
"claude-3-sonnet",
"claude-3-haiku",
"claude-2.1",
"claude-instant-1.2",
];
const DEFAULT_MODEL = MODELS[0];
// TODO: get embedding vector dimensions from the anthropic sdk (currently not supported)
// Use huggingface embedding models for now
enum HuggingFaceEmbeddingModelType {
XENOVA_ALL_MINILM_L6_V2 = "all-MiniLM-L6-v2",
XENOVA_ALL_MPNET_BASE_V2 = "all-mpnet-base-v2",
}
type ModelData = {
dimensions: number;
};
const EMBEDDING_MODELS: Record<HuggingFaceEmbeddingModelType, ModelData> = {
[HuggingFaceEmbeddingModelType.XENOVA_ALL_MINILM_L6_V2]: {
dimensions: 384,
},
[HuggingFaceEmbeddingModelType.XENOVA_ALL_MPNET_BASE_V2]: {
dimensions: 768,
},
};
const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0];
const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions;
type AnthropicQuestionsParams = {
apiKey?: string;
askModels: boolean;
};
export async function askAnthropicQuestions({
askModels,
apiKey,
}: AnthropicQuestionsParams): Promise<ModelConfigParams> {
const config: ModelConfigParams = {
apiKey,
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: DEFAULT_DIMENSIONS,
isConfigured(): boolean {
if (config.apiKey) {
return true;
}
if (process.env["ANTHROPIC_API_KEY"]) {
return true;
}
return false;
},
};
if (!config.apiKey) {
const { key } = await prompts(
{
type: "text",
name: "key",
message:
"Please provide your Anthropic API key (or leave blank to use ANTHROPIC_API_KEY env variable):",
},
questionHandlers,
);
config.apiKey = key || process.env.ANTHROPIC_API_KEY;
}
// use default model values in CI or if user should not be asked
const useDefaults = ciInfo.isCI || !askModels;
if (!useDefaults) {
const { model } = await prompts(
{
type: "select",
name: "model",
message: "Which LLM model would you like to use?",
choices: MODELS.map(toChoice),
initial: 0,
},
questionHandlers,
);
config.model = model;
const { embeddingModel } = await prompts(
{
type: "select",
name: "embeddingModel",
message: "Which embedding model would you like to use?",
choices: Object.keys(EMBEDDING_MODELS).map(toChoice),
initial: 0,
},
questionHandlers,
);
config.embeddingModel = embeddingModel;
config.dimensions =
EMBEDDING_MODELS[
embeddingModel as HuggingFaceEmbeddingModelType
].dimensions;
}
return config;
}
+87
View File
@@ -0,0 +1,87 @@
import ciInfo from "ci-info";
import prompts from "prompts";
import { ModelConfigParams } from ".";
import { questionHandlers, toChoice } from "../../questions";
const MODELS = ["gemini-1.5-pro-latest", "gemini-pro", "gemini-pro-vision"];
type ModelData = {
dimensions: number;
};
const EMBEDDING_MODELS: Record<string, ModelData> = {
"embedding-001": { dimensions: 768 },
"text-embedding-004": { dimensions: 768 },
};
const DEFAULT_MODEL = MODELS[0];
const DEFAULT_EMBEDDING_MODEL = Object.keys(EMBEDDING_MODELS)[0];
const DEFAULT_DIMENSIONS = Object.values(EMBEDDING_MODELS)[0].dimensions;
type GeminiQuestionsParams = {
apiKey?: string;
askModels: boolean;
};
export async function askGeminiQuestions({
askModels,
apiKey,
}: GeminiQuestionsParams): Promise<ModelConfigParams> {
const config: ModelConfigParams = {
apiKey,
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: DEFAULT_DIMENSIONS,
isConfigured(): boolean {
if (config.apiKey) {
return true;
}
if (process.env["GOOGLE_API_KEY"]) {
return true;
}
return false;
},
};
if (!config.apiKey) {
const { key } = await prompts(
{
type: "text",
name: "key",
message:
"Please provide your Google API key (or leave blank to use GOOGLE_API_KEY env variable):",
},
questionHandlers,
);
config.apiKey = key || process.env.GOOGLE_API_KEY;
}
// use default model values in CI or if user should not be asked
const useDefaults = ciInfo.isCI || !askModels;
if (!useDefaults) {
const { model } = await prompts(
{
type: "select",
name: "model",
message: "Which LLM model would you like to use?",
choices: MODELS.map(toChoice),
initial: 0,
},
questionHandlers,
);
config.model = model;
const { embeddingModel } = await prompts(
{
type: "select",
name: "embeddingModel",
message: "Which embedding model would you like to use?",
choices: Object.keys(EMBEDDING_MODELS).map(toChoice),
initial: 0,
},
questionHandlers,
);
config.embeddingModel = embeddingModel;
config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions;
}
return config;
}
+67
View File
@@ -0,0 +1,67 @@
import ciInfo from "ci-info";
import prompts from "prompts";
import { questionHandlers } from "../../questions";
import { ModelConfig, ModelProvider } from "../types";
import { askAnthropicQuestions } from "./anthropic";
import { askGeminiQuestions } from "./gemini";
import { askOllamaQuestions } from "./ollama";
import { askOpenAIQuestions } from "./openai";
const DEFAULT_MODEL_PROVIDER = "openai";
export type ModelConfigQuestionsParams = {
openAiKey?: string;
askModels: boolean;
};
export type ModelConfigParams = Omit<ModelConfig, "provider">;
export async function askModelConfig({
askModels,
openAiKey,
}: ModelConfigQuestionsParams): Promise<ModelConfig> {
let modelProvider: ModelProvider = DEFAULT_MODEL_PROVIDER;
if (askModels && !ciInfo.isCI) {
const { provider } = await prompts(
{
type: "select",
name: "provider",
message: "Which model provider would you like to use",
choices: [
{
title: "OpenAI",
value: "openai",
},
{ title: "Ollama", value: "ollama" },
{ title: "Anthropic", value: "anthropic" },
{ title: "Gemini", value: "gemini" },
],
initial: 0,
},
questionHandlers,
);
modelProvider = provider;
}
let modelConfig: ModelConfigParams;
switch (modelProvider) {
case "ollama":
modelConfig = await askOllamaQuestions({ askModels });
break;
case "anthropic":
modelConfig = await askAnthropicQuestions({ askModels });
break;
case "gemini":
modelConfig = await askGeminiQuestions({ askModels });
break;
default:
modelConfig = await askOpenAIQuestions({
openAiKey,
askModels,
});
}
return {
...modelConfig,
provider: modelProvider,
};
}
+95
View File
@@ -0,0 +1,95 @@
import ciInfo from "ci-info";
import ollama, { type ModelResponse } from "ollama";
import { red } from "picocolors";
import prompts from "prompts";
import { ModelConfigParams } from ".";
import { questionHandlers, toChoice } from "../../questions";
type ModelData = {
dimensions: number;
};
const MODELS = ["llama3:8b", "wizardlm2:7b", "gemma:7b", "phi3"];
const DEFAULT_MODEL = MODELS[0];
// TODO: get embedding vector dimensions from the ollama sdk (currently not supported)
const EMBEDDING_MODELS: Record<string, ModelData> = {
"nomic-embed-text": { dimensions: 768 },
"mxbai-embed-large": { dimensions: 1024 },
"all-minilm": { dimensions: 384 },
};
const DEFAULT_EMBEDDING_MODEL: string = Object.keys(EMBEDDING_MODELS)[0];
type OllamaQuestionsParams = {
askModels: boolean;
};
export async function askOllamaQuestions({
askModels,
}: OllamaQuestionsParams): Promise<ModelConfigParams> {
const config: ModelConfigParams = {
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: EMBEDDING_MODELS[DEFAULT_EMBEDDING_MODEL].dimensions,
isConfigured(): boolean {
return true;
},
};
// use default model values in CI or if user should not be asked
const useDefaults = ciInfo.isCI || !askModels;
if (!useDefaults) {
const { model } = await prompts(
{
type: "select",
name: "model",
message: "Which LLM model would you like to use?",
choices: MODELS.map(toChoice),
initial: 0,
},
questionHandlers,
);
await ensureModel(model);
config.model = model;
const { embeddingModel } = await prompts(
{
type: "select",
name: "embeddingModel",
message: "Which embedding model would you like to use?",
choices: Object.keys(EMBEDDING_MODELS).map(toChoice),
initial: 0,
},
questionHandlers,
);
await ensureModel(embeddingModel);
config.embeddingModel = embeddingModel;
config.dimensions = EMBEDDING_MODELS[embeddingModel].dimensions;
}
return config;
}
async function ensureModel(modelName: string) {
try {
if (modelName.split(":").length === 1) {
// model doesn't have a version suffix, use latest
modelName = modelName + ":latest";
}
const { models } = await ollama.list();
const found =
models.find((model: ModelResponse) => model.name === modelName) !==
undefined;
if (!found) {
console.log(
red(
`Model ${modelName} was not pulled yet. Call 'ollama pull ${modelName}' and try again.`,
),
);
process.exit(1);
}
} catch (error) {
console.log(
red("Listing Ollama models failed. Is 'ollama' running? " + error),
);
process.exit(1);
}
}
+144
View File
@@ -0,0 +1,144 @@
import ciInfo from "ci-info";
import got from "got";
import ora from "ora";
import { red } from "picocolors";
import prompts from "prompts";
import { ModelConfigParams, ModelConfigQuestionsParams } from ".";
import { questionHandlers } from "../../questions";
const OPENAI_API_URL = "https://api.openai.com/v1";
const DEFAULT_MODEL = "gpt-3.5-turbo";
const DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large";
export async function askOpenAIQuestions({
openAiKey,
askModels,
}: ModelConfigQuestionsParams): Promise<ModelConfigParams> {
const config: ModelConfigParams = {
apiKey: openAiKey,
model: DEFAULT_MODEL,
embeddingModel: DEFAULT_EMBEDDING_MODEL,
dimensions: getDimensions(DEFAULT_EMBEDDING_MODEL),
isConfigured(): boolean {
if (config.apiKey) {
return true;
}
if (process.env["OPENAI_API_KEY"]) {
return true;
}
return false;
},
};
if (!config.apiKey) {
const { key } = await prompts(
{
type: "text",
name: "key",
message: askModels
? "Please provide your OpenAI API key (or leave blank to use OPENAI_API_KEY env variable):"
: "Please provide your OpenAI API key (leave blank to skip):",
validate: (value: string) => {
if (askModels && !value) {
if (process.env.OPENAI_API_KEY) {
return true;
}
return "OPENAI_API_KEY env variable is not set - key is required";
}
return true;
},
},
questionHandlers,
);
config.apiKey = key || process.env.OPENAI_API_KEY;
}
// use default model values in CI or if user should not be asked
const useDefaults = ciInfo.isCI || !askModels;
if (!useDefaults) {
const { model } = await prompts(
{
type: "select",
name: "model",
message: "Which LLM model would you like to use?",
choices: await getAvailableModelChoices(false, config.apiKey),
initial: 0,
},
questionHandlers,
);
config.model = model;
const { embeddingModel } = await prompts(
{
type: "select",
name: "embeddingModel",
message: "Which embedding model would you like to use?",
choices: await getAvailableModelChoices(true, config.apiKey),
initial: 0,
},
questionHandlers,
);
config.embeddingModel = embeddingModel;
config.dimensions = getDimensions(embeddingModel);
}
return config;
}
async function getAvailableModelChoices(
selectEmbedding: boolean,
apiKey?: string,
) {
if (!apiKey) {
throw new Error("need OpenAI key to retrieve model choices");
}
const isLLMModel = (modelId: string) => {
return modelId.startsWith("gpt");
};
const isEmbeddingModel = (modelId: string) => {
return modelId.includes("embedding");
};
const spinner = ora("Fetching available models").start();
try {
const response = await got(`${OPENAI_API_URL}/models`, {
headers: {
Authorization: "Bearer " + apiKey,
},
timeout: 5000,
responseType: "json",
});
const data: any = await response.body;
spinner.stop();
return data.data
.filter((model: any) =>
selectEmbedding ? isEmbeddingModel(model.id) : isLLMModel(model.id),
)
.map((el: any) => {
return {
title: el.id,
value: el.id,
};
});
} catch (error) {
spinner.stop();
if ((error as any).response?.statusCode === 401) {
console.log(
red(
"Invalid OpenAI API key provided! Please provide a valid key and try again!",
),
);
} else {
console.log(red("Request failed: " + error));
}
process.exit(1);
}
}
function getDimensions(modelName: string) {
// at 2024-04-24 all OpenAI embedding models support 1536 dimensions except
// "text-embedding-3-large", see https://openai.com/blog/new-embedding-models-and-api-updates
return modelName === "text-embedding-3-large" ? 1024 : 1536;
}
+8
View File
@@ -0,0 +1,8 @@
/* Function to conditionally load the global-agent/bootstrap module */
export async function initializeGlobalAgent() {
if (process.env.GLOBAL_AGENT_HTTP_PROXY) {
/* Dynamically import global-agent/bootstrap */
await import("global-agent/bootstrap");
console.log("Proxy enabled via global-agent.");
}
}
+122 -35
View File
@@ -10,6 +10,7 @@ import { isPoetryAvailable, tryPoetryInstall } from "./poetry";
import { Tool } from "./tools";
import {
InstallTemplateArgs,
ModelConfig,
TemplateDataSource,
TemplateVectorDB,
} from "./types";
@@ -21,8 +22,9 @@ interface Dependency {
}
const getAdditionalDependencies = (
modelConfig: ModelConfig,
vectorDb?: TemplateVectorDB,
dataSource?: TemplateDataSource,
dataSources?: TemplateDataSource[],
tools?: Tool[],
) => {
const dependencies: Dependency[] = [];
@@ -41,6 +43,7 @@ const getAdditionalDependencies = (
name: "llama-index-vector-stores-postgres",
version: "^0.1.1",
});
break;
}
case "pinecone": {
dependencies.push({
@@ -67,47 +70,105 @@ const getAdditionalDependencies = (
});
break;
}
case "qdrant": {
dependencies.push({
name: "llama-index-vector-stores-qdrant",
version: "^0.2.8",
});
break;
}
case "chroma": {
dependencies.push({
name: "llama-index-vector-stores-chroma",
version: "^0.1.8",
});
break;
}
}
// Add data source dependencies
const dataSourceType = dataSource?.type;
switch (dataSourceType) {
case "file":
dependencies.push({
name: "docx2txt",
version: "^0.8",
});
break;
case "web":
dependencies.push({
name: "llama-index-readers-web",
version: "^0.1.6",
});
break;
case "db":
dependencies.push({
name: "llama-index-readers-database",
version: "^0.1.3",
});
dependencies.push({
name: "pymysql",
version: "^1.1.0",
extras: ["rsa"],
});
dependencies.push({
name: "psycopg2",
version: "^2.9.9",
});
break;
if (dataSources) {
for (const ds of dataSources) {
const dsType = ds?.type;
switch (dsType) {
case "file":
dependencies.push({
name: "docx2txt",
version: "^0.8",
});
break;
case "web":
dependencies.push({
name: "llama-index-readers-web",
version: "^0.1.6",
});
break;
case "db":
dependencies.push({
name: "llama-index-readers-database",
version: "^0.1.3",
});
dependencies.push({
name: "pymysql",
version: "^1.1.0",
extras: ["rsa"],
});
dependencies.push({
name: "psycopg2",
version: "^2.9.9",
});
break;
}
}
}
// Add tools dependencies
console.log("Adding tools dependencies");
tools?.forEach((tool) => {
tool.dependencies?.forEach((dep) => {
dependencies.push(dep);
});
});
switch (modelConfig.provider) {
case "ollama":
dependencies.push({
name: "llama-index-llms-ollama",
version: "0.1.2",
});
dependencies.push({
name: "llama-index-embeddings-ollama",
version: "0.1.2",
});
break;
case "openai":
dependencies.push({
name: "llama-index-agent-openai",
version: "0.2.2",
});
break;
case "anthropic":
dependencies.push({
name: "llama-index-llms-anthropic",
version: "0.1.10",
});
dependencies.push({
name: "llama-index-embeddings-huggingface",
version: "0.2.0",
});
break;
case "gemini":
dependencies.push({
name: "llama-index-llms-gemini",
version: "0.1.7",
});
dependencies.push({
name: "llama-index-embeddings-gemini",
version: "0.1.6",
});
break;
}
return dependencies;
};
@@ -205,7 +266,8 @@ export const installPythonTemplate = async ({
dataSources,
tools,
postInstallAction,
useLlamaParse,
observability,
modelConfig,
}: Pick<
InstallTemplateArgs,
| "root"
@@ -214,8 +276,9 @@ export const installPythonTemplate = async ({
| "vectorDb"
| "dataSources"
| "tools"
| "useLlamaParse"
| "postInstallAction"
| "observability"
| "modelConfig"
>) => {
console.log("\nInitializing Python project with template:", template, "\n");
const templatePath = path.join(templatesDir, "types", template, framework);
@@ -255,9 +318,33 @@ export const installPythonTemplate = async ({
cwd: path.join(compPath, "engines", "python", engine),
});
const addOnDependencies = dataSources
.map((ds) => getAdditionalDependencies(vectorDb, ds, tools))
.flat();
console.log("Adding additional dependencies");
const addOnDependencies = getAdditionalDependencies(
modelConfig,
vectorDb,
dataSources,
tools,
);
if (observability === "opentelemetry") {
addOnDependencies.push({
name: "traceloop-sdk",
version: "^0.15.11",
});
const templateObservabilityPath = path.join(
templatesDir,
"components",
"observability",
"python",
"opentelemetry",
);
await copy("**", path.join(root, "app"), {
cwd: templateObservabilityPath,
});
}
await addDependencies(root, addOnDependencies);
if (postInstallAction === "runApp" || postInstallAction === "dependencies") {
+82 -2
View File
@@ -2,15 +2,25 @@ import fs from "fs/promises";
import path from "path";
import { red } from "picocolors";
import yaml from "yaml";
import { EnvVar } from "./env-variables";
import { makeDir } from "./make-dir";
import { TemplateFramework } from "./types";
export const TOOL_SYSTEM_PROMPT_ENV_VAR = "TOOL_SYSTEM_PROMPT";
export enum ToolType {
LLAMAHUB = "llamahub",
LOCAL = "local",
}
export type Tool = {
display: string;
name: string;
config?: Record<string, any>;
dependencies?: ToolDependencies[];
supportedFrameworks?: Array<TemplateFramework>;
type: ToolType;
envVars?: EnvVar[];
};
export type ToolDependencies = {
@@ -35,6 +45,14 @@ export const supportedTools: Tool[] = [
},
],
supportedFrameworks: ["fastapi"],
type: ToolType.LLAMAHUB,
envVars: [
{
name: TOOL_SYSTEM_PROMPT_ENV_VAR,
description: "System prompt for google search tool.",
value: `You are a Google search agent. You help users to get information from Google search.`,
},
],
},
{
display: "Wikipedia",
@@ -46,6 +64,58 @@ export const supportedTools: Tool[] = [
},
],
supportedFrameworks: ["fastapi", "express", "nextjs"],
type: ToolType.LLAMAHUB,
envVars: [
{
name: TOOL_SYSTEM_PROMPT_ENV_VAR,
description: "System prompt for wiki tool.",
value: `You are a Wikipedia agent. You help users to get information from Wikipedia.`,
},
],
},
{
display: "Weather",
name: "weather",
dependencies: [],
supportedFrameworks: ["fastapi", "express", "nextjs"],
type: ToolType.LOCAL,
envVars: [
{
name: TOOL_SYSTEM_PROMPT_ENV_VAR,
description: "System prompt for weather tool.",
value: `You are a weather forecast agent. You help users to get the weather forecast for a given location.`,
},
],
},
{
display: "Code Interpreter",
name: "interpreter",
dependencies: [
{
name: "e2b_code_interpreter",
version: "0.0.7",
},
],
supportedFrameworks: ["fastapi", "express", "nextjs"],
type: ToolType.LOCAL,
envVars: [
{
name: "E2B_API_KEY",
description:
"E2B_API_KEY key is required to run code interpreter tool. Get it here: https://e2b.dev/docs/getting-started/api-key",
},
{
name: TOOL_SYSTEM_PROMPT_ENV_VAR,
description: "System prompt for code interpreter tool.",
value: `You are a Python interpreter.
- You are given tasks to complete and you run python code to solve them.
- The python code runs in a Jupyter notebook. Every time you call \`interpreter\` tool, the python code is executed in a separate cell. It's okay to make multiple calls to \`interpreter\`.
- Display visualizations using matplotlib or any other visualization library directly in the notebook. Shouldn't save the visualizations to a file, just return the base64 encoded data.
- You can install any pip package (if it exists) if you need to but the usual packages for data analysis are already preinstalled.
- You can run any python code you want in a secure environment.
- Use absolute url from result to display images or any other media.`,
},
],
},
];
@@ -90,9 +160,19 @@ export const writeToolsConfig = async (
type: ConfigFileType = ConfigFileType.YAML,
) => {
if (tools.length === 0) return; // no tools selected, no config need
const configContent: Record<string, any> = {};
const configContent: {
[key in ToolType]: Record<string, any>;
} = {
local: {},
llamahub: {},
};
tools.forEach((tool) => {
configContent[tool.name] = tool.config ?? {};
if (tool.type === ToolType.LLAMAHUB) {
configContent.llamahub[tool.name] = tool.config ?? {};
}
if (tool.type === ToolType.LOCAL) {
configContent.local[tool.name] = tool.config ?? {};
}
});
const configPath = path.join(root, "config");
await makeDir(configPath);
+13 -4
View File
@@ -1,6 +1,15 @@
import { PackageManager } from "../helpers/get-pkg-manager";
import { Tool } from "./tools";
export type ModelProvider = "openai" | "ollama" | "anthropic" | "gemini";
export type ModelConfig = {
provider: ModelProvider;
apiKey?: string;
model: string;
embeddingModel: string;
dimensions: number;
isConfigured(): boolean;
};
export type TemplateType = "streaming" | "community" | "llamapack";
export type TemplateFramework = "nextjs" | "express" | "fastapi";
export type TemplateUI = "html" | "shadcn";
@@ -10,7 +19,9 @@ export type TemplateVectorDB =
| "pg"
| "pinecone"
| "milvus"
| "astra";
| "astra"
| "qdrant"
| "chroma";
export type TemplatePostInstallAction =
| "none"
| "VSCode"
@@ -58,11 +69,9 @@ export interface InstallTemplateArgs {
ui: TemplateUI;
dataSources: TemplateDataSource[];
customApiPath?: string;
openAiKey?: string;
modelConfig: ModelConfig;
llamaCloudKey?: string;
useLlamaParse?: boolean;
model: string;
embeddingModel: string;
communityProjectConfig?: CommunityProjectConfig;
llamapack?: string;
vectorDb?: TemplateVectorDB;
+3 -3
View File
@@ -105,7 +105,7 @@ export const installTSTemplate = async ({
const enginePath = path.join(root, relativeEngineDestPath, "engine");
// copy vector db component
console.log("\nUsing vector DB:", vectorDb, "\n");
console.log("\nUsing vector DB:", vectorDb ?? "none", "\n");
await copy("**", enginePath, {
parents: true,
cwd: path.join(compPath, "vectordbs", "typescript", vectorDb ?? "none"),
@@ -200,12 +200,12 @@ async function updatePackageJson({
packageJson.name = appName;
packageJson.version = "0.1.0";
if (dataSources.length > 0 && relativeEngineDestPath) {
if (relativeEngineDestPath) {
// TODO: move script to {root}/scripts for all frameworks
// add generate script if using context engine
packageJson.scripts = {
...packageJson.scripts,
generate: `ts-node ${path.join(
generate: `tsx ${path.join(
relativeEngineDestPath,
"engine",
"generate.ts",
+25 -23
View File
@@ -12,12 +12,16 @@ import { createApp } from "./create-app";
import { getDataSources } from "./helpers/datasources";
import { getPkgManager } from "./helpers/get-pkg-manager";
import { isFolderEmpty } from "./helpers/is-folder-empty";
import { initializeGlobalAgent } from "./helpers/proxy";
import { runApp } from "./helpers/run-app";
import { getTools } from "./helpers/tools";
import { validateNpmName } from "./helpers/validate-pkg";
import packageJson from "./package.json";
import { QuestionArgs, askQuestions, onPromptState } from "./questions";
// Run the initialization function
initializeGlobalAgent();
let projectPath: string = "";
const handleSigTerm = () => process.exit(0);
@@ -78,7 +82,7 @@ const program = new Commander.Command(packageJson.name)
"--files <path>",
`
Specify the path to a local file or folder for chatting.
Specify the path to a local file or folder for chatting.
`,
)
.option(
@@ -107,19 +111,6 @@ const program = new Commander.Command(packageJson.name)
`
Whether to generate a frontend for your backend.
`,
)
.option(
"--model <model>",
`
Select OpenAI model to use. E.g. gpt-3.5-turbo.
`,
)
.option(
"--embedding-model <embeddingModel>",
`
Select OpenAI embedding model to use. E.g. text-embedding-ada-002.
`,
)
.option(
@@ -160,22 +151,30 @@ const program = new Commander.Command(packageJson.name)
.option(
"--use-llama-parse",
`
Enable LlamaParse.
Enable LlamaParse.
`,
)
.option(
"--llama-cloud-key <key>",
`
Provide a LlamaCloud API key.
`,
)
.option(
"--list-server-models",
"Fetch available LLM and embedding models from OpenAI API.",
"--observability <observability>",
`
Specify observability tools to use. Eg: none, opentelemetry
`,
)
.option(
"--observability <observability>",
"Specify observability tools to use. Eg: none, opentelemetry",
"--ask-models",
`
Select LLM and embedding models.
`,
)
.allowUnknownOption()
.parse(process.argv);
@@ -192,6 +191,7 @@ if (process.argv.includes("--tools")) {
if (process.argv.includes("--no-llama-parse")) {
program.useLlamaParse = false;
}
program.askModels = process.argv.includes("--ask-models");
if (process.argv.includes("--no-files")) {
program.dataSources = [];
} else {
@@ -278,7 +278,11 @@ async function run(): Promise<void> {
}
const preferences = (conf.get("preferences") || {}) as QuestionArgs;
await askQuestions(program as unknown as QuestionArgs, preferences);
await askQuestions(
program as unknown as QuestionArgs,
preferences,
program.openAiKey,
);
await createApp({
template: program.template,
@@ -287,10 +291,8 @@ async function run(): Promise<void> {
appPath: resolvedProjectPath,
packageManager,
frontend: program.frontend,
openAiKey: program.openAiKey,
modelConfig: program.modelConfig,
llamaCloudKey: program.llamaCloudKey,
model: program.model,
embeddingModel: program.embeddingModel,
communityProjectConfig: program.communityProjectConfig,
llamapack: program.llamapack,
vectorDb: program.vectorDb,
+27 -23
View File
@@ -1,12 +1,12 @@
{
"name": "create-llama",
"version": "0.0.32",
"version": "0.1.8",
"description": "Create LlamaIndex-powered apps with one command",
"keywords": [
"rag",
"llamaindex",
"next.js"
],
"description": "Create LlamaIndex-powered apps with one command",
"repository": {
"type": "git",
"url": "https://github.com/run-llama/LlamaIndexTS",
@@ -20,32 +20,30 @@
"dist"
],
"scripts": {
"clean": "rimraf --glob ./dist ./templates/**/__pycache__ ./templates/**/node_modules ./templates/**/poetry.lock",
"format": "prettier --ignore-unknown --cache --check .",
"format:write": "prettier --ignore-unknown --write .",
"dev": "ncc build ./index.ts -w -o dist/",
"build": "bash ./scripts/build.sh",
"build:ncc": "pnpm run clean && ncc build ./index.ts -o ./dist/ --minify --no-cache --no-source-map-register",
"lint": "eslint . --ignore-pattern dist --ignore-pattern e2e/cache",
"clean": "rimraf --glob ./dist ./templates/**/__pycache__ ./templates/**/node_modules ./templates/**/poetry.lock",
"dev": "ncc build ./index.ts -w -o dist/",
"e2e": "playwright test",
"format": "prettier --ignore-unknown --cache --check .",
"format:write": "prettier --ignore-unknown --write .",
"lint": "eslint . --ignore-pattern dist --ignore-pattern e2e/cache",
"new-snapshot": "pnpm run build && changeset version --snapshot",
"new-version": "pnpm run build && changeset version",
"pack-install": "bash ./scripts/pack.sh",
"prepare": "husky",
"release": "pnpm run build && changeset publish",
"new-version": "pnpm run build && changeset version",
"release-snapshot": "pnpm run build && changeset publish --tag snapshot",
"new-snapshot": "pnpm run build && changeset version --snapshot",
"pack-install": "bash ./scripts/pack.sh"
"release-snapshot": "pnpm run build && changeset publish --tag snapshot"
},
"devDependencies": {
"@playwright/test": "^1.41.1",
"dependencies": {
"@types/async-retry": "1.4.2",
"@types/ci-info": "2.0.0",
"@types/cross-spawn": "6.0.0",
"@types/fs-extra": "11.0.4",
"@types/node": "^20.11.7",
"@types/prompts": "2.0.1",
"@types/tar": "6.1.5",
"@types/validate-npm-package-name": "3.0.0",
"@types/fs-extra": "11.0.4",
"@vercel/ncc": "0.38.1",
"async-retry": "1.3.1",
"async-sema": "3.0.1",
"ci-info": "github:watson/ci-info#f43f6a1cefff47fb361c88cf4b943fdbcaafe540",
@@ -53,29 +51,35 @@
"conf": "10.2.0",
"cross-spawn": "7.0.3",
"fast-glob": "3.3.1",
"fs-extra": "11.2.0",
"global-agent": "^3.0.0",
"got": "10.7.0",
"ollama": "^0.5.0",
"ora": "^8.0.1",
"picocolors": "1.0.0",
"prompts": "2.1.0",
"rimraf": "^5.0.5",
"smol-toml": "^1.1.4",
"tar": "6.1.15",
"terminal-link": "^3.0.0",
"update-check": "1.5.4",
"validate-npm-package-name": "3.0.0",
"wait-port": "^1.1.0",
"yaml": "2.4.1"
},
"devDependencies": {
"@changesets/cli": "^2.27.1",
"@playwright/test": "^1.41.1",
"@vercel/ncc": "0.38.1",
"eslint": "^8.56.0",
"eslint-config-prettier": "^8.10.0",
"husky": "^9.0.10",
"prettier": "^3.2.5",
"prettier-plugin-organize-imports": "^3.2.4",
"rimraf": "^5.0.5",
"typescript": "^5.3.3",
"eslint-config-prettier": "^8.10.0",
"ora": "^8.0.1",
"fs-extra": "11.2.0",
"yaml": "2.4.1"
"wait-port": "^1.1.0"
},
"packageManager": "pnpm@9.0.5",
"engines": {
"node": ">=16.14.0"
},
"packageManager": "pnpm@8.15.1"
}
}
+2591 -1974
View File
File diff suppressed because it is too large Load Diff
+49 -185
View File
@@ -1,8 +1,6 @@
import { execSync } from "child_process";
import ciInfo from "ci-info";
import fs from "fs";
import got from "got";
import ora from "ora";
import path from "path";
import { blue, green, red } from "picocolors";
import prompts from "prompts";
@@ -16,16 +14,15 @@ import { COMMUNITY_OWNER, COMMUNITY_REPO } from "./helpers/constant";
import { EXAMPLE_FILE } from "./helpers/datasources";
import { templatesDir } from "./helpers/dir";
import { getAvailableLlamapackOptions } from "./helpers/llama-pack";
import { askModelConfig } from "./helpers/providers";
import { getProjectOptions } from "./helpers/repo";
import { supportedTools, toolsRequireConfig } from "./helpers/tools";
const OPENAI_API_URL = "https://api.openai.com/v1";
export type QuestionArgs = Omit<
InstallAppArgs,
"appPath" | "packageManager"
> & {
listServerModels?: boolean;
askModels?: boolean;
};
const supportedContextFileTypes = [
".pdf",
@@ -67,16 +64,13 @@ if ($dialogResult -eq [System.Windows.Forms.DialogResult]::OK)
}
`;
const defaults: QuestionArgs = {
const defaults: Omit<QuestionArgs, "modelConfig"> = {
template: "streaming",
framework: "nextjs",
ui: "shadcn",
frontend: false,
openAiKey: "",
llamaCloudKey: "",
useLlamaParse: false,
model: "gpt-3.5-turbo",
embeddingModel: "text-embedding-ada-002",
communityProjectConfig: undefined,
llamapack: "",
postInstallAction: "dependencies",
@@ -84,7 +78,7 @@ const defaults: QuestionArgs = {
tools: [],
};
const handlers = {
export const questionHandlers = {
onCancel: () => {
console.error("Exiting.");
process.exit(1);
@@ -102,6 +96,8 @@ const getVectorDbChoices = (framework: TemplateFramework) => {
{ title: "Pinecone", value: "pinecone" },
{ title: "Milvus", value: "milvus" },
{ title: "Astra", value: "astra" },
{ title: "Qdrant", value: "qdrant" },
{ title: "ChromaDB", value: "chroma" },
];
const vectordbLang = framework === "fastapi" ? "python" : "typescript";
@@ -231,85 +227,15 @@ export const onPromptState = (state: any) => {
}
};
const getAvailableModelChoices = async (
selectEmbedding: boolean,
apiKey?: string,
listServerModels?: boolean,
) => {
const defaultLLMModels = [
"gpt-3.5-turbo-0125",
"gpt-4-turbo-preview",
"gpt-4",
"gpt-4-vision-preview",
];
const defaultEmbeddingModels = [
"text-embedding-ada-002",
"text-embedding-3-small",
"text-embedding-3-large",
];
const isLLMModels = (model_id: string) => {
return model_id.startsWith("gpt");
};
const isEmbeddingModel = (model_id: string) => {
return (
model_id.includes("embedding") ||
defaultEmbeddingModels.includes(model_id)
);
};
if (apiKey && listServerModels) {
const spinner = ora("Fetching available models").start();
try {
const response = await got(`${OPENAI_API_URL}/models`, {
headers: {
Authorization: "Bearer " + apiKey,
},
timeout: 5000,
responseType: "json",
});
const data: any = await response.body;
spinner.stop();
return data.data
.filter((model: any) =>
selectEmbedding ? isEmbeddingModel(model.id) : isLLMModels(model.id),
)
.map((el: any) => {
return {
title: el.id,
value: el.id,
};
});
} catch (error) {
spinner.stop();
if ((error as any).response?.statusCode === 401) {
console.log(
red(
"Invalid OpenAI API key provided! Please provide a valid key and try again!",
),
);
} else {
console.log(red("Request failed: " + error));
}
process.exit(1);
}
} else {
const data = selectEmbedding ? defaultEmbeddingModels : defaultLLMModels;
return data.map((model) => ({
title: model,
value: model,
}));
}
};
export const askQuestions = async (
program: QuestionArgs,
preferences: QuestionArgs,
openAiKey?: string,
) => {
const getPrefOrDefault = <K extends keyof QuestionArgs>(
const getPrefOrDefault = <K extends keyof Omit<QuestionArgs, "modelConfig">>(
field: K,
): QuestionArgs[K] => preferences[field] ?? defaults[field];
): Omit<QuestionArgs, "modelConfig">[K] =>
preferences[field] ?? defaults[field];
// Ask for next action after installation
async function askPostInstallAction() {
@@ -332,8 +258,8 @@ export const askQuestions = async (
},
];
const openAiKeyConfigured =
program.openAiKey || process.env["OPENAI_API_KEY"];
const modelConfigured =
!program.llamapack && program.modelConfig.isConfigured();
// If using LlamaParse, require LlamaCloud API key
const llamaCloudKeyConfigured = program.useLlamaParse
? program.llamaCloudKey || process.env["LLAMA_CLOUD_API_KEY"]
@@ -342,10 +268,9 @@ export const askQuestions = async (
// Can run the app if all tools do not require configuration
if (
!hasVectorDb &&
openAiKeyConfigured &&
modelConfigured &&
llamaCloudKeyConfigured &&
!toolsRequireConfig(program.tools) &&
!program.llamapack
!toolsRequireConfig(program.tools)
) {
actionChoices.push({
title:
@@ -362,7 +287,7 @@ export const askQuestions = async (
choices: actionChoices,
initial: 1,
},
handlers,
questionHandlers,
);
program.postInstallAction = action;
@@ -395,7 +320,7 @@ export const askQuestions = async (
],
initial: 0,
},
handlers,
questionHandlers,
);
program.template = template;
preferences.template = template;
@@ -418,7 +343,7 @@ export const askQuestions = async (
})),
initial: 0,
},
handlers,
questionHandlers,
);
const projectConfig = JSON.parse(communityProjectConfig);
program.communityProjectConfig = projectConfig;
@@ -439,7 +364,7 @@ export const askQuestions = async (
})),
initial: 0,
},
handlers,
questionHandlers,
);
program.llamapack = llamapack;
preferences.llamapack = llamapack;
@@ -465,7 +390,7 @@ export const askQuestions = async (
choices,
initial: 0,
},
handlers,
questionHandlers,
);
program.framework = framework;
preferences.framework = framework;
@@ -474,7 +399,6 @@ export const askQuestions = async (
if (program.framework === "express" || program.framework === "fastapi") {
// if a backend-only framework is selected, ask whether we should create a frontend
// (only for streaming backends)
if (program.frontend === undefined) {
if (ciInfo.isCI) {
program.frontend = getPrefOrDefault("frontend");
@@ -510,100 +434,36 @@ export const askQuestions = async (
}
}
if (program.framework === "express" || program.framework === "nextjs") {
if (!program.observability) {
if (ciInfo.isCI) {
program.observability = getPrefOrDefault("observability");
} else {
const { observability } = await prompts(
{
type: "select",
name: "observability",
message: "Would you like to set up observability?",
choices: [
{ title: "No", value: "none" },
{ title: "OpenTelemetry", value: "opentelemetry" },
],
initial: 0,
},
handlers,
);
program.observability = observability;
preferences.observability = observability;
}
}
}
if (!program.openAiKey) {
const { key } = await prompts(
{
type: "text",
name: "key",
message: program.listServerModels
? "Please provide your OpenAI API key (or reuse OPENAI_API_KEY env variable):"
: "Please provide your OpenAI API key (leave blank to skip):",
validate: (value: string) => {
if (program.listServerModels && !value) {
if (process.env.OPENAI_API_KEY) {
return true;
}
return "OpenAI API key is required";
}
return true;
},
},
handlers,
);
program.openAiKey = key || process.env.OPENAI_API_KEY;
preferences.openAiKey = key || process.env.OPENAI_API_KEY;
}
if (!program.model) {
if (!program.observability) {
if (ciInfo.isCI) {
program.model = getPrefOrDefault("model");
program.observability = getPrefOrDefault("observability");
} else {
const { model } = await prompts(
const { observability } = await prompts(
{
type: "select",
name: "model",
message: "Which model would you like to use?",
choices: await getAvailableModelChoices(
false,
program.openAiKey,
program.listServerModels,
),
name: "observability",
message: "Would you like to set up observability?",
choices: [
{ title: "No", value: "none" },
{ title: "OpenTelemetry", value: "opentelemetry" },
],
initial: 0,
},
handlers,
questionHandlers,
);
program.model = model;
preferences.model = model;
program.observability = observability;
preferences.observability = observability;
}
}
if (!program.embeddingModel && program.framework === "fastapi") {
if (ciInfo.isCI) {
program.embeddingModel = getPrefOrDefault("embeddingModel");
} else {
const { embeddingModel } = await prompts(
{
type: "select",
name: "embeddingModel",
message: "Which embedding model would you like to use?",
choices: await getAvailableModelChoices(
true,
program.openAiKey,
program.listServerModels,
),
initial: 0,
},
handlers,
);
program.embeddingModel = embeddingModel;
preferences.embeddingModel = embeddingModel;
}
if (!program.modelConfig) {
const modelConfig = await askModelConfig({
openAiKey,
askModels: program.askModels ?? false,
});
program.modelConfig = modelConfig;
preferences.modelConfig = modelConfig;
}
if (!program.dataSources) {
@@ -627,7 +487,7 @@ export const askQuestions = async (
),
initial: firstQuestion ? 1 : 0,
},
handlers,
questionHandlers,
);
if (selectedSource === "no" || selectedSource === "none") {
@@ -673,7 +533,7 @@ export const askQuestions = async (
return true;
},
},
handlers,
questionHandlers,
);
program.dataSources.push({
@@ -718,7 +578,7 @@ export const askQuestions = async (
];
program.dataSources.push({
type: "db",
config: await prompts(dbPrompts, handlers),
config: await prompts(dbPrompts, questionHandlers),
});
}
}
@@ -745,7 +605,7 @@ export const askQuestions = async (
active: "yes",
inactive: "no",
},
handlers,
questionHandlers,
);
program.useLlamaParse = useLlamaParse;
@@ -758,7 +618,7 @@ export const askQuestions = async (
message:
"Please provide your LlamaIndex Cloud API key (leave blank to skip):",
},
handlers,
questionHandlers,
);
program.llamaCloudKey = llamaCloudKey;
}
@@ -777,7 +637,7 @@ export const askQuestions = async (
choices: getVectorDbChoices(program.framework),
initial: 0,
},
handlers,
questionHandlers,
);
program.vectorDb = vectorDb;
preferences.vectorDb = vectorDb;
@@ -812,3 +672,7 @@ export const askQuestions = async (
await askPostInstallAction();
};
export const toChoice = (value: string) => {
return { title: value, value };
};
@@ -1,35 +0,0 @@
import os
import yaml
import importlib
from llama_index.core.tools.tool_spec.base import BaseToolSpec
from llama_index.core.tools.function_tool import FunctionTool
class ToolFactory:
@staticmethod
def create_tool(tool_name: str, **kwargs) -> list[FunctionTool]:
try:
tool_package, tool_cls_name = tool_name.split(".")
module_name = f"llama_index.tools.{tool_package}"
module = importlib.import_module(module_name)
tool_class = getattr(module, tool_cls_name)
tool_spec: BaseToolSpec = tool_class(**kwargs)
return tool_spec.to_tool_list()
except (ImportError, AttributeError) as e:
raise ValueError(f"Unsupported tool: {tool_name}") from e
except TypeError as e:
raise ValueError(
f"Could not create tool: {tool_name}. With config: {kwargs}"
) from e
@staticmethod
def from_env() -> list[FunctionTool]:
tools = []
if os.path.exists("config/tools.yaml"):
with open("config/tools.yaml", "r") as f:
tool_configs = yaml.safe_load(f)
for name, config in tool_configs.items():
tools += ToolFactory.create_tool(name, **config)
return tools
@@ -0,0 +1,56 @@
import os
import yaml
import importlib
from llama_index.core.tools.tool_spec.base import BaseToolSpec
from llama_index.core.tools.function_tool import FunctionTool
class ToolType:
LLAMAHUB = "llamahub"
LOCAL = "local"
class ToolFactory:
TOOL_SOURCE_PACKAGE_MAP = {
ToolType.LLAMAHUB: "llama_index.tools",
ToolType.LOCAL: "app.engine.tools",
}
@staticmethod
def load_tools(tool_type: str, tool_name: str, config: dict) -> list[FunctionTool]:
source_package = ToolFactory.TOOL_SOURCE_PACKAGE_MAP[tool_type]
try:
if "ToolSpec" in tool_name:
tool_package, tool_cls_name = tool_name.split(".")
module_name = f"{source_package}.{tool_package}"
module = importlib.import_module(module_name)
tool_class = getattr(module, tool_cls_name)
tool_spec: BaseToolSpec = tool_class(**config)
return tool_spec.to_tool_list()
else:
module = importlib.import_module(f"{source_package}.{tool_name}")
tools = getattr(module, "tools")
if not all(isinstance(tool, FunctionTool) for tool in tools):
raise ValueError(
f"The module {module} does not contain valid tools"
)
return tools
except ImportError as e:
raise ValueError(f"Failed to import tool {tool_name}: {e}")
except AttributeError as e:
raise ValueError(f"Failed to load tool {tool_name}: {e}")
@staticmethod
def from_env() -> list[FunctionTool]:
tools = []
if os.path.exists("config/tools.yaml"):
with open("config/tools.yaml", "r") as f:
tool_configs = yaml.safe_load(f)
for tool_type, config_entries in tool_configs.items():
for tool_name, config in config_entries.items():
tools.extend(
ToolFactory.load_tools(tool_type, tool_name, config)
)
return tools
@@ -0,0 +1,134 @@
import os
import logging
import base64
import uuid
from pydantic import BaseModel
from typing import List, Tuple, Dict
from llama_index.core.tools import FunctionTool
from e2b_code_interpreter import CodeInterpreter
from e2b_code_interpreter.models import Logs
logger = logging.getLogger(__name__)
class InterpreterExtraResult(BaseModel):
type: str
filename: str
url: str
class E2BToolOutput(BaseModel):
is_error: bool
logs: Logs
results: List[InterpreterExtraResult] = []
class E2BCodeInterpreter:
output_dir = "tool-output"
def __init__(self, api_key: str, filesever_url_prefix: str):
self.api_key = api_key
self.filesever_url_prefix = filesever_url_prefix
def get_output_path(self, filename: str) -> str:
# if output directory doesn't exist, create it
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir, exist_ok=True)
return os.path.join(self.output_dir, filename)
def save_to_disk(self, base64_data: str, ext: str) -> Dict:
filename = f"{uuid.uuid4()}.{ext}" # generate a unique filename
buffer = base64.b64decode(base64_data)
output_path = self.get_output_path(filename)
try:
with open(output_path, "wb") as file:
file.write(buffer)
except IOError as e:
logger.error(f"Failed to write to file {output_path}: {str(e)}")
raise e
logger.info(f"Saved file to {output_path}")
return {
"outputPath": output_path,
"filename": filename,
}
def get_file_url(self, filename: str) -> str:
return f"{self.filesever_url_prefix}/{self.output_dir}/{filename}"
def parse_result(self, result) -> List[InterpreterExtraResult]:
"""
The result could include multiple formats (e.g. png, svg, etc.) but encoded in base64
We save each result to disk and return saved file metadata (extension, filename, url)
"""
if not result:
return []
output = []
try:
formats = result.formats()
base64_data_arr = [result[format] for format in formats]
for ext, base64_data in zip(formats, base64_data_arr):
if ext and base64_data:
result = self.save_to_disk(base64_data, ext)
filename = result["filename"]
output.append(
InterpreterExtraResult(
type=ext, filename=filename, url=self.get_file_url(filename)
)
)
except Exception as error:
logger.error("Error when saving data to disk", error)
return output
def interpret(self, code: str) -> E2BToolOutput:
with CodeInterpreter(api_key=self.api_key) as interpreter:
logger.info(
f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}"
)
exec = interpreter.notebook.exec_cell(code)
if exec.error:
output = E2BToolOutput(is_error=True, logs=[exec.error])
else:
if len(exec.results) == 0:
output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
else:
results = self.parse_result(exec.results[0])
output = E2BToolOutput(
is_error=False, logs=exec.logs, results=results
)
return output
def code_interpret(code: str) -> Dict:
"""
Execute python code in a Jupyter notebook cell and return any result, stdout, stderr, display_data, and error.
"""
api_key = os.getenv("E2B_API_KEY")
filesever_url_prefix = os.getenv("FILESERVER_URL_PREFIX")
if not api_key:
raise ValueError(
"E2B_API_KEY key is required to run code interpreter. Get it here: https://e2b.dev/docs/getting-started/api-key"
)
if not filesever_url_prefix:
raise ValueError(
"FILESERVER_URL_PREFIX is required to display file output from sandbox"
)
interpreter = E2BCodeInterpreter(
api_key=api_key, filesever_url_prefix=filesever_url_prefix
)
output = interpreter.interpret(code)
return output.dict()
# Specify as functions tools to be loaded by the ToolFactory
tools = [FunctionTool.from_defaults(code_interpret)]
@@ -0,0 +1,72 @@
"""Open Meteo weather map tool spec."""
import logging
import requests
import pytz
from llama_index.core.tools import FunctionTool
logger = logging.getLogger(__name__)
class OpenMeteoWeather:
geo_api = "https://geocoding-api.open-meteo.com/v1"
weather_api = "https://api.open-meteo.com/v1"
@classmethod
def _get_geo_location(cls, location: str) -> dict:
"""Get geo location from location name."""
params = {"name": location, "count": 10, "language": "en", "format": "json"}
response = requests.get(f"{cls.geo_api}/search", params=params)
if response.status_code != 200:
raise Exception(f"Failed to fetch geo location: {response.status_code}")
else:
data = response.json()
result = data["results"][0]
geo_location = {
"id": result["id"],
"name": result["name"],
"latitude": result["latitude"],
"longitude": result["longitude"],
}
return geo_location
@classmethod
def get_weather_information(cls, location: str) -> dict:
"""Use this function to get the weather of any given location.
Note that the weather code should follow WMO Weather interpretation codes (WW):
0: Clear sky
1, 2, 3: Mainly clear, partly cloudy, and overcast
45, 48: Fog and depositing rime fog
51, 53, 55: Drizzle: Light, moderate, and dense intensity
56, 57: Freezing Drizzle: Light and dense intensity
61, 63, 65: Rain: Slight, moderate and heavy intensity
66, 67: Freezing Rain: Light and heavy intensity
71, 73, 75: Snow fall: Slight, moderate, and heavy intensity
77: Snow grains
80, 81, 82: Rain showers: Slight, moderate, and violent
85, 86: Snow showers slight and heavy
95: Thunderstorm: Slight or moderate
96, 99: Thunderstorm with slight and heavy hail
"""
logger.info(
f"Calling open-meteo api to get weather information of location: {location}"
)
geo_location = cls._get_geo_location(location)
timezone = pytz.timezone("UTC").zone
params = {
"latitude": geo_location["latitude"],
"longitude": geo_location["longitude"],
"current": "temperature_2m,weather_code",
"hourly": "temperature_2m,weather_code",
"daily": "weather_code",
"timezone": timezone,
}
response = requests.get(f"{cls.weather_api}/forecast", params=params)
if response.status_code != 200:
raise Exception(
f"Failed to fetch weather information: {response.status_code}"
)
return response.json()
tools = [FunctionTool.from_defaults(OpenMeteoWeather.get_weather_information)]
@@ -1,5 +1,6 @@
import os
from app.engine.index import get_index
from fastapi import HTTPException
def get_chat_engine():
@@ -8,8 +9,11 @@ def get_chat_engine():
index = get_index()
if index is None:
raise Exception(
"StorageContext is empty - call 'python app/engine/generate.py' to generate the storage first"
raise HTTPException(
status_code=500,
detail=str(
"StorageContext is empty - call 'poetry run generate' to generate the storage first"
),
)
return index.as_chat_engine(
@@ -1,21 +1,16 @@
import {
BaseTool,
OpenAI,
OpenAIAgent,
QueryEngineTool,
ToolFactory,
} from "llamaindex";
import { BaseToolWithCall, OpenAIAgent, QueryEngineTool } from "llamaindex";
import fs from "node:fs/promises";
import path from "node:path";
import { getDataSource } from "./index";
import { STORAGE_CACHE_DIR } from "./shared";
import { createTools } from "./tools";
export async function createChatEngine(llm: OpenAI) {
let tools: BaseTool[] = [];
export async function createChatEngine() {
const tools: BaseToolWithCall[] = [];
// Add a query engine tool if we have a data source
// Delete this code if you don't have a data source
const index = await getDataSource(llm);
const index = await getDataSource();
if (index) {
tools.push(
new QueryEngineTool({
@@ -28,17 +23,20 @@ export async function createChatEngine(llm: OpenAI) {
);
}
const configFile = path.join("config", "tools.json");
let toolConfig: any;
try {
// add tools from config file if it exists
const config = JSON.parse(
await fs.readFile(path.join("config", "tools.json"), "utf8"),
);
tools = tools.concat(await ToolFactory.createTools(config));
} catch {}
toolConfig = JSON.parse(await fs.readFile(configFile, "utf8"));
} catch (e) {
console.info(`Could not read ${configFile} file. Using no tools.`);
}
if (toolConfig) {
tools.push(...(await createTools(toolConfig)));
}
return new OpenAIAgent({
tools,
llm,
verbose: true,
systemPrompt: process.env.SYSTEM_PROMPT,
});
}
@@ -0,0 +1,42 @@
import { BaseToolWithCall } from "llamaindex";
import { ToolsFactory } from "llamaindex/tools/ToolsFactory";
import { InterpreterTool, InterpreterToolParams } from "./interpreter";
import { WeatherTool, WeatherToolParams } from "./weather";
type ToolCreator = (config: unknown) => BaseToolWithCall;
export async function createTools(toolConfig: {
local: Record<string, unknown>;
llamahub: any;
}): Promise<BaseToolWithCall[]> {
// add local tools from the 'tools' folder (if configured)
const tools = createLocalTools(toolConfig.local);
// add tools from LlamaIndexTS (if configured)
tools.push(...(await ToolsFactory.createTools(toolConfig.llamahub)));
return tools;
}
const toolFactory: Record<string, ToolCreator> = {
weather: (config: unknown) => {
return new WeatherTool(config as WeatherToolParams);
},
interpreter: (config: unknown) => {
return new InterpreterTool(config as InterpreterToolParams);
},
};
function createLocalTools(
localConfig: Record<string, unknown>,
): BaseToolWithCall[] {
const tools: BaseToolWithCall[] = [];
Object.keys(localConfig).forEach((key) => {
if (key in toolFactory) {
const toolConfig = localConfig[key];
const tool = toolFactory[key](toolConfig);
tools.push(tool);
}
});
return tools;
}
@@ -0,0 +1,174 @@
import { CodeInterpreter, Logs, Result } from "@e2b/code-interpreter";
import type { JSONSchemaType } from "ajv";
import fs from "fs";
import { BaseTool, ToolMetadata } from "llamaindex";
import crypto from "node:crypto";
import path from "node:path";
export type InterpreterParameter = {
code: string;
};
export type InterpreterToolParams = {
metadata?: ToolMetadata<JSONSchemaType<InterpreterParameter>>;
apiKey?: string;
fileServerURLPrefix?: string;
};
export type InterpreterToolOuput = {
isError: boolean;
logs: Logs;
extraResult: InterpreterExtraResult[];
};
type InterpreterExtraType =
| "html"
| "markdown"
| "svg"
| "png"
| "jpeg"
| "pdf"
| "latex"
| "json"
| "javascript";
export type InterpreterExtraResult = {
type: InterpreterExtraType;
filename: string;
url: string;
};
const DEFAULT_META_DATA: ToolMetadata<JSONSchemaType<InterpreterParameter>> = {
name: "interpreter",
description:
"Execute python code in a Jupyter notebook cell and return any result, stdout, stderr, display_data, and error.",
parameters: {
type: "object",
properties: {
code: {
type: "string",
description: "The python code to execute in a single cell.",
},
},
required: ["code"],
},
};
export class InterpreterTool implements BaseTool<InterpreterParameter> {
private readonly outputDir = "tool-output";
private apiKey?: string;
private fileServerURLPrefix?: string;
metadata: ToolMetadata<JSONSchemaType<InterpreterParameter>>;
codeInterpreter?: CodeInterpreter;
constructor(params?: InterpreterToolParams) {
this.metadata = params?.metadata || DEFAULT_META_DATA;
this.apiKey = params?.apiKey || process.env.E2B_API_KEY;
this.fileServerURLPrefix =
params?.fileServerURLPrefix || process.env.FILESERVER_URL_PREFIX;
if (!this.apiKey) {
throw new Error(
"E2B_API_KEY key is required to run code interpreter. Get it here: https://e2b.dev/docs/getting-started/api-key",
);
}
if (!this.fileServerURLPrefix) {
throw new Error(
"FILESERVER_URL_PREFIX is required to display file output from sandbox",
);
}
}
public async initInterpreter() {
if (!this.codeInterpreter) {
this.codeInterpreter = await CodeInterpreter.create({
apiKey: this.apiKey,
});
}
return this.codeInterpreter;
}
public async codeInterpret(code: string): Promise<InterpreterToolOuput> {
console.log(
`\n${"=".repeat(50)}\n> Running following AI-generated code:\n${code}\n${"=".repeat(50)}`,
);
const interpreter = await this.initInterpreter();
const exec = await interpreter.notebook.execCell(code);
if (exec.error) console.error("[Code Interpreter error]", exec.error);
const extraResult = await this.getExtraResult(exec.results[0]);
const result: InterpreterToolOuput = {
isError: !!exec.error,
logs: exec.logs,
extraResult,
};
return result;
}
async call(input: InterpreterParameter): Promise<InterpreterToolOuput> {
const result = await this.codeInterpret(input.code);
await this.codeInterpreter?.close();
return result;
}
private async getExtraResult(
res?: Result,
): Promise<InterpreterExtraResult[]> {
if (!res) return [];
const output: InterpreterExtraResult[] = [];
try {
const formats = res.formats(); // formats available for the result. Eg: ['png', ...]
const base64DataArr = formats.map((f) => res[f as keyof Result]); // get base64 data for each format
// save base64 data to file and return the url
for (let i = 0; i < formats.length; i++) {
const ext = formats[i];
const base64Data = base64DataArr[i];
if (ext && base64Data) {
const { filename } = this.saveToDisk(base64Data, ext);
output.push({
type: ext as InterpreterExtraType,
filename,
url: this.getFileUrl(filename),
});
}
}
} catch (error) {
console.error("Error when saving data to disk", error);
}
return output;
}
// Consider saving to cloud storage instead but it may cost more for you
// See: https://e2b.dev/docs/sandbox/api/filesystem#write-to-file
private saveToDisk(
base64Data: string,
ext: string,
): {
outputPath: string;
filename: string;
} {
const filename = `${crypto.randomUUID()}.${ext}`; // generate a unique filename
const buffer = Buffer.from(base64Data, "base64");
const outputPath = this.getOutputPath(filename);
fs.writeFileSync(outputPath, buffer);
console.log(`Saved file to ${outputPath}`);
return {
outputPath,
filename,
};
}
private getOutputPath(filename: string): string {
// if outputDir doesn't exist, create it
if (!fs.existsSync(this.outputDir)) {
fs.mkdirSync(this.outputDir, { recursive: true });
}
return path.join(this.outputDir, filename);
}
private getFileUrl(filename: string): string {
return `${this.fileServerURLPrefix}/${this.outputDir}/${filename}`;
}
}
@@ -0,0 +1,81 @@
import type { JSONSchemaType } from "ajv";
import { BaseTool, ToolMetadata } from "llamaindex";
interface GeoLocation {
id: string;
name: string;
latitude: number;
longitude: number;
}
export type WeatherParameter = {
location: string;
};
export type WeatherToolParams = {
metadata?: ToolMetadata<JSONSchemaType<WeatherParameter>>;
};
const DEFAULT_META_DATA: ToolMetadata<JSONSchemaType<WeatherParameter>> = {
name: "get_weather_information",
description: `
Use this function to get the weather of any given location.
Note that the weather code should follow WMO Weather interpretation codes (WW):
0: Clear sky
1, 2, 3: Mainly clear, partly cloudy, and overcast
45, 48: Fog and depositing rime fog
51, 53, 55: Drizzle: Light, moderate, and dense intensity
56, 57: Freezing Drizzle: Light and dense intensity
61, 63, 65: Rain: Slight, moderate and heavy intensity
66, 67: Freezing Rain: Light and heavy intensity
71, 73, 75: Snow fall: Slight, moderate, and heavy intensity
77: Snow grains
80, 81, 82: Rain showers: Slight, moderate, and violent
85, 86: Snow showers slight and heavy
95: Thunderstorm: Slight or moderate
96, 99: Thunderstorm with slight and heavy hail
`,
parameters: {
type: "object",
properties: {
location: {
type: "string",
description: "The location to get the weather information",
},
},
required: ["location"],
},
};
export class WeatherTool implements BaseTool<WeatherParameter> {
metadata: ToolMetadata<JSONSchemaType<WeatherParameter>>;
private getGeoLocation = async (location: string): Promise<GeoLocation> => {
const apiUrl = `https://geocoding-api.open-meteo.com/v1/search?name=${location}&count=10&language=en&format=json`;
const response = await fetch(apiUrl);
const data = await response.json();
const { id, name, latitude, longitude } = data.results[0];
return { id, name, latitude, longitude };
};
private getWeatherByLocation = async (location: string) => {
console.log(
"Calling open-meteo api to get weather information of location:",
location,
);
const { latitude, longitude } = await this.getGeoLocation(location);
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone;
const apiUrl = `https://api.open-meteo.com/v1/forecast?latitude=${latitude}&longitude=${longitude}&current=temperature_2m,weather_code&hourly=temperature_2m,weather_code&daily=weather_code&timezone=${timezone}`;
const response = await fetch(apiUrl);
const data = await response.json();
return data;
};
constructor(params?: WeatherToolParams) {
this.metadata = params?.metadata || DEFAULT_META_DATA;
}
async call(input: WeatherParameter) {
return await this.getWeatherByLocation(input.location);
}
}
@@ -1,18 +1,21 @@
import { ContextChatEngine, LLM } from "llamaindex";
import { ContextChatEngine, Settings } from "llamaindex";
import { getDataSource } from "./index";
export async function createChatEngine(llm: LLM) {
const index = await getDataSource(llm);
export async function createChatEngine() {
const index = await getDataSource();
if (!index) {
throw new Error(
`StorageContext is empty - call 'npm run generate' to generate the storage first`,
);
}
const retriever = index.asRetriever();
retriever.similarityTopK = 3;
retriever.similarityTopK = process.env.TOP_K
? parseInt(process.env.TOP_K)
: 3;
return new ContextChatEngine({
chatModel: llm,
chatModel: Settings.llm,
retriever,
systemPrompt: process.env.SYSTEM_PROMPT,
});
}
+28 -8
View File
@@ -1,7 +1,10 @@
import os
import logging
from llama_parse import LlamaParse
from pydantic import BaseModel, validator
logger = logging.getLogger(__name__)
class FileLoaderConfig(BaseModel):
data_dir: str = "data"
@@ -27,11 +30,28 @@ def llama_parse_parser():
def get_file_documents(config: FileLoaderConfig):
from llama_index.core.readers import SimpleDirectoryReader
reader = SimpleDirectoryReader(
config.data_dir,
recursive=True,
)
if config.use_llama_parse:
parser = llama_parse_parser()
reader.file_extractor = {".pdf": parser}
return reader.load_data()
try:
reader = SimpleDirectoryReader(
config.data_dir,
recursive=True,
filename_as_id=True,
)
if config.use_llama_parse:
parser = llama_parse_parser()
reader.file_extractor = {".pdf": parser}
return reader.load_data()
except ValueError as e:
import sys, traceback
# Catch the error if the data dir is empty
# and return as empty document list
_, _, exc_traceback = sys.exc_info()
function_name = traceback.extract_tb(exc_traceback)[-1].name
if function_name == "_add_files":
logger.warning(
f"Failed to load file documents, error message: {e} . Return as empty document list."
)
return []
else:
# Raise the error if it is not the case of empty data dir
raise e
@@ -0,0 +1,5 @@
from traceloop.sdk import Traceloop
def init_observability():
Traceloop.init()
@@ -1,37 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.astra_db import AstraDBVectorStore
from app.settings import init_settings
from app.engine.loaders import get_documents
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
logger.info("Creating new index")
documents = get_documents()
store = AstraDBVectorStore(
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
api_endpoint=os.environ["ASTRA_DB_ENDPOINT"],
collection_name=os.environ["ASTRA_DB_COLLECTION"],
embedding_dimension=1536,
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(f"Successfully created embeddings in the AstraDB")
if __name__ == "__main__":
init_settings()
generate_datasource()
@@ -1,21 +0,0 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.astra_db import AstraDBVectorStore
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from AstraDB...")
store = AstraDBVectorStore(
token=os.environ["ASTRA_DB_APPLICATION_TOKEN"],
api_endpoint=os.environ["ASTRA_DB_ENDPOINT"],
collection_name=os.environ["ASTRA_DB_COLLECTION"],
embedding_dimension=1536,
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from AstraDB.")
return index
@@ -0,0 +1,20 @@
import os
from llama_index.vector_stores.astra_db import AstraDBVectorStore
def get_vector_store():
endpoint = os.getenv("ASTRA_DB_ENDPOINT")
token = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
collection = os.getenv("ASTRA_DB_COLLECTION")
if not endpoint or not token or not collection:
raise ValueError(
"Please config ASTRA_DB_ENDPOINT, ASTRA_DB_APPLICATION_TOKEN and ASTRA_DB_COLLECTION"
" to your environment variables or config them in the .env file"
)
store = AstraDBVectorStore(
token=token,
api_endpoint=endpoint,
collection_name=collection,
embedding_dimension=int(os.getenv("EMBEDDING_DIM")),
)
return store
@@ -0,0 +1,24 @@
import os
from llama_index.vector_stores.chroma import ChromaVectorStore
def get_vector_store():
collection_name = os.getenv("CHROMA_COLLECTION", "default")
chroma_path = os.getenv("CHROMA_PATH")
# if CHROMA_PATH is set, use a local ChromaVectorStore from the path
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
if chroma_path:
store = ChromaVectorStore.from_params(
persist_dir=chroma_path, collection_name=collection_name
)
else:
if not os.getenv("CHROMA_HOST") or not os.getenv("CHROMA_PORT"):
raise ValueError(
"Please provide either CHROMA_PATH or CHROMA_HOST and CHROMA_PORT"
)
store = ChromaVectorStore.from_params(
host=os.getenv("CHROMA_HOST"),
port=int(os.getenv("CHROMA_PORT")),
collection_name=collection_name,
)
return store
@@ -1,39 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.milvus import MilvusVectorStore
from app.settings import init_settings
from app.engine.loaders import get_documents
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = MilvusVectorStore(
uri=os.environ["MILVUS_ADDRESS"],
user=os.getenv("MILVUS_USERNAME"),
password=os.getenv("MILVUS_PASSWORD"),
collection_name=os.getenv("MILVUS_COLLECTION"),
dim=int(os.getenv("MILVUS_DIMENSION", "1536")),
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(f"Successfully created embeddings in the Milvus")
if __name__ == "__main__":
init_settings()
generate_datasource()
@@ -1,22 +0,0 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.milvus import MilvusVectorStore
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from Milvus...")
store = MilvusVectorStore(
uri=os.getenv("MILVUS_ADDRESS"),
user=os.getenv("MILVUS_USERNAME"),
password=os.getenv("MILVUS_PASSWORD"),
collection_name=os.getenv("MILVUS_COLLECTION"),
dim=int(os.getenv("EMBEDDING_DIM", "1536")),
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from Milvus.")
return index
@@ -0,0 +1,20 @@
import os
from llama_index.vector_stores.milvus import MilvusVectorStore
def get_vector_store():
address = os.getenv("MILVUS_ADDRESS")
collection = os.getenv("MILVUS_COLLECTION")
if not address or not collection:
raise ValueError(
"Please set MILVUS_ADDRESS and MILVUS_COLLECTION to your environment variables"
" or config them in the .env file"
)
store = MilvusVectorStore(
uri=address,
user=os.getenv("MILVUS_USERNAME"),
password=os.getenv("MILVUS_PASSWORD"),
collection_name=collection,
dim=int(os.getenv("EMBEDDING_DIM")),
)
return store
@@ -1,43 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from app.settings import init_settings
from app.engine.loaders import get_documents
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = MongoDBAtlasVectorSearch(
db_name=os.environ["MONGODB_DATABASE"],
collection_name=os.environ["MONGODB_VECTORS"],
index_name=os.environ["MONGODB_VECTOR_INDEX"],
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully created embeddings in the MongoDB collection {os.environ['MONGODB_VECTORS']}"
)
logger.info(
"""IMPORTANT: You can't query your index yet because you need to create a vector search index in MongoDB's UI now.
See https://github.com/run-llama/mongodb-demo/tree/main?tab=readme-ov-file#create-a-vector-search-index"""
)
if __name__ == "__main__":
init_settings()
generate_datasource()
@@ -1,20 +0,0 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from MongoDB...")
store = MongoDBAtlasVectorSearch(
db_name=os.environ["MONGODB_DATABASE"],
collection_name=os.environ["MONGODB_VECTORS"],
index_name=os.environ["MONGODB_VECTOR_INDEX"],
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from MongoDB.")
return index
@@ -0,0 +1,20 @@
import os
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
def get_vector_store():
db_uri = os.getenv("MONGODB_URI")
db_name = os.getenv("MONGODB_DATABASE")
collection_name = os.getenv("MONGODB_VECTORS")
index_name = os.getenv("MONGODB_VECTOR_INDEX")
if not db_uri or not db_name or not collection_name or not index_name:
raise ValueError(
"Please set MONGODB_URI, MONGODB_DATABASE, MONGODB_VECTORS, and MONGODB_VECTOR_INDEX"
" to your environment variables or config them in .env file"
)
store = MongoDBAtlasVectorSearch(
db_name=db_name,
collection_name=collection_name,
index_name=index_name,
)
return store
@@ -1 +0,0 @@
STORAGE_DIR = "storage" # directory to cache the generated index
@@ -2,11 +2,11 @@ from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.indices import (
VectorStoreIndex,
)
from app.engine.constants import STORAGE_DIR
from app.engine.loaders import get_documents
from app.settings import init_settings
@@ -16,17 +16,18 @@ logger = logging.getLogger()
def generate_datasource():
init_settings()
logger.info("Creating new index")
storage_dir = os.environ.get("STORAGE_DIR", "storage")
# load the documents and create the index
documents = get_documents()
index = VectorStoreIndex.from_documents(
documents,
)
# store it for later
index.storage_context.persist(STORAGE_DIR)
logger.info(f"Finished creating new index. Stored in {STORAGE_DIR}")
index.storage_context.persist(storage_dir)
logger.info(f"Finished creating new index. Stored in {storage_dir}")
if __name__ == "__main__":
init_settings()
generate_datasource()
@@ -1,20 +1,30 @@
import logging
import os
import logging
from datetime import timedelta
from app.engine.constants import STORAGE_DIR
from cachetools import cached, TTLCache
from llama_index.core.storage import StorageContext
from llama_index.core.indices import load_index_from_storage
logger = logging.getLogger("uvicorn")
@cached(
TTLCache(maxsize=10, ttl=timedelta(minutes=5).total_seconds()),
key=lambda *args, **kwargs: "global_storage_context",
)
def get_storage_context(persist_dir: str) -> StorageContext:
return StorageContext.from_defaults(persist_dir=persist_dir)
def get_index():
storage_dir = os.getenv("STORAGE_DIR", "storage")
# check if storage already exists
if not os.path.exists(STORAGE_DIR):
if not os.path.exists(storage_dir):
return None
# load the existing index
logger.info(f"Loading index from {STORAGE_DIR}...")
storage_context = StorageContext.from_defaults(persist_dir=STORAGE_DIR)
logger.info(f"Loading index from {storage_dir}...")
storage_context = get_storage_context(storage_dir)
index = load_index_from_storage(storage_context)
logger.info(f"Finished loading index from {STORAGE_DIR}")
logger.info(f"Finished loading index from {storage_dir}")
return index
@@ -1,2 +0,0 @@
PGVECTOR_SCHEMA = "public"
PGVECTOR_TABLE = "llamaindex_embedding"
@@ -1,35 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import logging
from llama_index.core.indices import VectorStoreIndex
from llama_index.core.storage import StorageContext
from app.engine.loaders import get_documents
from app.settings import init_settings
from app.engine.utils import init_pg_vector_store_from_env
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = init_pg_vector_store_from_env()
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully created embeddings in the PG vector store, schema={store.schema_name} table={store.table_name}"
)
if __name__ == "__main__":
init_settings()
generate_datasource()
@@ -1,13 +0,0 @@
import logging
from llama_index.core.indices.vector_store import VectorStoreIndex
from app.engine.utils import init_pg_vector_store_from_env
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from PGVector...")
store = init_pg_vector_store_from_env()
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from PGVector.")
return index
@@ -1,27 +0,0 @@
import os
from llama_index.vector_stores.postgres import PGVectorStore
from urllib.parse import urlparse
from app.engine.constants import PGVECTOR_SCHEMA, PGVECTOR_TABLE
def init_pg_vector_store_from_env():
original_conn_string = os.environ.get("PG_CONNECTION_STRING")
if original_conn_string is None or original_conn_string == "":
raise ValueError("PG_CONNECTION_STRING environment variable is not set.")
# The PGVectorStore requires both two connection strings, one for psycopg2 and one for asyncpg
# Update the configured scheme with the psycopg2 and asyncpg schemes
original_scheme = urlparse(original_conn_string).scheme + "://"
conn_string = original_conn_string.replace(
original_scheme, "postgresql+psycopg2://"
)
async_conn_string = original_conn_string.replace(
original_scheme, "postgresql+asyncpg://"
)
return PGVectorStore(
connection_string=conn_string,
async_connection_string=async_conn_string,
schema_name=PGVECTOR_SCHEMA,
table_name=PGVECTOR_TABLE,
)
@@ -0,0 +1,37 @@
import os
from llama_index.vector_stores.postgres import PGVectorStore
from urllib.parse import urlparse
PGVECTOR_SCHEMA = "public"
PGVECTOR_TABLE = "llamaindex_embedding"
vector_store: PGVectorStore = None
def get_vector_store():
global vector_store
if vector_store is None:
original_conn_string = os.environ.get("PG_CONNECTION_STRING")
if original_conn_string is None or original_conn_string == "":
raise ValueError("PG_CONNECTION_STRING environment variable is not set.")
# The PGVectorStore requires both two connection strings, one for psycopg2 and one for asyncpg
# Update the configured scheme with the psycopg2 and asyncpg schemes
original_scheme = urlparse(original_conn_string).scheme + "://"
conn_string = original_conn_string.replace(
original_scheme, "postgresql+psycopg2://"
)
async_conn_string = original_conn_string.replace(
original_scheme, "postgresql+asyncpg://"
)
vector_store = PGVectorStore(
connection_string=conn_string,
async_connection_string=async_conn_string,
schema_name=PGVECTOR_SCHEMA,
table_name=PGVECTOR_TABLE,
embed_dim=int(os.environ.get("EMBEDDING_DIM", 1024)),
)
return vector_store
@@ -1,39 +0,0 @@
from dotenv import load_dotenv
load_dotenv()
import os
import logging
from llama_index.core.storage import StorageContext
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
from app.settings import init_settings
from app.engine.loaders import get_documents
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()
def generate_datasource():
logger.info("Creating new index")
# load the documents and create the index
documents = get_documents()
store = PineconeVectorStore(
api_key=os.environ["PINECONE_API_KEY"],
index_name=os.environ["PINECONE_INDEX_NAME"],
environment=os.environ["PINECONE_ENVIRONMENT"],
)
storage_context = StorageContext.from_defaults(vector_store=store)
VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
show_progress=True, # this will show you a progress bar as the embeddings are created
)
logger.info(
f"Successfully created embeddings and save to your Pinecone index {os.environ['PINECONE_INDEX_NAME']}"
)
if __name__ == "__main__":
init_settings()
generate_datasource()
@@ -1,20 +0,0 @@
import logging
import os
from llama_index.core.indices import VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
logger = logging.getLogger("uvicorn")
def get_index():
logger.info("Connecting to index from Pinecone...")
store = PineconeVectorStore(
api_key=os.environ["PINECONE_API_KEY"],
index_name=os.environ["PINECONE_INDEX_NAME"],
environment=os.environ["PINECONE_ENVIRONMENT"],
)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished connecting to index from Pinecone.")
return index
@@ -0,0 +1,19 @@
import os
from llama_index.vector_stores.pinecone import PineconeVectorStore
def get_vector_store():
api_key = os.getenv("PINECONE_API_KEY")
index_name = os.getenv("PINECONE_INDEX_NAME")
environment = os.getenv("PINECONE_ENVIRONMENT")
if not api_key or not index_name or not environment:
raise ValueError(
"Please set PINECONE_API_KEY, PINECONE_INDEX_NAME, and PINECONE_ENVIRONMENT"
" to your environment variables or config them in the .env file"
)
store = PineconeVectorStore(
api_key=api_key,
index_name=index_name,
environment=environment,
)
return store
@@ -0,0 +1,19 @@
import os
from llama_index.vector_stores.qdrant import QdrantVectorStore
def get_vector_store():
collection_name = os.getenv("QDRANT_COLLECTION")
url = os.getenv("QDRANT_URL")
api_key = os.getenv("QDRANT_API_KEY")
if not collection_name or not url:
raise ValueError(
"Please set QDRANT_COLLECTION, QDRANT_URL"
" to your environment variables or config them in the .env file"
)
store = QdrantVectorStore(
collection_name=collection_name,
url=url,
api_key=api_key,
)
return store
@@ -1,11 +1,9 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import {
AstraDBVectorStore,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";
import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import { AstraDBVectorStore } from "llamaindex/storage/vectorStore/AstraDBVectorStore";
import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import { checkRequiredEnvVars } from "./shared";
dotenv.config();
@@ -18,7 +16,10 @@ async function loadAndIndex() {
const collectionName = process.env.ASTRA_DB_COLLECTION!;
const vectorStore = new AstraDBVectorStore();
await vectorStore.create(collectionName, {
vector: { dimension: 1536, metric: "cosine" },
vector: {
dimension: parseInt(process.env.EMBEDDING_DIM!),
metric: "cosine",
},
});
await vectorStore.connect(collectionName);
@@ -33,6 +34,7 @@ async function loadAndIndex() {
(async () => {
checkRequiredEnvVars();
initSettings();
await loadAndIndex();
console.log("Finished generating storage.");
})();
@@ -1,20 +1,11 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import {
AstraDBVectorStore,
LLM,
VectorStoreIndex,
serviceContextFromDefaults,
} from "llamaindex";
import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared";
import { VectorStoreIndex } from "llamaindex";
import { AstraDBVectorStore } from "llamaindex/storage/vectorStore/AstraDBVectorStore";
import { checkRequiredEnvVars } from "./shared";
export async function getDataSource(llm: LLM) {
export async function getDataSource() {
checkRequiredEnvVars();
const serviceContext = serviceContextFromDefaults({
llm,
chunkSize: CHUNK_SIZE,
chunkOverlap: CHUNK_OVERLAP,
});
const store = new AstraDBVectorStore();
await store.connect(process.env.ASTRA_DB_COLLECTION!);
return await VectorStoreIndex.fromVectorStore(store, serviceContext);
return await VectorStoreIndex.fromVectorStore(store);
}
@@ -1,10 +1,8 @@
export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;
const REQUIRED_ENV_VARS = [
"ASTRA_DB_APPLICATION_TOKEN",
"ASTRA_DB_ENDPOINT",
"ASTRA_DB_COLLECTION",
"EMBEDDING_DIM",
];
export function checkRequiredEnvVars() {
@@ -0,0 +1,37 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import { ChromaVectorStore } from "llamaindex/storage/vectorStore/ChromaVectorStore";
import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import { checkRequiredEnvVars } from "./shared";
dotenv.config();
async function loadAndIndex() {
// load objects from storage and convert them into LlamaIndex Document objects
const documents = await getDocuments();
// create vector store
const chromaUri = `http://${process.env.CHROMA_HOST}:${process.env.CHROMA_PORT}`;
const vectorStore = new ChromaVectorStore({
collectionName: process.env.CHROMA_COLLECTION,
chromaClientParams: { path: chromaUri },
});
// create index from all the Documentss and store them in Pinecone
console.log("Start creating embeddings...");
const storageContext = await storageContextFromDefaults({ vectorStore });
await VectorStoreIndex.fromDocuments(documents, { storageContext });
console.log(
"Successfully created embeddings and save to your ChromaDB index.",
);
}
(async () => {
checkRequiredEnvVars();
initSettings();
await loadAndIndex();
console.log("Finished generating storage.");
})();
@@ -0,0 +1,16 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import { VectorStoreIndex } from "llamaindex";
import { ChromaVectorStore } from "llamaindex/storage/vectorStore/ChromaVectorStore";
import { checkRequiredEnvVars } from "./shared";
export async function getDataSource() {
checkRequiredEnvVars();
const chromaUri = `http://${process.env.CHROMA_HOST}:${process.env.CHROMA_PORT}`;
const store = new ChromaVectorStore({
collectionName: process.env.CHROMA_COLLECTION,
chromaClientParams: { path: chromaUri },
});
return await VectorStoreIndex.fromVectorStore(store);
}
@@ -0,0 +1,18 @@
const REQUIRED_ENV_VARS = ["CHROMA_COLLECTION", "CHROMA_HOST", "CHROMA_PORT"];
export function checkRequiredEnvVars() {
const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => {
return !process.env[envVar];
});
if (missingEnvVars.length > 0) {
console.log(
`The following environment variables are required but missing: ${missingEnvVars.join(
", ",
)}`,
);
throw new Error(
`Missing environment variables: ${missingEnvVars.join(", ")}`,
);
}
}
@@ -1,11 +1,9 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import {
MilvusVectorStore,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";
import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import { MilvusVectorStore } from "llamaindex/storage/vectorStore/MilvusVectorStore";
import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import { checkRequiredEnvVars, getMilvusClient } from "./shared";
dotenv.config();
@@ -32,6 +30,7 @@ async function loadAndIndex() {
(async () => {
checkRequiredEnvVars();
initSettings();
await loadAndIndex();
console.log("Finished generating storage.");
})();
@@ -1,25 +1,11 @@
import {
LLM,
MilvusVectorStore,
serviceContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import {
checkRequiredEnvVars,
CHUNK_OVERLAP,
CHUNK_SIZE,
getMilvusClient,
} from "./shared";
import { VectorStoreIndex } from "llamaindex";
import { MilvusVectorStore } from "llamaindex/storage/vectorStore/MilvusVectorStore";
import { checkRequiredEnvVars, getMilvusClient } from "./shared";
export async function getDataSource(llm: LLM) {
export async function getDataSource() {
checkRequiredEnvVars();
const serviceContext = serviceContextFromDefaults({
llm,
chunkSize: CHUNK_SIZE,
chunkOverlap: CHUNK_OVERLAP,
});
const milvusClient = getMilvusClient();
const store = new MilvusVectorStore({ milvusClient });
return await VectorStoreIndex.fromVectorStore(store, serviceContext);
return await VectorStoreIndex.fromVectorStore(store);
}
@@ -1,8 +1,5 @@
import { MilvusClient } from "@zilliz/milvus2-sdk-node";
export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;
const REQUIRED_ENV_VARS = [
"MILVUS_ADDRESS",
"MILVUS_USERNAME",
@@ -1,17 +1,15 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import {
MongoDBAtlasVectorSearch,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";
import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import { MongoDBAtlasVectorSearch } from "llamaindex/storage/vectorStore/MongoDBAtlasVectorSearch";
import { MongoClient } from "mongodb";
import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import { checkRequiredEnvVars } from "./shared";
dotenv.config();
const mongoUri = process.env.MONGO_URI!;
const mongoUri = process.env.MONGODB_URI!;
const databaseName = process.env.MONGODB_DATABASE!;
const vectorCollectionName = process.env.MONGODB_VECTORS!;
const indexName = process.env.MONGODB_VECTOR_INDEX;
@@ -42,6 +40,7 @@ async function loadAndIndex() {
(async () => {
checkRequiredEnvVars();
initSettings();
await loadAndIndex();
console.log("Finished generating storage.");
})();
@@ -1,21 +1,12 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import {
LLM,
MongoDBAtlasVectorSearch,
serviceContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { VectorStoreIndex } from "llamaindex";
import { MongoDBAtlasVectorSearch } from "llamaindex/storage/vectorStore/MongoDBAtlasVectorSearch";
import { MongoClient } from "mongodb";
import { checkRequiredEnvVars, CHUNK_OVERLAP, CHUNK_SIZE } from "./shared";
import { checkRequiredEnvVars } from "./shared";
export async function getDataSource(llm: LLM) {
export async function getDataSource() {
checkRequiredEnvVars();
const client = new MongoClient(process.env.MONGO_URI!);
const serviceContext = serviceContextFromDefaults({
llm,
chunkSize: CHUNK_SIZE,
chunkOverlap: CHUNK_OVERLAP,
});
const store = new MongoDBAtlasVectorSearch({
mongodbClient: client,
dbName: process.env.MONGODB_DATABASE!,
@@ -23,5 +14,5 @@ export async function getDataSource(llm: LLM) {
indexName: process.env.MONGODB_VECTOR_INDEX,
});
return await VectorStoreIndex.fromVectorStore(store, serviceContext);
return await VectorStoreIndex.fromVectorStore(store);
}
@@ -1,8 +1,5 @@
export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;
const REQUIRED_ENV_VARS = [
"MONGO_URI",
"MONGODB_URI",
"MONGODB_DATABASE",
"MONGODB_VECTORS",
"MONGODB_VECTOR_INDEX",
@@ -1,14 +1,11 @@
import {
ServiceContext,
serviceContextFromDefaults,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";
import * as dotenv from "dotenv";
import { getDocuments } from "./loader";
import { CHUNK_OVERLAP, CHUNK_SIZE, STORAGE_CACHE_DIR } from "./shared";
import { initSettings } from "./settings";
import { STORAGE_CACHE_DIR } from "./shared";
// Load environment variables from local .env file
dotenv.config();
@@ -20,7 +17,7 @@ async function getRuntime(func: any) {
return end - start;
}
async function generateDatasource(serviceContext: ServiceContext) {
async function generateDatasource() {
console.log(`Generating storage context...`);
// Split documents, create embeddings and store them in the storage context
const ms = await getRuntime(async () => {
@@ -30,18 +27,13 @@ async function generateDatasource(serviceContext: ServiceContext) {
const documents = await getDocuments();
await VectorStoreIndex.fromDocuments(documents, {
storageContext,
serviceContext,
});
});
console.log(`Storage context successfully generated in ${ms / 1000}s.`);
}
(async () => {
const serviceContext = serviceContextFromDefaults({
chunkSize: CHUNK_SIZE,
chunkOverlap: CHUNK_OVERLAP,
});
await generateDatasource(serviceContext);
initSettings();
await generateDatasource();
console.log("Finished generating storage.");
})();
@@ -1,18 +1,8 @@
import {
LLM,
serviceContextFromDefaults,
SimpleDocumentStore,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { CHUNK_OVERLAP, CHUNK_SIZE, STORAGE_CACHE_DIR } from "./shared";
import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex";
import { storageContextFromDefaults } from "llamaindex/storage/StorageContext";
import { STORAGE_CACHE_DIR } from "./shared";
export async function getDataSource(llm: LLM) {
const serviceContext = serviceContextFromDefaults({
llm,
chunkSize: CHUNK_SIZE,
chunkOverlap: CHUNK_OVERLAP,
});
export async function getDataSource() {
const storageContext = await storageContextFromDefaults({
persistDir: `${STORAGE_CACHE_DIR}`,
});
@@ -25,6 +15,5 @@ export async function getDataSource(llm: LLM) {
}
return await VectorStoreIndex.init({
storageContext,
serviceContext,
});
}
@@ -1,3 +1 @@
export const STORAGE_CACHE_DIR = "./cache";
export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;
@@ -1,11 +1,9 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import {
PGVectorStore,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";
import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import { PGVectorStore } from "llamaindex/storage/vectorStore/PGVectorStore";
import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import {
PGVECTOR_COLLECTION,
PGVECTOR_SCHEMA,
@@ -37,6 +35,7 @@ async function loadAndIndex() {
(async () => {
checkRequiredEnvVars();
initSettings();
await loadAndIndex();
console.log("Finished generating storage.");
process.exit(0);
@@ -1,29 +1,18 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import { VectorStoreIndex } from "llamaindex";
import { PGVectorStore } from "llamaindex/storage/vectorStore/PGVectorStore";
import {
LLM,
PGVectorStore,
VectorStoreIndex,
serviceContextFromDefaults,
} from "llamaindex";
import {
CHUNK_OVERLAP,
CHUNK_SIZE,
PGVECTOR_SCHEMA,
PGVECTOR_TABLE,
checkRequiredEnvVars,
} from "./shared";
export async function getDataSource(llm: LLM) {
export async function getDataSource() {
checkRequiredEnvVars();
const pgvs = new PGVectorStore({
connectionString: process.env.PG_CONNECTION_STRING,
schemaName: PGVECTOR_SCHEMA,
tableName: PGVECTOR_TABLE,
});
const serviceContext = serviceContextFromDefaults({
llm,
chunkSize: CHUNK_SIZE,
chunkOverlap: CHUNK_OVERLAP,
});
return await VectorStoreIndex.fromVectorStore(pgvs, serviceContext);
return await VectorStoreIndex.fromVectorStore(pgvs);
}
@@ -1,10 +1,8 @@
export const PGVECTOR_COLLECTION = "data";
export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;
export const PGVECTOR_SCHEMA = "public";
export const PGVECTOR_TABLE = "llamaindex_embedding";
const REQUIRED_ENV_VARS = ["PG_CONNECTION_STRING", "OPENAI_API_KEY"];
const REQUIRED_ENV_VARS = ["PG_CONNECTION_STRING"];
export function checkRequiredEnvVars() {
const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => {
@@ -1,11 +1,9 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import {
PineconeVectorStore,
VectorStoreIndex,
storageContextFromDefaults,
} from "llamaindex";
import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import { PineconeVectorStore } from "llamaindex/storage/vectorStore/PineconeVectorStore";
import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import { checkRequiredEnvVars } from "./shared";
dotenv.config();
@@ -28,6 +26,7 @@ async function loadAndIndex() {
(async () => {
checkRequiredEnvVars();
initSettings();
await loadAndIndex();
console.log("Finished generating storage.");
})();
@@ -1,19 +1,10 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import {
LLM,
PineconeVectorStore,
VectorStoreIndex,
serviceContextFromDefaults,
} from "llamaindex";
import { CHUNK_OVERLAP, CHUNK_SIZE, checkRequiredEnvVars } from "./shared";
import { VectorStoreIndex } from "llamaindex";
import { PineconeVectorStore } from "llamaindex/storage/vectorStore/PineconeVectorStore";
import { checkRequiredEnvVars } from "./shared";
export async function getDataSource(llm: LLM) {
export async function getDataSource() {
checkRequiredEnvVars();
const serviceContext = serviceContextFromDefaults({
llm,
chunkSize: CHUNK_SIZE,
chunkOverlap: CHUNK_OVERLAP,
});
const store = new PineconeVectorStore();
return await VectorStoreIndex.fromVectorStore(store, serviceContext);
return await VectorStoreIndex.fromVectorStore(store);
}
@@ -1,6 +1,3 @@
export const CHUNK_SIZE = 512;
export const CHUNK_OVERLAP = 20;
const REQUIRED_ENV_VARS = ["PINECONE_ENVIRONMENT", "PINECONE_API_KEY"];
export function checkRequiredEnvVars() {
@@ -0,0 +1,37 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import * as dotenv from "dotenv";
import { VectorStoreIndex, storageContextFromDefaults } from "llamaindex";
import { QdrantVectorStore } from "llamaindex/storage/vectorStore/QdrantVectorStore";
import { getDocuments } from "./loader";
import { initSettings } from "./settings";
import { checkRequiredEnvVars, getQdrantClient } from "./shared";
dotenv.config();
const collectionName = process.env.QDRANT_COLLECTION;
async function loadAndIndex() {
// load objects from storage and convert them into LlamaIndex Document objects
const documents = await getDocuments();
// Connect to Qdrant
const vectorStore = new QdrantVectorStore({
collectionName,
client: getQdrantClient(),
});
const storageContext = await storageContextFromDefaults({ vectorStore });
await VectorStoreIndex.fromDocuments(documents, {
storageContext: storageContext,
});
console.log(
`Successfully upload embeddings to Qdrant collection ${collectionName}.`,
);
}
(async () => {
checkRequiredEnvVars();
initSettings();
await loadAndIndex();
console.log("Finished generating storage.");
})();
@@ -0,0 +1,17 @@
import * as dotenv from "dotenv";
import { VectorStoreIndex } from "llamaindex";
import { QdrantVectorStore } from "llamaindex/storage/vectorStore/QdrantVectorStore";
import { checkRequiredEnvVars, getQdrantClient } from "./shared";
dotenv.config();
export async function getDataSource() {
checkRequiredEnvVars();
const collectionName = process.env.QDRANT_COLLECTION;
const store = new QdrantVectorStore({
collectionName,
client: getQdrantClient(),
});
return await VectorStoreIndex.fromVectorStore(store);
}
@@ -0,0 +1,32 @@
import { QdrantClient } from "@qdrant/js-client-rest";
const REQUIRED_ENV_VARS = ["QDRANT_URL", "QDRANT_COLLECTION"]; // QDRANT_API_KEY is optional
export function getQdrantClient() {
const url = process.env.QDRANT_URL;
if (!url) {
throw new Error("QDRANT_URL environment variable is required");
}
const apiKey = process.env?.QDRANT_API_KEY;
return new QdrantClient({
url,
apiKey,
});
}
export function checkRequiredEnvVars() {
const missingEnvVars = REQUIRED_ENV_VARS.filter((envVar) => {
return !process.env[envVar];
});
if (missingEnvVars.length > 0) {
console.log(
`The following environment variables are required but missing: ${missingEnvVars.join(
", ",
)}`,
);
throw new Error(
`Missing environment variables: ${missingEnvVars.join(", ")}`,
);
}
}
+3 -1
View File
@@ -1,3 +1,5 @@
# local env files
.env
node_modules/
node_modules/
tool-output/
@@ -31,6 +31,8 @@ if (isDevelopment) {
console.warn("Production CORS origin not set, defaulting to no CORS.");
}
app.use("/api/files/data", express.static("data"));
app.use("/api/files/tool-output", express.static("tool-output"));
app.use(express.text());
app.get("/", (req: Request, res: Response) => {
+1
View File
@@ -0,0 +1 @@
node-linker=hoisted
+13 -10
View File
@@ -1,20 +1,23 @@
{
"name": "llama-index-express-streaming",
"version": "1.0.0",
"main": "dist/index.mjs",
"main": "dist/index.js",
"scripts": {
"format": "prettier --ignore-unknown --cache --check .",
"format:write": "prettier --ignore-unknown --write .",
"build": "tsup index.ts --format esm --dts",
"start": "node dist/index.mjs",
"dev": "concurrently \"tsup index.ts --format esm --dts --watch\" \"nodemon -q dist/index.mjs\""
"build": "tsup index.ts --format cjs --dts",
"start": "node dist/index.js",
"dev": "concurrently \"tsup index.ts --format cjs --dts --watch\" \"nodemon -q dist/index.js\""
},
"dependencies": {
"ai": "^2.2.25",
"ai": "^3.0.21",
"cors": "^2.8.5",
"dotenv": "^16.3.1",
"express": "^4.18.2",
"llamaindex": "latest"
"llamaindex": "0.3.13",
"pdf2json": "3.0.5",
"ajv": "^8.12.0",
"@e2b/code-interpreter": "^0.0.5"
},
"devDependencies": {
"@types/cors": "^2.8.16",
@@ -22,12 +25,12 @@
"@types/node": "^20.9.5",
"concurrently": "^8.2.2",
"eslint": "^8.54.0",
"eslint-config-prettier": "^8.10.0",
"nodemon": "^3.0.1",
"tsup": "^8.0.1",
"typescript": "^5.3.2",
"prettier": "^3.2.5",
"prettier-plugin-organize-imports": "^3.2.4",
"eslint-config-prettier": "^8.10.0",
"ts-node": "^10.9.2"
"tsx": "^4.7.2",
"tsup": "^8.0.1",
"typescript": "^5.3.2"
}
}
@@ -1,5 +1,5 @@
import { Request, Response } from "express";
import { ChatMessage, MessageContent, OpenAI } from "llamaindex";
import { ChatMessage, MessageContent } from "llamaindex";
import { createChatEngine } from "./engine/chat";
const convertMessageContent = (
@@ -32,10 +32,6 @@ export const chatRequest = async (req: Request, res: Response) => {
});
}
const llm = new OpenAI({
model: process.env.MODEL || "gpt-3.5-turbo",
});
// Convert message content from Vercel/AI format to LlamaIndex/OpenAI format
// Note: The non-streaming template does not need the Vercel/AI format, we're still using it for consistency with the streaming template
const userMessageContent = convertMessageContent(
@@ -43,7 +39,7 @@ export const chatRequest = async (req: Request, res: Response) => {
data?.imageUrl,
);
const chatEngine = await createChatEngine(llm);
const chatEngine = await createChatEngine();
// Calling LlamaIndex's ChatEngine to get a response
const response = await chatEngine.chat({
@@ -61,7 +57,7 @@ export const chatRequest = async (req: Request, res: Response) => {
} catch (error) {
console.error("[LlamaIndex]", error);
return res.status(500).json({
error: (error as Error).message,
detail: (error as Error).message,
});
}
};
@@ -1,8 +1,9 @@
import { streamToResponse } from "ai";
import { Message, StreamData, streamToResponse } from "ai";
import { Request, Response } from "express";
import { ChatMessage, MessageContent, OpenAI } from "llamaindex";
import { ChatMessage, MessageContent, Settings } from "llamaindex";
import { createChatEngine } from "./engine/chat";
import { LlamaIndexStream } from "./llamaindex-stream";
import { createCallbackManager } from "./stream-helper";
const convertMessageContent = (
textMessage: string,
@@ -25,7 +26,7 @@ const convertMessageContent = (
export const chat = async (req: Request, res: Response) => {
try {
const { messages, data }: { messages: ChatMessage[]; data: any } = req.body;
const { messages, data }: { messages: Message[]; data: any } = req.body;
const userMessage = messages.pop();
if (!messages || !userMessage || userMessage.role !== "user") {
return res.status(400).json({
@@ -34,11 +35,7 @@ export const chat = async (req: Request, res: Response) => {
});
}
const llm = new OpenAI({
model: (process.env.MODEL as any) || "gpt-3.5-turbo",
});
const chatEngine = await createChatEngine(llm);
const chatEngine = await createChatEngine();
// Convert message content from Vercel/AI format to LlamaIndex/OpenAI format
const userMessageContent = convertMessageContent(
@@ -46,36 +43,33 @@ export const chat = async (req: Request, res: Response) => {
data?.imageUrl,
);
// Init Vercel AI StreamData
const vercelStreamData = new StreamData();
// Setup callbacks
const callbackManager = createCallbackManager(vercelStreamData);
// Calling LlamaIndex's ChatEngine to get a streamed response
const response = await chatEngine.chat({
message: userMessageContent,
chatHistory: messages,
stream: true,
const response = await Settings.withCallbackManager(callbackManager, () => {
return chatEngine.chat({
message: userMessageContent,
chatHistory: messages as ChatMessage[],
stream: true,
});
});
// Return a stream, which can be consumed by the Vercel/AI client
const { stream, data: streamData } = LlamaIndexStream(response, {
const stream = LlamaIndexStream(response, vercelStreamData, {
parserOptions: {
image_url: data?.imageUrl,
},
});
// Pipe LlamaIndexStream to response
const processedStream = stream.pipeThrough(streamData.stream);
return streamToResponse(processedStream, res, {
headers: {
// response MUST have the `X-Experimental-Stream-Data: 'true'` header
// so that the client uses the correct parsing logic, see
// https://sdk.vercel.ai/docs/api-reference/stream-data#on-the-server
"X-Experimental-Stream-Data": "true",
"Content-Type": "text/plain; charset=utf-8",
"Access-Control-Expose-Headers": "X-Experimental-Stream-Data",
},
});
return streamToResponse(stream, res, {}, vercelStreamData);
} catch (error) {
console.error("[LlamaIndex]", error);
return res.status(500).json({
error: (error as Error).message,
detail: (error as Error).message,
});
}
};
@@ -1,7 +1,7 @@
import { LLM, SimpleChatEngine } from "llamaindex";
import { Settings, SimpleChatEngine } from "llamaindex";
export async function createChatEngine(llm: LLM) {
export async function createChatEngine() {
return new SimpleChatEngine({
llm,
llm: Settings.llm,
});
}
@@ -0,0 +1,93 @@
import {
Anthropic,
GEMINI_EMBEDDING_MODEL,
GEMINI_MODEL,
Gemini,
GeminiEmbedding,
OpenAI,
OpenAIEmbedding,
Settings,
} from "llamaindex";
import { HuggingFaceEmbedding } from "llamaindex/embeddings/HuggingFaceEmbedding";
import { OllamaEmbedding } from "llamaindex/embeddings/OllamaEmbedding";
import { ALL_AVAILABLE_ANTHROPIC_MODELS } from "llamaindex/llm/anthropic";
import { Ollama } from "llamaindex/llm/ollama";
const CHUNK_SIZE = 512;
const CHUNK_OVERLAP = 20;
export const initSettings = async () => {
// HINT: you can delete the initialization code for unused model providers
console.log(`Using '${process.env.MODEL_PROVIDER}' model provider`);
if (!process.env.MODEL || !process.env.EMBEDDING_MODEL) {
throw new Error("'MODEL' and 'EMBEDDING_MODEL' env variables must be set.");
}
switch (process.env.MODEL_PROVIDER) {
case "ollama":
initOllama();
break;
case "anthropic":
initAnthropic();
break;
case "gemini":
initGemini();
break;
default:
initOpenAI();
break;
}
Settings.chunkSize = CHUNK_SIZE;
Settings.chunkOverlap = CHUNK_OVERLAP;
};
function initOpenAI() {
Settings.llm = new OpenAI({
model: process.env.MODEL ?? "gpt-3.5-turbo",
maxTokens: 512,
});
Settings.embedModel = new OpenAIEmbedding({
model: process.env.EMBEDDING_MODEL,
dimensions: process.env.EMBEDDING_DIM
? parseInt(process.env.EMBEDDING_DIM)
: undefined,
});
}
function initOllama() {
const config = {
host: process.env.OLLAMA_BASE_URL ?? "http://127.0.0.1:11434",
};
Settings.llm = new Ollama({
model: process.env.MODEL ?? "",
config,
});
Settings.embedModel = new OllamaEmbedding({
model: process.env.EMBEDDING_MODEL ?? "",
config,
});
}
function initAnthropic() {
const embedModelMap: Record<string, string> = {
"all-MiniLM-L6-v2": "Xenova/all-MiniLM-L6-v2",
"all-mpnet-base-v2": "Xenova/all-mpnet-base-v2",
};
Settings.llm = new Anthropic({
model: process.env.MODEL as keyof typeof ALL_AVAILABLE_ANTHROPIC_MODELS,
});
Settings.embedModel = new HuggingFaceEmbedding({
modelType: embedModelMap[process.env.EMBEDDING_MODEL!],
});
}
function initGemini() {
Settings.llm = new Gemini({
model: process.env.MODEL as GEMINI_MODEL,
});
Settings.embedModel = new GeminiEmbedding({
model: process.env.EMBEDDING_MODEL as GEMINI_EMBEDDING_MODEL,
});
}
@@ -1,49 +1,65 @@
import {
JSONValue,
StreamData,
createCallbacksTransformer,
createStreamDataTransformer,
experimental_StreamData,
trimStartOfStreamHelper,
type AIStreamCallbacksAndOptions,
} from "ai";
import { Response, StreamingAgentChatResponse } from "llamaindex";
import {
Metadata,
NodeWithScore,
Response,
ToolCallLLMMessageOptions,
} from "llamaindex";
import { AgentStreamChatResponse } from "llamaindex/agent/base";
import { appendImageData, appendSourceData } from "./stream-helper";
type LlamaIndexResponse =
| AgentStreamChatResponse<ToolCallLLMMessageOptions>
| Response;
type ParserOptions = {
image_url?: string;
};
function createParser(
res: AsyncIterable<Response>,
data: experimental_StreamData,
res: AsyncIterable<LlamaIndexResponse>,
data: StreamData,
opts?: ParserOptions,
) {
const it = res[Symbol.asyncIterator]();
const trimStartOfStream = trimStartOfStreamHelper();
let sourceNodes: NodeWithScore<Metadata>[] | undefined;
return new ReadableStream<string>({
start() {
// if image_url is provided, send it via the data stream
if (opts?.image_url) {
const message: JSONValue = {
type: "image_url",
image_url: {
url: opts.image_url,
},
};
data.append(message);
} else {
data.append({}); // send an empty image response for the user's message
}
appendImageData(data, opts?.image_url);
},
async pull(controller): Promise<void> {
const { value, done } = await it.next();
if (done) {
if (sourceNodes) {
appendSourceData(data, sourceNodes);
}
controller.close();
data.append({}); // send an empty image response for the assistant's message
data.close();
return;
}
const text = trimStartOfStream(value.response ?? "");
let delta;
if (value instanceof Response) {
// handle Response type
if (value.sourceNodes) {
// get source nodes from the first response
sourceNodes = value.sourceNodes;
}
delta = value.response ?? "";
} else {
// handle other types
delta = value.response.delta;
}
const text = trimStartOfStream(delta ?? "");
if (text) {
controller.enqueue(text);
}
@@ -52,21 +68,14 @@ function createParser(
}
export function LlamaIndexStream(
response: StreamingAgentChatResponse | AsyncIterable<Response>,
response: AsyncIterable<LlamaIndexResponse>,
data: StreamData,
opts?: {
callbacks?: AIStreamCallbacksAndOptions;
parserOptions?: ParserOptions;
},
): { stream: ReadableStream; data: experimental_StreamData } {
const data = new experimental_StreamData();
const res =
response instanceof StreamingAgentChatResponse
? response.response
: response;
return {
stream: createParser(res, data, opts?.parserOptions)
.pipeThrough(createCallbacksTransformer(opts?.callbacks))
.pipeThrough(createStreamDataTransformer(true)),
data,
};
): ReadableStream<Uint8Array> {
return createParser(response, data, opts?.parserOptions)
.pipeThrough(createCallbacksTransformer(opts?.callbacks))
.pipeThrough(createStreamDataTransformer());
}

Some files were not shown because too many files have changed in this diff Show More