mirror of
https://github.com/run-llama/LlamaIndexTS.git
synced 2026-07-01 22:14:03 -04:00
Compare commits
52 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 74fc725f37 | |||
| a0a74aed60 | |||
| 11feef8c82 | |||
| 9c5ff164ac | |||
| 7edeb1c2d7 | |||
| 8b95abdc85 | |||
| ffe0cd1ef1 | |||
| 5d2111a19f | |||
| 68ac7fd57f | |||
| 7320d96a36 | |||
| ee17fb475b | |||
| 28b877e31f | |||
| 4389b80a52 | |||
| d3bc663951 | |||
| 4810364788 | |||
| 2dcad52dd9 | |||
| 0bf8d80b12 | |||
| e4bba02aec | |||
| 1caa0da657 | |||
| 711c814bb2 | |||
| 5b832eb927 | |||
| 49988431f6 | |||
| 72d65dd51a | |||
| 553bc55b19 | |||
| fc6f69833c | |||
| c7fd06841f | |||
| 4648da6849 | |||
| 0188cf3bb6 | |||
| e0b4f9c047 | |||
| 4895bba96e | |||
| 76d1df817b | |||
| 83d7f415e2 | |||
| ae1149ffaf | |||
| 0148354dbe | |||
| 11b3856334 | |||
| e8f229cd01 | |||
| 75b70e5824 | |||
| 1711f6d8fc | |||
| 20d16abdf4 | |||
| 2411c9fbd0 | |||
| be3e280f2a | |||
| 2afcbe6587 | |||
| 22ff486fbe | |||
| eed0b0415d | |||
| d9d6c56ed5 | |||
| f99a237093 | |||
| fcbf18344c | |||
| bf8cbeb6c5 | |||
| e27e7dd054 | |||
| 8b66cf4341 | |||
| 6f4549bdea | |||
| c654398f75 |
@@ -12,6 +12,10 @@ concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
POSTGRES_USER: runneradmin
|
||||
POSTGRES_HOST_AUTH_METHOD: trust
|
||||
|
||||
jobs:
|
||||
e2e:
|
||||
strategy:
|
||||
@@ -22,9 +26,17 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- uses: ankane/setup-postgres@v1
|
||||
with:
|
||||
database: llamaindex_node_test
|
||||
dev-files: true
|
||||
- run: |
|
||||
cd /tmp
|
||||
git clone --branch v0.7.0 https://github.com/pgvector/pgvector.git
|
||||
cd pgvector
|
||||
make
|
||||
sudo make install
|
||||
- uses: pnpm/action-setup@v4
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v4
|
||||
with:
|
||||
@@ -42,7 +54,6 @@ jobs:
|
||||
node-version: [18.x, 20.x, 22.x]
|
||||
name: Test on Node.js ${{ matrix.node-version }}
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pnpm/action-setup@v4
|
||||
@@ -92,7 +103,7 @@ jobs:
|
||||
- nextjs-agent
|
||||
- nextjs-edge-runtime
|
||||
- nextjs-node-runtime
|
||||
# - waku-query-engine
|
||||
- waku-query-engine
|
||||
runs-on: ubuntu-latest
|
||||
name: Build LlamaIndex Example (${{ matrix.packages }})
|
||||
steps:
|
||||
@@ -131,6 +142,9 @@ jobs:
|
||||
- name: Pack @llamaindex/cloud
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/cloud
|
||||
- name: Pack @llamaindex/openai
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/llm/openai
|
||||
- name: Pack @llamaindex/core
|
||||
run: pnpm pack --pack-destination ${{ runner.temp }}
|
||||
working-directory: packages/core
|
||||
|
||||
@@ -36,9 +36,44 @@ For now, browser support is limited due to the lack of support for [AsyncLocalSt
|
||||
npm install llamaindex
|
||||
pnpm install llamaindex
|
||||
yarn add llamaindex
|
||||
jsr install @llamaindex/core
|
||||
```
|
||||
|
||||
### Setup TypeScript
|
||||
|
||||
```json5
|
||||
{
|
||||
compilerOptions: {
|
||||
// ⬇️ add this line to your tsconfig.json
|
||||
moduleResolution: "bundler", // or "node16"
|
||||
},
|
||||
}
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary>Why?</summary>
|
||||
We ship both ESM and CJS modules, and are compatible with Vercel Edge, Cloudflare Workers, and other serverless platforms.
|
||||
|
||||
So we are using [conditional exports](https://nodejs.org/api/packages.html#conditional-exports) to support all environments.
|
||||
|
||||
This is a modern way of shipping packages, but it might cause TypeScript type checking to fail under legacy module resolution.
|
||||
|
||||
Imagine you put the output file into `/dist/openai.js` but you are importing `llamaindex/openai` in your code, and set `package.json` like this:
|
||||
|
||||
```json
|
||||
{
|
||||
"exports": {
|
||||
"./openai": "./dist/openai.js"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
With the old module resolution, TypeScript will not be able to find the module because it does not follow the file structure, even though `node index.js` runs successfully. (on Node.js >=16)
|
||||
|
||||
See more about [moduleResolution](https://www.typescriptlang.org/docs/handbook/modules/theory.html#module-resolution) or
|
||||
[TypeScript 5.0 blog](https://devblogs.microsoft.com/typescript/announcing-typescript-5-0/#--moduleresolution-bundler7).
|
||||
|
||||
</details>
|
||||
|
||||
### Node.js
|
||||
|
||||
```ts
|
||||
@@ -154,6 +189,21 @@ export async function chatWithAgent(
|
||||
}
|
||||
```
|
||||
|
||||
### Vite
|
||||
|
||||
We have some wasm dependencies for better performance. You can use `vite-plugin-wasm` to load them.
|
||||
|
||||
```ts
|
||||
import wasm from "vite-plugin-wasm";
|
||||
|
||||
export default {
|
||||
plugins: [wasm()],
|
||||
ssr: {
|
||||
external: ["tiktoken"],
|
||||
},
|
||||
};
|
||||
```
|
||||
|
||||
## Playground
|
||||
|
||||
Check out our NextJS playground at https://llama-playground.vercel.app/. The source is available at https://github.com/run-llama/ts-playground
|
||||
|
||||
@@ -1,5 +1,98 @@
|
||||
# docs
|
||||
|
||||
## 0.0.69
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [11feef8]
|
||||
- llamaindex@0.6.0
|
||||
- @llamaindex/examples@0.0.8
|
||||
|
||||
## 0.0.68
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [7edeb1c]
|
||||
- llamaindex@0.5.27
|
||||
|
||||
## 0.0.67
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ffe0cd1]
|
||||
- Updated dependencies [ffe0cd1]
|
||||
- llamaindex@0.5.26
|
||||
|
||||
## 0.0.66
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4810364]
|
||||
- Updated dependencies [d3bc663]
|
||||
- llamaindex@0.5.25
|
||||
|
||||
## 0.0.65
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.5.24
|
||||
|
||||
## 0.0.64
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.5.23
|
||||
|
||||
## 0.0.63
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4648da6]
|
||||
- llamaindex@0.5.22
|
||||
|
||||
## 0.0.62
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae1149f]
|
||||
- Updated dependencies [2411c9f]
|
||||
- Updated dependencies [e8f229c]
|
||||
- Updated dependencies [11b3856]
|
||||
- Updated dependencies [83d7f41]
|
||||
- Updated dependencies [0148354]
|
||||
- Updated dependencies [1711f6d]
|
||||
- llamaindex@0.5.21
|
||||
|
||||
## 0.0.61
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d9d6c56]
|
||||
- Updated dependencies [22ff486]
|
||||
- Updated dependencies [eed0b04]
|
||||
- llamaindex@0.5.20
|
||||
|
||||
## 0.0.60
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [fcbf183]
|
||||
- llamaindex@0.5.19
|
||||
|
||||
## 0.0.59
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [8b66cf4]
|
||||
- llamaindex@0.5.18
|
||||
|
||||
## 0.0.58
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [c654398]
|
||||
- llamaindex@0.5.17
|
||||
|
||||
## 0.0.57
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -6,10 +6,17 @@ sidebar_position: 2
|
||||
|
||||
We support Node.JS versions 18, 20 and 22, with experimental support for Deno, Bun and Vercel Edge functions.
|
||||
|
||||
## NextJS App Router
|
||||
## NextJS
|
||||
|
||||
If you're using NextJS App Router route handlers/serverless functions, you'll need to use the NodeJS mode:
|
||||
If you're using NextJS you'll need to add `withLlamaIndex` to your `next.config.js` file. This will add the necessary configuration for included 3rd-party libraries to your build:
|
||||
|
||||
```js
|
||||
export const runtime = "nodejs"; // default
|
||||
// next.config.js
|
||||
const withLlamaIndex = require("llamaindex/next");
|
||||
|
||||
module.exports = withLlamaIndex({
|
||||
// your next.js config
|
||||
});
|
||||
```
|
||||
|
||||
For details, check the latest [withLlamaIndex](https://github.com/run-llama/LlamaIndexTS/blob/main/packages/llamaindex/src/next.ts) implementation.
|
||||
|
||||
@@ -50,10 +50,10 @@ We want to see what our agent is up to, so we're going to hook into some events
|
||||
|
||||
```javascript
|
||||
Settings.callbackManager.on("llm-tool-call", (event) => {
|
||||
console.log(event.detail.payload);
|
||||
console.log(event.detail);
|
||||
});
|
||||
Settings.callbackManager.on("llm-tool-result", (event) => {
|
||||
console.log(event.detail.payload);
|
||||
console.log(event.detail);
|
||||
});
|
||||
```
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ LlamaIndex.TS handles several major use cases:
|
||||
|
||||
- **Structured Data Extraction**: turning complex, unstructured and semi-structured data into uniform, programmatically accessible formats.
|
||||
- **Retrieval-Augmented Generation (RAG)**: answering queries across your internal data by providing LLMs with up-to-date, semantically relevant context including Question and Answer systems and chat bots.
|
||||
- **Autonomous Agents**: building software that is capable of intelligently selecting and using tools to accomplish tasks in an interative, unsupervised manner.
|
||||
- **Autonomous Agents**: building software that is capable of intelligently selecting and using tools to accomplish tasks in an interactive, unsupervised manner.
|
||||
|
||||
## 👨👩👧👦 Who is LlamaIndex for?
|
||||
|
||||
|
||||
@@ -27,3 +27,4 @@ for await (const chunk of stream) {
|
||||
|
||||
- [ContextChatEngine](../api/classes/ContextChatEngine.md)
|
||||
- [CondenseQuestionChatEngine](../api/classes/CondenseQuestionChatEngine.md)
|
||||
- [SimpleChatEngine](../api/classes/SimpleChatEngine.md)
|
||||
|
||||
@@ -21,3 +21,4 @@ const index = await VectorStoreIndex.fromDocuments([document]);
|
||||
|
||||
- [SummaryIndex](../api/classes/SummaryIndex.md)
|
||||
- [VectorStoreIndex](../api/classes/VectorStoreIndex.md)
|
||||
- [KeywordTableIndex](../api/classes/KeywordTableIndex.md)
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
A simple JSON data loader with various options.
|
||||
Either parses the entire string, cleaning it and treating each line as an embedding, or performs a recursive depth-first traversal yielding JSON paths.
|
||||
Supports streaming of large JSON data using [@discoveryjs/json-ext](https://github.com/discoveryjs/json-ext)
|
||||
|
||||
## Usage
|
||||
|
||||
@@ -20,12 +21,16 @@ const docsFromContent = reader.loadDataAsContent(content);
|
||||
|
||||
Basic:
|
||||
|
||||
- `streamingThreshold?`: The threshold for using streaming mode in MB of the JSON Data. Estimates characters by calculating bytes: `(streamingThreshold * 1024 * 1024) / 2` and comparing against `.length` of the JSON string. Set `undefined` to disable streaming or `0` to always use streaming. Default is `50` MB.
|
||||
|
||||
- `ensureAscii?`: Whether to ensure only ASCII characters be present in the output by converting non-ASCII characters to their unicode escape sequence. Default is `false`.
|
||||
|
||||
- `isJsonLines?`: Wether the JSON is in JSON Lines format. If true, will split into lines, remove empty one and parse each line as JSON. Default is `false`
|
||||
- `isJsonLines?`: Whether the JSON is in JSON Lines format. If true, will split into lines, remove empty ones and parse each line as JSON. Note: Uses a custom streaming parser, most likely less robust than json-ext. Default is `false`
|
||||
|
||||
- `cleanJson?`: Whether to clean the JSON by filtering out structural characters (`{}, [], and ,`). If set to false, it will just parse the JSON, not removing structural characters. Default is `true`.
|
||||
|
||||
- `logger?`: A placeholder for a custom logger function.
|
||||
|
||||
Depth-First-Traversal:
|
||||
|
||||
- `levelsBack?`: Specifies how many levels up the JSON structure to include in the output. `cleanJson` will be ignored. If set to 0, all levels are included. If undefined, parses the entire JSON, treats each line as an embedding and creates a document per top-level array. Default is `undefined`
|
||||
|
||||
@@ -98,3 +98,7 @@ Use the `embedDocuments` method to generate embeddings for the texts.
|
||||
const result = await embeddings.embedDocuments(texts);
|
||||
console.log(result); // Perfectly customized embeddings, ready to serve.
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
- [MixedbreadAIEmbeddings](../../../api/classes/MixedbreadAIEmbeddings.md)
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
## Concept
|
||||
|
||||
Evaluation and benchmarking are crucial concepts in LLM development. To improve the perfomance of an LLM app (RAG, agents) you must have a way to measure it.
|
||||
Evaluation and benchmarking are crucial concepts in LLM development. To improve the performance of an LLM app (RAG, agents) you must have a way to measure it.
|
||||
|
||||
LlamaIndex offers key modules to measure the quality of generated results. We also offer key modules to measure retrieval quality.
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ import {
|
||||
MetadataMode,
|
||||
OpenAIEmbedding,
|
||||
TitleExtractor,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
} from "llamaindex";
|
||||
|
||||
async function main() {
|
||||
@@ -29,7 +29,7 @@ async function main() {
|
||||
const document = new Document({ text: essay, id_: path });
|
||||
const pipeline = new IngestionPipeline({
|
||||
transformations: [
|
||||
new SimpleNodeParser({ chunkSize: 1024, chunkOverlap: 20 }),
|
||||
new SentenceSplitter({ chunkSize: 1024, chunkOverlap: 20 }),
|
||||
new TitleExtractor(),
|
||||
new OpenAIEmbedding(),
|
||||
],
|
||||
@@ -62,7 +62,7 @@ import {
|
||||
MetadataMode,
|
||||
OpenAIEmbedding,
|
||||
TitleExtractor,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
QdrantVectorStore,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
@@ -81,7 +81,7 @@ async function main() {
|
||||
const document = new Document({ text: essay, id_: path });
|
||||
const pipeline = new IngestionPipeline({
|
||||
transformations: [
|
||||
new SimpleNodeParser({ chunkSize: 1024, chunkOverlap: 20 }),
|
||||
new SentenceSplitter({ chunkSize: 1024, chunkOverlap: 20 }),
|
||||
new TitleExtractor(),
|
||||
new OpenAIEmbedding(),
|
||||
],
|
||||
|
||||
@@ -4,7 +4,7 @@ A transformation is something that takes a list of nodes as an input, and return
|
||||
|
||||
Currently, the following components are Transformation objects:
|
||||
|
||||
- [SimpleNodeParser](../../api/classes/SimpleNodeParser.md)
|
||||
- [SentenceSplitter](../../api/classes/SentenceSplitter.md)
|
||||
- [MetadataExtractor](../documents_and_nodes/metadata_extraction.md)
|
||||
- [Embeddings](../embeddings/index.md)
|
||||
|
||||
@@ -13,10 +13,10 @@ Currently, the following components are Transformation objects:
|
||||
While transformations are best used with an IngestionPipeline, they can also be used directly.
|
||||
|
||||
```ts
|
||||
import { SimpleNodeParser, TitleExtractor, Document } from "llamaindex";
|
||||
import { SentenceSplitter, TitleExtractor, Document } from "llamaindex";
|
||||
|
||||
async function main() {
|
||||
let nodes = new SimpleNodeParser().getNodesFromDocuments([
|
||||
let nodes = new SentenceSplitter().getNodesFromDocuments([
|
||||
new Document({ text: "I am 10 years old. John is 20 years old." }),
|
||||
]);
|
||||
|
||||
@@ -34,15 +34,15 @@ main().catch(console.error);
|
||||
|
||||
## Custom Transformations
|
||||
|
||||
You can implement any transformation yourself by implementing the `TransformerComponent`.
|
||||
You can implement any transformation yourself by implementing the `TransformComponent`.
|
||||
|
||||
The following custom transformation will remove any special characters or punctutaion in text.
|
||||
The following custom transformation will remove any special characters or punctuation in text.
|
||||
|
||||
```ts
|
||||
import { TransformerComponent, Node } from "llamaindex";
|
||||
import { TransformComponent, TextNode } from "llamaindex";
|
||||
|
||||
class RemoveSpecialCharacters extends TransformerComponent {
|
||||
async transform(nodes: Node[]): Promise<Node[]> {
|
||||
export class RemoveSpecialCharacters extends TransformComponent {
|
||||
async transform(nodes: TextNode[]): Promise<TextNode[]> {
|
||||
for (const node of nodes) {
|
||||
node.text = node.text.replace(/[^\w\s]/gi, "");
|
||||
}
|
||||
@@ -75,3 +75,7 @@ async function main() {
|
||||
|
||||
main().catch(console.error);
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
- [TransformComponent](../../api/classes/TransformComponent.md)
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# DeepSeek LLM
|
||||
|
||||
[DeepSeek Platform](https://platform.deepseek.com/)
|
||||
|
||||
## Usage
|
||||
|
||||
```ts
|
||||
@@ -45,6 +47,6 @@ Currently does not support function calling.
|
||||
|
||||
[Currently does not support the json-output param, though it is still very good at generating JSON.](https://platform.deepseek.com/api-docs/faq#does-your-api-support-json-output)
|
||||
|
||||
## API platform
|
||||
## API Reference
|
||||
|
||||
- [DeepSeek platform](https://platform.deepseek.com/)
|
||||
- [DeepSeekLLM](../../../api/classes/DeepSeekLLM.md)
|
||||
|
||||
@@ -7,9 +7,9 @@ sidebar_position: 4
|
||||
The `NodeParser` in LlamaIndex is responsible for splitting `Document` objects into more manageable `Node` objects. When you call `.fromDocuments()`, the `NodeParser` from the `Settings` is used to do this automatically for you. Alternatively, you can use it to split documents ahead of time.
|
||||
|
||||
```typescript
|
||||
import { Document, SimpleNodeParser } from "llamaindex";
|
||||
import { Document, SentenceSplitter } from "llamaindex";
|
||||
|
||||
const nodeParser = new SimpleNodeParser();
|
||||
const nodeParser = new SentenceSplitter();
|
||||
|
||||
Settings.nodeParser = nodeParser;
|
||||
```
|
||||
@@ -93,6 +93,5 @@ The output metadata will be something like:
|
||||
|
||||
## API Reference
|
||||
|
||||
- [SimpleNodeParser](../api/classes/SimpleNodeParser.md)
|
||||
- [SentenceSplitter](../api/classes/SentenceSplitter.md)
|
||||
- [MarkdownNodeParser](../api/classes/MarkdownNodeParser.md)
|
||||
|
||||
@@ -163,3 +163,7 @@ Use the `rerank` method to reorder the documents based on the query.
|
||||
const result = await reranker.rerank(documents, query);
|
||||
console.log(result); // Perfectly customized results, ready to serve.
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
- [MixedbreadAIReranker](../../api/classes/MixedbreadAIReranker.md)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# QueryEngine
|
||||
|
||||
A query engine wraps a `Retriever` and a `ResponseSynthesizer` into a pipeline, that will use the query string to fetech nodes and then send them to the LLM to generate a response.
|
||||
A query engine wraps a `Retriever` and a `ResponseSynthesizer` into a pipeline, that will use the query string to fetch nodes and then send them to the LLM to generate a response.
|
||||
|
||||
```typescript
|
||||
const queryEngine = index.asQueryEngine();
|
||||
|
||||
@@ -15,7 +15,7 @@ import {
|
||||
OpenAI,
|
||||
RouterQueryEngine,
|
||||
SimpleDirectoryReader,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
SummaryIndex,
|
||||
VectorStoreIndex,
|
||||
Settings,
|
||||
@@ -34,11 +34,11 @@ const documents = await new SimpleDirectoryReader().loadData({
|
||||
|
||||
## Service Context
|
||||
|
||||
Next, we need to define some basic rules and parse the documents into nodes. We will use the `SimpleNodeParser` to parse the documents into nodes and `Settings` to define the rules (eg. LLM API key, chunk size, etc.):
|
||||
Next, we need to define some basic rules and parse the documents into nodes. We will use the `SentenceSplitter` to parse the documents into nodes and `Settings` to define the rules (eg. LLM API key, chunk size, etc.):
|
||||
|
||||
```ts
|
||||
Settings.llm = new OpenAI();
|
||||
Settings.nodeParser = new SimpleNodeParser({
|
||||
Settings.nodeParser = new SentenceSplitter({
|
||||
chunkSize: 1024,
|
||||
});
|
||||
```
|
||||
@@ -104,14 +104,14 @@ import {
|
||||
OpenAI,
|
||||
RouterQueryEngine,
|
||||
SimpleDirectoryReader,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
SummaryIndex,
|
||||
VectorStoreIndex,
|
||||
Settings,
|
||||
} from "llamaindex";
|
||||
|
||||
Settings.llm = new OpenAI();
|
||||
Settings.nodeParser = new SimpleNodeParser({
|
||||
Settings.nodeParser = new SentenceSplitter({
|
||||
chunkSize: 1024,
|
||||
});
|
||||
|
||||
|
||||
@@ -4,7 +4,14 @@ sidebar_position: 5
|
||||
|
||||
# Retriever
|
||||
|
||||
A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a query string. Aa `VectorIndexRetriever` will fetch the top-k most similar nodes. Meanwhile, a `SummaryIndexRetriever` will fetch all nodes no matter the query.
|
||||
A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a query string.
|
||||
|
||||
- [VectorIndexRetriever](../api/classes/VectorIndexRetriever.md) will fetch the top-k most similar nodes. Ideal for dense retrieval to find most relevant nodes.
|
||||
- [SummaryIndexRetriever](../api/classes/SummaryIndexRetriever.md) will fetch all nodes no matter the query. Ideal when complete context is necessary, e.g. analyzing large datasets.
|
||||
- [SummaryIndexLLMRetriever](../api/classes/SummaryIndexLLMRetriever.md) utilizes an LLM to score and filter nodes based on relevancy to the query.
|
||||
- [KeywordTableLLMRetriever](../api/classes/KeywordTableLLMRetriever.md) uses an LLM to extract keywords from the query and retrieve relevant nodes based on keyword matches.
|
||||
- [KeywordTableSimpleRetriever](../api/classes/KeywordTableSimpleRetriever.md) uses a basic frequency-based approach to extract keywords and retrieve nodes.
|
||||
- [KeywordTableRAKERetriever](../api/classes/KeywordTableRAKERetriever.md) uses the RAKE (Rapid Automatic Keyword Extraction) algorithm to extract keywords from the query, focusing on co-occurrence and context for keyword-based retrieval.
|
||||
|
||||
```typescript
|
||||
const retriever = vectorIndex.asRetriever({
|
||||
@@ -14,9 +21,3 @@ const retriever = vectorIndex.asRetriever({
|
||||
// Fetch nodes!
|
||||
const nodesWithScore = await retriever.retrieve({ query: "query string" });
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
- [SummaryIndexRetriever](../api/classes/SummaryIndexRetriever.md)
|
||||
- [SummaryIndexLLMRetriever](../api/classes/SummaryIndexLLMRetriever.md)
|
||||
- [VectorIndexRetriever](../api/classes/VectorIndexRetriever.md)
|
||||
|
||||
@@ -4,12 +4,19 @@ sidebar_position: 7
|
||||
|
||||
# Storage
|
||||
|
||||
Storage in LlamaIndex.TS works automatically once you've configured a `StorageContext` object. Just configure the `persistDir` and attach it to an index.
|
||||
Storage in LlamaIndex.TS works automatically once you've configured a
|
||||
`StorageContext` object.
|
||||
|
||||
Right now, only saving and loading from disk is supported, with future integrations planned!
|
||||
## Local Storage
|
||||
|
||||
You can configure the `persistDir` and attach it to an index.
|
||||
|
||||
```typescript
|
||||
import { Document, VectorStoreIndex, storageContextFromDefaults } from "./src";
|
||||
import {
|
||||
Document,
|
||||
VectorStoreIndex,
|
||||
storageContextFromDefaults,
|
||||
} from "llamaindex";
|
||||
|
||||
const storageContext = await storageContextFromDefaults({
|
||||
persistDir: "./storage",
|
||||
@@ -21,6 +28,33 @@ const index = await VectorStoreIndex.fromDocuments([document], {
|
||||
});
|
||||
```
|
||||
|
||||
## PostgreSQL Storage
|
||||
|
||||
You can configure the `schemaName`, `tableName`, `namespace`, and
|
||||
`connectionString`. If a `connectionString` is not
|
||||
provided, it will use the environment variables `PGHOST`, `PGUSER`,
|
||||
`PGPASSWORD`, `PGDATABASE` and `PGPORT`.
|
||||
|
||||
```typescript
|
||||
import {
|
||||
Document,
|
||||
VectorStoreIndex,
|
||||
PostgresDocumentStore,
|
||||
PostgresIndexStore,
|
||||
storageContextFromDefaults,
|
||||
} from "llamaindex";
|
||||
|
||||
const storageContext = await storageContextFromDefaults({
|
||||
docStore: new PostgresDocumentStore(),
|
||||
indexStore: new PostgresIndexStore(),
|
||||
});
|
||||
|
||||
const document = new Document({ text: "Test Text" });
|
||||
const index = await VectorStoreIndex.fromDocuments([document], {
|
||||
storageContext,
|
||||
});
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
- [StorageContext](../api/interfaces/StorageContext.md)
|
||||
|
||||
@@ -0,0 +1,168 @@
|
||||
import CodeBlock from "@theme/CodeBlock";
|
||||
import CodeSource from "!raw-loader!../../../../examples/workflow/joke.ts";
|
||||
|
||||
# Workflows
|
||||
|
||||
A `Workflow` in LlamaIndexTS is an event-driven abstraction used to chain together several events. Workflows are made up of `steps`, with each step responsible for handling certain event types and emitting new events.
|
||||
|
||||
Workflows in LlamaIndexTS work by defining step functions that handle specific event types and emit new events.
|
||||
|
||||
When a step function is added to a workflow, you need to specify the input and optionally the output event types (used for validation). The specification of the input events ensures each step only runs when an accepted event is ready.
|
||||
|
||||
You can create a `Workflow` to do anything! Build an agent, a RAG flow, an extraction flow, or anything else you want.
|
||||
|
||||
## Getting Started
|
||||
|
||||
As an illustrative example, let's consider a naive workflow where a joke is generated and then critiqued.
|
||||
|
||||
<CodeBlock language="ts">{CodeSource}</CodeBlock>
|
||||
|
||||
There are a few moving pieces here, so let's go through this piece by piece.
|
||||
|
||||
### Defining Workflow Events
|
||||
|
||||
```typescript
|
||||
export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
|
||||
```
|
||||
|
||||
Events are user-defined classes that extend `WorkflowEvent` and contain arbitrary data provided as template argument. In this case, our workflow relies on a single user-defined event, the `JokeEvent` with a `joke` attribute of type `string`.
|
||||
|
||||
### Setting up the Workflow Class
|
||||
|
||||
```typescript
|
||||
const llm = new OpenAI();
|
||||
...
|
||||
const jokeFlow = new Workflow({ verbose: true });
|
||||
```
|
||||
|
||||
Our workflow is implemented by instantiating the `Workflow` class. For simplicity, we created an `OpenAI` LLM instance.
|
||||
|
||||
### Workflow Entry Points
|
||||
|
||||
```typescript
|
||||
const generateJoke = async (_context: Context, ev: StartEvent) => {
|
||||
const prompt = `Write your best joke about ${ev.data.input}.`;
|
||||
const response = await llm.complete({ prompt });
|
||||
return new JokeEvent({ joke: response.text });
|
||||
};
|
||||
```
|
||||
|
||||
Here, we come to the entry-point of our workflow. While events are user-defined, there are two special-case events, the `StartEvent` and the `StopEvent`. Here, the `StartEvent` signifies where to send the initial workflow input.
|
||||
|
||||
The `StartEvent` is a bit of a special object since it can hold arbitrary attributes. Here, we accessed the topic with `ev.data.input`.
|
||||
|
||||
At this point, you may have noticed that we haven't explicitly told the workflow what events are handled by which steps.
|
||||
|
||||
To do so, we use the `addStep` method which adds a step to the workflow. The first argument is the event type that the step will handle, and the second argument is the previously defined step function:
|
||||
|
||||
```typescript
|
||||
jokeFlow.addStep(StartEvent, generateJoke);
|
||||
```
|
||||
|
||||
### Workflow Exit Points
|
||||
|
||||
```typescript
|
||||
const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
|
||||
const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
|
||||
const response = await llm.complete({ prompt });
|
||||
return new StopEvent({ result: response.text });
|
||||
};
|
||||
```
|
||||
|
||||
Here, we have our second, and last step, in the workflow. We know it's the last step because the special `StopEvent` is returned. When the workflow encounters a returned `StopEvent`, it immediately stops the workflow and returns whatever the result was.
|
||||
|
||||
In this case, the result is a string, but it could be a map, array, or any other object.
|
||||
|
||||
Don't forget to add the step to the workflow:
|
||||
|
||||
```typescript
|
||||
jokeFlow.addStep(JokeEvent, critiqueJoke);
|
||||
```
|
||||
|
||||
### Running the Workflow
|
||||
|
||||
```typescript
|
||||
const result = await jokeFlow.run("pirates");
|
||||
console.log(result.data.result);
|
||||
```
|
||||
|
||||
Lastly, we run the workflow. The `.run()` method is async, so we use await here to wait for the result.
|
||||
|
||||
### Validating Workflows
|
||||
|
||||
To tell the workflow what events are produced by each step, you can optionally provide a third argument to `addStep` to specify the output event type:
|
||||
|
||||
```typescript
|
||||
jokeFlow.addStep(StartEvent, generateJoke, { outputs: JokeEvent });
|
||||
jokeFlow.addStep(JokeEvent, critiqueJoke, { outputs: StopEvent });
|
||||
```
|
||||
|
||||
To validate a workflow, you need to call the `validate` method:
|
||||
|
||||
```typescript
|
||||
jokeFlow.validate();
|
||||
```
|
||||
|
||||
To automatically validate a workflow when you run it, you can set the `validate` flag to `true` at initialization:
|
||||
|
||||
```typescript
|
||||
const jokeFlow = new Workflow({ verbose: true, validate: true });
|
||||
```
|
||||
|
||||
## Working with Global Context/State
|
||||
|
||||
Optionally, you can choose to use global context between steps. For example, maybe multiple steps access the original `query` input from the user. You can store this in global context so that every step has access.
|
||||
|
||||
```typescript
|
||||
import { Context } from "@llamaindex/core/workflow";
|
||||
|
||||
const query = async (context: Context, ev: MyEvent) => {
|
||||
// get the query from the context
|
||||
const query = context.get("query");
|
||||
// do something with context and event
|
||||
const val = ...
|
||||
const result = ...
|
||||
// store in context
|
||||
context.set("key", val);
|
||||
|
||||
return new StopEvent({ result });
|
||||
};
|
||||
```
|
||||
|
||||
## Waiting for Multiple Events
|
||||
|
||||
The context does more than just hold data, it also provides utilities to buffer and wait for multiple events.
|
||||
|
||||
For example, you might have a step that waits for a query and retrieved nodes before synthesizing a response:
|
||||
|
||||
```typescript
|
||||
const synthesize = async (context: Context, ev: QueryEvent | RetrieveEvent) => {
|
||||
const events = context.collectEvents(ev, [QueryEvent, RetrieveEvent]);
|
||||
if (!events) {
|
||||
return;
|
||||
}
|
||||
const prompt = events
|
||||
.map((event) => {
|
||||
if (event instanceof QueryEvent) {
|
||||
return `Answer this query using the context provided: ${event.data.query}`;
|
||||
} else if (event instanceof RetrieveEvent) {
|
||||
return `Context: ${event.data.context}`;
|
||||
}
|
||||
return "";
|
||||
})
|
||||
.join("\n");
|
||||
|
||||
const response = await llm.complete({ prompt });
|
||||
return new StopEvent({ result: response.text });
|
||||
};
|
||||
```
|
||||
|
||||
Using `context.collectEvents()` we can buffer and wait for ALL expected events to arrive. This function will only return events (in the requested order) once all events have arrived.
|
||||
|
||||
## Manually Triggering Events
|
||||
|
||||
Normally, events are triggered by returning another event during a step. However, events can also be manually dispatched using the `ctx.sendEvent(event)` method within a workflow.
|
||||
|
||||
## Examples
|
||||
|
||||
You can find many useful examples of using workflows in the [examples folder](https://github.com/run-llama/LlamaIndexTS/blob/main/examples/workflow).
|
||||
+14
-14
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "docs",
|
||||
"version": "0.0.57",
|
||||
"version": "0.0.69",
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"docusaurus": "docusaurus",
|
||||
@@ -15,29 +15,29 @@
|
||||
"typecheck": "tsc"
|
||||
},
|
||||
"dependencies": {
|
||||
"@docusaurus/core": "3.4.0",
|
||||
"@docusaurus/remark-plugin-npm2yarn": "3.4.0",
|
||||
"@docusaurus/core": "3.5.2",
|
||||
"@docusaurus/remark-plugin-npm2yarn": "3.5.2",
|
||||
"@llamaindex/examples": "workspace:*",
|
||||
"@mdx-js/react": "3.0.1",
|
||||
"clsx": "2.1.1",
|
||||
"llamaindex": "workspace:*",
|
||||
"postcss": "8.4.39",
|
||||
"prism-react-renderer": "2.3.1",
|
||||
"postcss": "8.4.41",
|
||||
"prism-react-renderer": "2.4.0",
|
||||
"raw-loader": "4.0.2",
|
||||
"react": "18.3.1",
|
||||
"react-dom": "18.3.1"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@docusaurus/module-type-aliases": "3.4.0",
|
||||
"@docusaurus/preset-classic": "3.4.0",
|
||||
"@docusaurus/theme-classic": "3.4.0",
|
||||
"@docusaurus/types": "3.4.0",
|
||||
"@docusaurus/module-type-aliases": "3.5.2",
|
||||
"@docusaurus/preset-classic": "3.5.2",
|
||||
"@docusaurus/theme-classic": "3.5.2",
|
||||
"@docusaurus/types": "3.5.2",
|
||||
"@tsconfig/docusaurus": "2.0.3",
|
||||
"@types/node": "^20.12.11",
|
||||
"docusaurus-plugin-typedoc": "1.0.3",
|
||||
"typedoc": "0.26.4",
|
||||
"typedoc-plugin-markdown": "4.1.2",
|
||||
"typescript": "^5.5.3"
|
||||
"@types/node": "^22.5.1",
|
||||
"docusaurus-plugin-typedoc": "1.0.5",
|
||||
"typedoc": "0.26.6",
|
||||
"typedoc-plugin-markdown": "4.2.6",
|
||||
"typescript": "^5.5.4"
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
# examples
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 11feef8: Add workflows
|
||||
- Updated dependencies [11feef8]
|
||||
- @llamaindex/core@0.2.0
|
||||
- llamaindex@0.6.0
|
||||
|
||||
## 0.0.7
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -6,8 +6,8 @@ import {
|
||||
OpenAI,
|
||||
OpenAIAgent,
|
||||
QueryEngineTool,
|
||||
SentenceSplitter,
|
||||
Settings,
|
||||
SimpleNodeParser,
|
||||
SimpleToolNodeMapping,
|
||||
SummaryIndex,
|
||||
VectorStoreIndex,
|
||||
@@ -43,7 +43,7 @@ async function main() {
|
||||
for (const title of wikiTitles) {
|
||||
console.log(`Processing ${title}`);
|
||||
|
||||
const nodes = new SimpleNodeParser({
|
||||
const nodes = new SentenceSplitter({
|
||||
chunkSize: 200,
|
||||
chunkOverlap: 20,
|
||||
}).getNodesFromDocuments([countryDocs[title]]);
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { ChatResponseChunk, OpenAIAgent } from "llamaindex";
|
||||
import { ReadableStream } from "node:stream/web";
|
||||
import {
|
||||
getCurrentIDTool,
|
||||
getUserInfoTool,
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { ChatResponseChunk, ReActAgent } from "llamaindex";
|
||||
import { ReadableStream } from "node:stream/web";
|
||||
import {
|
||||
getCurrentIDTool,
|
||||
getUserInfoTool,
|
||||
|
||||
@@ -3,7 +3,7 @@ import { DeepInfraEmbedding } from "llamaindex";
|
||||
async function main() {
|
||||
// API token can be provided as an environment variable too
|
||||
// using DEEPINFRA_API_TOKEN variable
|
||||
const apiToken = "YOUR_API_TOKEN" ?? process.env.DEEPINFRA_API_TOKEN;
|
||||
const apiToken = process.env.DEEPINFRA_API_TOKEN ?? "YOUR_API_TOKEN";
|
||||
const model = "BAAI/bge-large-en-v1.5";
|
||||
const embedModel = new DeepInfraEmbedding({
|
||||
model,
|
||||
|
||||
@@ -2,13 +2,13 @@ import {
|
||||
Document,
|
||||
KeywordExtractor,
|
||||
OpenAI,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
} from "llamaindex";
|
||||
|
||||
(async () => {
|
||||
const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
|
||||
|
||||
const nodeParser = new SimpleNodeParser();
|
||||
const nodeParser = new SentenceSplitter();
|
||||
|
||||
const nodes = nodeParser.getNodesFromDocuments([
|
||||
new Document({ text: "banana apple orange pear peach watermelon" }),
|
||||
|
||||
@@ -2,13 +2,13 @@ import {
|
||||
Document,
|
||||
OpenAI,
|
||||
QuestionsAnsweredExtractor,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
} from "llamaindex";
|
||||
|
||||
(async () => {
|
||||
const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
|
||||
|
||||
const nodeParser = new SimpleNodeParser();
|
||||
const nodeParser = new SentenceSplitter();
|
||||
|
||||
const nodes = nodeParser.getNodesFromDocuments([
|
||||
new Document({
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
import {
|
||||
Document,
|
||||
OpenAI,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
SummaryExtractor,
|
||||
} from "llamaindex";
|
||||
|
||||
(async () => {
|
||||
const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo", temperature: 0 });
|
||||
|
||||
const nodeParser = new SimpleNodeParser();
|
||||
const nodeParser = new SentenceSplitter();
|
||||
|
||||
const nodes = nodeParser.getNodesFromDocuments([
|
||||
new Document({
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
import { Document, OpenAI, SimpleNodeParser, TitleExtractor } from "llamaindex";
|
||||
import { Document, OpenAI, SentenceSplitter, TitleExtractor } from "llamaindex";
|
||||
|
||||
import essay from "../essay";
|
||||
|
||||
(async () => {
|
||||
const openaiLLM = new OpenAI({ model: "gpt-3.5-turbo-0125", temperature: 0 });
|
||||
|
||||
const nodeParser = new SimpleNodeParser({});
|
||||
const nodeParser = new SentenceSplitter({});
|
||||
|
||||
const nodes = nodeParser.getNodesFromDocuments([
|
||||
new Document({
|
||||
|
||||
@@ -7,10 +7,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import {\n",
|
||||
" Document,\n",
|
||||
" SimpleNodeParser\n",
|
||||
"} from \"npm:llamaindex\";"
|
||||
"import { Document, SentenceSplitter } from \"npm:llamaindex\";"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -45,7 +42,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"const nodeParser = new SimpleNodeParser();\n",
|
||||
"const nodeParser = new SentenceSplitter();\n",
|
||||
"const nodes = nodeParser.getNodesFromDocuments([\n",
|
||||
" new Document({ text: \"I am 10 years old. John is 20 years old.\" }),\n",
|
||||
"]);\n",
|
||||
|
||||
@@ -2,12 +2,12 @@ import {
|
||||
Document,
|
||||
NodeWithScore,
|
||||
ResponseSynthesizer,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
TextNode,
|
||||
} from "llamaindex";
|
||||
|
||||
(async () => {
|
||||
const nodeParser = new SimpleNodeParser();
|
||||
const nodeParser = new SentenceSplitter();
|
||||
const nodes = nodeParser.getNodesFromDocuments([
|
||||
new Document({ text: "I am 10 years old. John is 20 years old." }),
|
||||
]);
|
||||
|
||||
@@ -28,12 +28,23 @@ async function loadAndIndex() {
|
||||
"full_text",
|
||||
]);
|
||||
|
||||
const FILTER_METADATA_FIELD = "content_type";
|
||||
|
||||
documents.forEach((document, index) => {
|
||||
const contentType = ["tweet", "post", "story"][index % 3]; // assign a random content type to each document
|
||||
document.metadata = {
|
||||
...document.metadata,
|
||||
[FILTER_METADATA_FIELD]: contentType,
|
||||
};
|
||||
});
|
||||
|
||||
// create Atlas as a vector store
|
||||
const vectorStore = new MongoDBAtlasVectorSearch({
|
||||
mongodbClient: client,
|
||||
dbName: databaseName,
|
||||
collectionName: vectorCollectionName, // this is where your embeddings will be stored
|
||||
indexName: indexName, // this is the name of the index you will need to create
|
||||
indexedMetadataFields: [FILTER_METADATA_FIELD], // this is the field that will be used for the query
|
||||
});
|
||||
|
||||
// now create an index from all the Documents and store them in Atlas
|
||||
@@ -45,39 +56,4 @@ async function loadAndIndex() {
|
||||
await client.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is document in https://www.mongodb.com/docs/atlas/atlas-search/create-index/#create-an-fts-index-programmatically
|
||||
* But, while testing a 'CommandNotFound' error occurred, so we're not using this here.
|
||||
*/
|
||||
async function createSearchIndex() {
|
||||
const client = new MongoClient(mongoUri);
|
||||
const database = client.db(databaseName);
|
||||
const collection = database.collection(vectorCollectionName);
|
||||
|
||||
// define your Atlas Search index
|
||||
const index = {
|
||||
name: indexName,
|
||||
definition: {
|
||||
/* search index definition fields */
|
||||
mappings: {
|
||||
dynamic: true,
|
||||
fields: [
|
||||
{
|
||||
type: "vector",
|
||||
path: "embedding",
|
||||
numDimensions: 1536,
|
||||
similarity: "cosine",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
};
|
||||
// run the helper method
|
||||
const result = await collection.createSearchIndex(index);
|
||||
console.log("Successfully created search index:", result);
|
||||
await client.close();
|
||||
}
|
||||
|
||||
loadAndIndex().catch(console.error);
|
||||
|
||||
// you can't query your index yet because you need to create a vector search index in mongodb's UI now
|
||||
|
||||
@@ -14,14 +14,26 @@ async function query() {
|
||||
dbName: process.env.MONGODB_DATABASE!,
|
||||
collectionName: process.env.MONGODB_VECTORS!,
|
||||
indexName: process.env.MONGODB_VECTOR_INDEX!,
|
||||
indexedMetadataFields: ["content_type"],
|
||||
});
|
||||
|
||||
const index = await VectorStoreIndex.fromVectorStore(store);
|
||||
|
||||
const retriever = index.asRetriever({ similarityTopK: 20 });
|
||||
const queryEngine = index.asQueryEngine({ retriever });
|
||||
const queryEngine = index.asQueryEngine({
|
||||
retriever,
|
||||
preFilters: {
|
||||
filters: [
|
||||
{
|
||||
key: "content_type",
|
||||
value: "story", // try "tweet" or "post" to see the difference
|
||||
operator: "==",
|
||||
},
|
||||
],
|
||||
},
|
||||
});
|
||||
const result = await queryEngine.query({
|
||||
query: "What does the author think of web frameworks?",
|
||||
query: "What does author receive when he was 11 years old?", // Isaac Asimov's "Foundation" for Christmas
|
||||
});
|
||||
console.log(result.response);
|
||||
await client.close();
|
||||
|
||||
@@ -68,45 +68,6 @@ What you're doing here is creating a Reader which loads the data out of Mongo in
|
||||
|
||||
Now you're creating a vector search client for Mongo. In addition to a MongoDB client object, you again tell it what database everything is in. This time you give it the name of the collection where you'll store the vector embeddings, and the name of the vector search index you'll create in the next step.
|
||||
|
||||
### Create a vector search index
|
||||
|
||||
Now if all has gone well you should be able to log in to the Mongo Atlas UI and see two collections in your database: the original data in `tiny_tweets_collection`, and the vector embeddings in `tiny_tweets_vectors`.
|
||||
|
||||

|
||||
|
||||
Now it's time to create the vector search index so that you can query the data.
|
||||
It's not yet possible to programmatically create a vector search index using the [`createIndex`](https://www.mongodb.com/docs/manual/reference/method/db.collection.createIndex/) function, therefore we have to create one manually in the UI.
|
||||
To do so, first, click the 'Atlas Search' tab, and then click "Create Search Index":
|
||||
|
||||

|
||||
|
||||
We have to use the JSON editor, as the Visual Editor does not yet support to create a vector search index:
|
||||
|
||||

|
||||
|
||||
Now under "database and collection" select `tiny_tweets_db` and within that select `tiny_tweets_vectors`. Then under "Index name" enter `tiny_tweets_vector_index` (or whatever value you put for MONGODB_VECTOR_INDEX in `.env`). Under that, you'll want to enter this JSON object:
|
||||
|
||||
```json
|
||||
{
|
||||
"fields": [
|
||||
{
|
||||
"type": "vector",
|
||||
"path": "embedding",
|
||||
"numDimensions": 1536,
|
||||
"similarity": "cosine"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
This tells Mongo that the `embedding` field in each document (in the `tiny_tweets_vectors` collection) is a vector of 1536 dimensions (this is the size of embeddings used by OpenAI), and that we want to use cosine similarity to compare vectors. You don't need to worry too much about these values unless you want to use a different LLM to OpenAI entirely.
|
||||
|
||||
The UI will ask you to review and confirm your choices, then you need to wait a minute or two while it generates the index. If all goes well, you should see something like this screen:
|
||||
|
||||

|
||||
|
||||
Now you're ready to query your data!
|
||||
|
||||
### Run a test query
|
||||
|
||||
You can do this by running
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
// call pnpm tsx multimodal/load.ts first to init the storage
|
||||
import { OpenAI, Settings, SimpleChatEngine, imageToDataUrl } from "llamaindex";
|
||||
import fs from "node:fs/promises";
|
||||
|
||||
import path from "path";
|
||||
// Update llm
|
||||
Settings.llm = new OpenAI({ model: "gpt-4o-mini", maxTokens: 512 });
|
||||
|
||||
async function main() {
|
||||
const chatEngine = new SimpleChatEngine();
|
||||
|
||||
// Load the image and convert it to a data URL
|
||||
const imagePath = path.join(__dirname, ".", "data", "60.jpg");
|
||||
|
||||
// 1. you can read the buffer from the file
|
||||
const imageBuffer = await fs.readFile(imagePath);
|
||||
const dataUrl = await imageToDataUrl(imageBuffer);
|
||||
// or 2. you can just pass the file path to the imageToDataUrl function
|
||||
// const dataUrl = await imageToDataUrl(imagePath);
|
||||
|
||||
// Update the image_url in the chat message
|
||||
const response = await chatEngine.chat({
|
||||
message: [
|
||||
{
|
||||
type: "text",
|
||||
text: "What is in this image?",
|
||||
},
|
||||
{
|
||||
type: "image_url",
|
||||
image_url: {
|
||||
url: dataUrl,
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
console.log(response.message.content);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -0,0 +1,13 @@
|
||||
import { OpenAI } from "llamaindex";
|
||||
|
||||
(async () => {
|
||||
const llm = new OpenAI({ model: "o1-preview", temperature: 1 });
|
||||
|
||||
const prompt = `What are three compounds we should consider investigating to advance research
|
||||
into new antibiotics? Why should we consider them?
|
||||
`;
|
||||
|
||||
// complete api
|
||||
const response = await llm.complete({ prompt });
|
||||
console.log(response.text);
|
||||
})();
|
||||
+11
-11
@@ -1,27 +1,27 @@
|
||||
{
|
||||
"name": "@llamaindex/examples",
|
||||
"private": true,
|
||||
"version": "0.0.7",
|
||||
"version": "0.0.8",
|
||||
"dependencies": {
|
||||
"@aws-crypto/sha256-js": "^5.2.0",
|
||||
"@azure/identity": "^4.2.1",
|
||||
"@datastax/astra-db-ts": "^1.2.1",
|
||||
"@llamaindex/core": "^0.1.0",
|
||||
"@azure/identity": "^4.4.1",
|
||||
"@datastax/astra-db-ts": "^1.4.1",
|
||||
"@llamaindex/core": "^0.2.0",
|
||||
"@notionhq/client": "^2.2.15",
|
||||
"@pinecone-database/pinecone": "^2.2.2",
|
||||
"@zilliz/milvus2-sdk-node": "^2.4.4",
|
||||
"@pinecone-database/pinecone": "^3.0.2",
|
||||
"@zilliz/milvus2-sdk-node": "^2.4.6",
|
||||
"chromadb": "^1.8.1",
|
||||
"commander": "^12.1.0",
|
||||
"dotenv": "^16.4.5",
|
||||
"js-tiktoken": "^1.0.12",
|
||||
"llamaindex": "^0.5.0",
|
||||
"js-tiktoken": "^1.0.14",
|
||||
"llamaindex": "^0.6.0",
|
||||
"mongodb": "^6.7.0",
|
||||
"pathe": "^1.1.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.14.1",
|
||||
"tsx": "^4.15.6",
|
||||
"typescript": "^5.5.3"
|
||||
"@types/node": "^22.5.1",
|
||||
"tsx": "^4.19.0",
|
||||
"typescript": "^5.5.4"
|
||||
},
|
||||
"scripts": {
|
||||
"lint": "eslint ."
|
||||
|
||||
@@ -5,7 +5,7 @@ import {
|
||||
IngestionPipeline,
|
||||
MetadataMode,
|
||||
OpenAIEmbedding,
|
||||
SimpleNodeParser,
|
||||
SentenceSplitter,
|
||||
} from "llamaindex";
|
||||
|
||||
async function main() {
|
||||
@@ -18,7 +18,7 @@ async function main() {
|
||||
const document = new Document({ text: essay, id_: path });
|
||||
const pipeline = new IngestionPipeline({
|
||||
transformations: [
|
||||
new SimpleNodeParser({ chunkSize: 1024, chunkOverlap: 20 }),
|
||||
new SentenceSplitter({ chunkSize: 1024, chunkOverlap: 20 }),
|
||||
new OpenAIEmbedding(),
|
||||
],
|
||||
});
|
||||
|
||||
@@ -1,21 +1,22 @@
|
||||
import {
|
||||
Document,
|
||||
PromptTemplate,
|
||||
ResponseSynthesizer,
|
||||
TreeSummarize,
|
||||
TreeSummarizePrompt,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
|
||||
const treeSummarizePrompt: TreeSummarizePrompt = ({ context, query }) => {
|
||||
return `Context information from multiple sources is below.
|
||||
const treeSummarizePrompt: TreeSummarizePrompt = new PromptTemplate({
|
||||
template: `Context information from multiple sources is below.
|
||||
---------------------
|
||||
${context}
|
||||
{context}
|
||||
---------------------
|
||||
Given the information from multiple sources and not prior knowledge.
|
||||
Answer the query in the style of a Shakespeare play"
|
||||
Query: ${query}
|
||||
Answer:`;
|
||||
};
|
||||
Query: {query}
|
||||
Answer:`,
|
||||
});
|
||||
|
||||
async function main() {
|
||||
const documents = new Document({
|
||||
|
||||
@@ -14,14 +14,15 @@
|
||||
"start:assemblyai": "node --import tsx ./src/assemblyai.ts",
|
||||
"start:llamaparse-dir": "node --import tsx ./src/simple-directory-reader-with-llamaparse.ts",
|
||||
"start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts",
|
||||
"start:discord": "node --import tsx ./src/discord.ts"
|
||||
"start:discord": "node --import tsx ./src/discord.ts",
|
||||
"start:json": "node --import tsx ./src/json.ts"
|
||||
},
|
||||
"dependencies": {
|
||||
"llamaindex": "*"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.12.11",
|
||||
"tsx": "^4.15.6",
|
||||
"typescript": "^5.5.3"
|
||||
"@types/node": "^22.5.1",
|
||||
"tsx": "^4.19.0",
|
||||
"typescript": "^5.5.4"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import {
|
||||
CompactAndRefine,
|
||||
OpenAI,
|
||||
PromptTemplate,
|
||||
ResponseSynthesizer,
|
||||
Settings,
|
||||
VectorStoreIndex,
|
||||
@@ -18,14 +19,15 @@ async function main() {
|
||||
// Split text and create embeddings. Store them in a VectorStoreIndex
|
||||
const index = await VectorStoreIndex.fromDocuments(documents);
|
||||
|
||||
const csvPrompt = ({ context = "", query = "" }) => {
|
||||
return `The following CSV file is loaded from ${path}
|
||||
const csvPrompt = new PromptTemplate({
|
||||
templateVars: ["query", "context"],
|
||||
template: `The following CSV file is loaded from ${path}
|
||||
\`\`\`csv
|
||||
${context}
|
||||
{context}
|
||||
\`\`\`
|
||||
Given the CSV file, generate me Typescript code to answer the question: ${query}. You can use built in NodeJS functions but avoid using third party libraries.
|
||||
`;
|
||||
};
|
||||
Given the CSV file, generate me Typescript code to answer the question: {query}. You can use built in NodeJS functions but avoid using third party libraries.
|
||||
`,
|
||||
});
|
||||
|
||||
const responseSynthesizer = new ResponseSynthesizer({
|
||||
responseBuilder: new CompactAndRefine(undefined, csvPrompt),
|
||||
|
||||
@@ -3,6 +3,7 @@ import {
|
||||
ImageNode,
|
||||
LlamaParseReader,
|
||||
OpenAI,
|
||||
PromptTemplate,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
import { createMessageContent } from "llamaindex/synthesizers/utils";
|
||||
@@ -50,7 +51,9 @@ async function getImageTextDocs(
|
||||
|
||||
for (const imageDict of imageDicts) {
|
||||
const imageDoc = new ImageNode({ image: imageDict.path });
|
||||
const prompt = () => `Describe the image as alt text`;
|
||||
const prompt = new PromptTemplate({
|
||||
template: `Describe the image as alt text`,
|
||||
});
|
||||
const message = await createMessageContent(prompt, [imageDoc]);
|
||||
|
||||
const response = await llm.complete({
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
import {
|
||||
OpenAI,
|
||||
RouterQueryEngine,
|
||||
SentenceSplitter,
|
||||
Settings,
|
||||
SimpleDirectoryReader,
|
||||
SimpleNodeParser,
|
||||
SummaryIndex,
|
||||
VectorStoreIndex,
|
||||
} from "llamaindex";
|
||||
@@ -12,7 +12,7 @@ import {
|
||||
Settings.llm = new OpenAI();
|
||||
|
||||
// Update node parser
|
||||
Settings.nodeParser = new SimpleNodeParser({
|
||||
Settings.nodeParser = new SentenceSplitter({
|
||||
chunkSize: 1024,
|
||||
});
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import {
|
||||
Document,
|
||||
SentenceSplitter,
|
||||
Settings,
|
||||
SimpleNodeParser,
|
||||
SummaryIndex,
|
||||
SummaryRetrieverMode,
|
||||
} from "llamaindex";
|
||||
@@ -9,7 +9,7 @@ import {
|
||||
import essay from "./essay";
|
||||
|
||||
// Update node parser
|
||||
Settings.nodeParser = new SimpleNodeParser({
|
||||
Settings.nodeParser = new SentenceSplitter({
|
||||
chunkSize: 40,
|
||||
});
|
||||
|
||||
|
||||
@@ -7,8 +7,6 @@
|
||||
"forceConsistentCasingInFileNames": true,
|
||||
"strict": true,
|
||||
"skipLibCheck": true,
|
||||
"lib": ["ES2022"],
|
||||
"types": ["node"],
|
||||
"outDir": "./lib",
|
||||
"tsBuildInfoFile": "./lib/.tsbuildinfo",
|
||||
"incremental": true,
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
# Weaviate Vector Store
|
||||
|
||||
Here are two sample scripts which work with loading and querying data from a Weaviate Vector Store.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- A Weaviate Vector Database
|
||||
- Hosted https://weaviate.io/
|
||||
- Self Hosted https://weaviate.io/developers/weaviate/installation/docker-compose#starter-docker-compose-file
|
||||
- An OpenAI API Key
|
||||
|
||||
## Setup
|
||||
|
||||
1. Set your env variables:
|
||||
|
||||
- `WEAVIATE_CLUSTER_URL`: Address of your Weaviate Vector Store (like localhost:8080)
|
||||
- `WEAVIATE_API_KEY`: Your Weaviate API key
|
||||
- `OPENAI_API_KEY`: Your OpenAI key
|
||||
|
||||
2. `cd` into the `examples` directory
|
||||
3. run `npm i`
|
||||
|
||||
## Load the data
|
||||
|
||||
This sample loads the sample dataset of movie reviews from `./data/movie_reviews.csv`.
|
||||
|
||||
run `npx tsx weaviate/load`
|
||||
|
||||
## Use RAG to Query the data
|
||||
|
||||
run `npx tsx weaviate/query`
|
||||
@@ -0,0 +1,23 @@
|
||||
import {
|
||||
PapaCSVReader,
|
||||
storageContextFromDefaults,
|
||||
VectorStoreIndex,
|
||||
WeaviateVectorStore,
|
||||
} from "llamaindex";
|
||||
|
||||
const indexName = "MovieReviews";
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
const reader = new PapaCSVReader(false);
|
||||
const docs = await reader.loadData("./data/movie_reviews.csv");
|
||||
const vectorStore = new WeaviateVectorStore({ indexName });
|
||||
const storageContext = await storageContextFromDefaults({ vectorStore });
|
||||
await VectorStoreIndex.fromDocuments(docs, { storageContext });
|
||||
console.log("Successfully loaded data into Weaviate");
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
void main();
|
||||
@@ -0,0 +1,46 @@
|
||||
import { VectorStoreIndex, WeaviateVectorStore } from "llamaindex";
|
||||
|
||||
const indexName = "MovieReviews";
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
const query = "Get all movie titles.";
|
||||
const vectorStore = new WeaviateVectorStore({ indexName });
|
||||
const index = await VectorStoreIndex.fromVectorStore(vectorStore);
|
||||
const retriever = index.asRetriever({ similarityTopK: 20 });
|
||||
|
||||
const queryEngine = index.asQueryEngine({ retriever });
|
||||
const results = await queryEngine.query({ query });
|
||||
console.log(`Query from ${results.sourceNodes?.length} nodes`);
|
||||
console.log(results.response);
|
||||
|
||||
console.log("\n=====\nQuerying the index with filters");
|
||||
const queryEngineWithFilters = index.asQueryEngine({
|
||||
retriever,
|
||||
preFilters: {
|
||||
filters: [
|
||||
{
|
||||
key: "document_id",
|
||||
value: "./data/movie_reviews.csv_37",
|
||||
operator: "==",
|
||||
},
|
||||
{
|
||||
key: "document_id",
|
||||
value: "./data/movie_reviews.csv_21",
|
||||
operator: "==",
|
||||
},
|
||||
],
|
||||
condition: "or",
|
||||
},
|
||||
});
|
||||
const resultAfterFilter = await queryEngineWithFilters.query({
|
||||
query: "Get all movie titles.",
|
||||
});
|
||||
console.log(`Query from ${resultAfterFilter.sourceNodes?.length} nodes`);
|
||||
console.log(resultAfterFilter.response);
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
void main();
|
||||
@@ -0,0 +1,7 @@
|
||||
# Workflow Examples
|
||||
|
||||
These examples demonstrate LlamaIndexTS's workflow system. Check out [its documentation](https://ts.llamaindex.ai/modules/workflows) for more information.
|
||||
|
||||
## Running the Examples
|
||||
|
||||
To run the examples, make sure to run them from the parent folder (called `examples`). For example, to run the joke workflow, run `npx tsx workflow/joke.ts`.
|
||||
@@ -0,0 +1,122 @@
|
||||
import {
|
||||
Context,
|
||||
StartEvent,
|
||||
StopEvent,
|
||||
Workflow,
|
||||
WorkflowEvent,
|
||||
} from "@llamaindex/core/workflow";
|
||||
import { OpenAI } from "llamaindex";
|
||||
|
||||
const MAX_REVIEWS = 3;
|
||||
|
||||
// Using the o1-preview model (see https://platform.openai.com/docs/guides/reasoning?reasoning-prompt-examples=coding-planning)
|
||||
const llm = new OpenAI({ model: "o1-preview", temperature: 1 });
|
||||
|
||||
// example specification from https://platform.openai.com/docs/guides/reasoning?reasoning-prompt-examples=coding-planning
|
||||
const specification = `Python app that takes user questions and looks them up in a
|
||||
database where they are mapped to answers. If there is a close match, it retrieves
|
||||
the matched answer. If there isn't, it asks the user to provide an answer and
|
||||
stores the question/answer pair in the database.`;
|
||||
|
||||
// Create custom event types
|
||||
export class MessageEvent extends WorkflowEvent<{ msg: string }> {}
|
||||
export class CodeEvent extends WorkflowEvent<{ code: string }> {}
|
||||
export class ReviewEvent extends WorkflowEvent<{
|
||||
review: string;
|
||||
code: string;
|
||||
}> {}
|
||||
|
||||
// Helper function to truncate long strings
|
||||
const truncate = (str: string) => {
|
||||
const MAX_LENGTH = 60;
|
||||
if (str.length <= MAX_LENGTH) return str;
|
||||
return str.slice(0, MAX_LENGTH) + "...";
|
||||
};
|
||||
|
||||
// the architect is responsible for writing the structure and the initial code based on the specification
|
||||
const architect = async (context: Context, ev: StartEvent) => {
|
||||
// get the specification from the start event and save it to context
|
||||
context.set("specification", ev.data.input);
|
||||
const spec = context.get("specification");
|
||||
// write a message to send an update to the user
|
||||
context.writeEventToStream(
|
||||
new MessageEvent({
|
||||
msg: `Writing app using this specification: ${truncate(spec)}`,
|
||||
}),
|
||||
);
|
||||
const prompt = `Build an app for this specification: <spec>${spec}</spec>. Make a plan for the directory structure you'll need, then return each file in full. Don't supply any reasoning, just code.`;
|
||||
const code = await llm.complete({ prompt });
|
||||
return new CodeEvent({ code: code.text });
|
||||
};
|
||||
|
||||
// the coder is responsible for updating the code based on the review
|
||||
const coder = async (context: Context, ev: ReviewEvent) => {
|
||||
// get the specification from the context
|
||||
const spec = context.get("specification");
|
||||
// get the latest review and code
|
||||
const { review, code } = ev.data;
|
||||
// write a message to send an update to the user
|
||||
context.writeEventToStream(
|
||||
new MessageEvent({
|
||||
msg: `Update code based on review: ${truncate(review)}`,
|
||||
}),
|
||||
);
|
||||
const prompt = `We need to improve code that should implement this specification: <spec>${spec}</spec>. Here is the current code: <code>${code}</code>. And here is a review of the code: <review>${review}</review>. Improve the code based on the review, keep the specification in mind, and return the full updated code. Don't supply any reasoning, just code.`;
|
||||
const updatedCode = await llm.complete({ prompt });
|
||||
return new CodeEvent({ code: updatedCode.text });
|
||||
};
|
||||
|
||||
// the reviewer is responsible for reviewing the code and providing feedback
|
||||
const reviewer = async (context: Context, ev: CodeEvent) => {
|
||||
// get the specification from the context
|
||||
const spec = context.get("specification");
|
||||
// get latest code from the event
|
||||
const { code } = ev.data;
|
||||
// update and check the number of reviews
|
||||
const numberReviews = context.get("numberReviews", 0) + 1;
|
||||
context.set("numberReviews", numberReviews);
|
||||
if (numberReviews > MAX_REVIEWS) {
|
||||
// we've done this too many times - return the code
|
||||
context.writeEventToStream(
|
||||
new MessageEvent({
|
||||
msg: `Already reviewed ${numberReviews - 1} times, stopping!`,
|
||||
}),
|
||||
);
|
||||
return new StopEvent({ result: code });
|
||||
}
|
||||
// write a message to send an update to the user
|
||||
context.writeEventToStream(
|
||||
new MessageEvent({ msg: `Review #${numberReviews}: ${truncate(code)}` }),
|
||||
);
|
||||
const prompt = `Review this code: <code>${code}</code>. Check if the code quality and whether it correctly implements this specification: <spec>${spec}</spec>. If you're satisfied, just return 'Looks great', nothing else. If not, return a review with a list of changes you'd like to see.`;
|
||||
const review = (await llm.complete({ prompt })).text;
|
||||
if (review.includes("Looks great")) {
|
||||
// the reviewer is satisfied with the code, let's return the review
|
||||
context.writeEventToStream(
|
||||
new MessageEvent({
|
||||
msg: `Reviewer says: ${review}`,
|
||||
}),
|
||||
);
|
||||
return new StopEvent({ result: code });
|
||||
}
|
||||
|
||||
return new ReviewEvent({ review, code });
|
||||
};
|
||||
|
||||
const codeAgent = new Workflow({ validate: true });
|
||||
codeAgent.addStep(StartEvent, architect, { outputs: CodeEvent });
|
||||
codeAgent.addStep(ReviewEvent, coder, { outputs: CodeEvent });
|
||||
codeAgent.addStep(CodeEvent, reviewer, { outputs: ReviewEvent });
|
||||
|
||||
// Usage
|
||||
async function main() {
|
||||
const run = codeAgent.run(specification);
|
||||
for await (const event of codeAgent.streamEvents()) {
|
||||
const msg = (event as MessageEvent).data.msg;
|
||||
console.log(`${msg}\n`);
|
||||
}
|
||||
const result = await run;
|
||||
console.log("Final code:\n", result.data.result);
|
||||
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -0,0 +1,70 @@
|
||||
import {
|
||||
Context,
|
||||
StartEvent,
|
||||
StopEvent,
|
||||
Workflow,
|
||||
WorkflowEvent,
|
||||
} from "@llamaindex/core/workflow";
|
||||
import { OpenAI } from "llamaindex";
|
||||
|
||||
// Create LLM instance
|
||||
const llm = new OpenAI();
|
||||
|
||||
// Create custom event types
|
||||
export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
|
||||
export class CritiqueEvent extends WorkflowEvent<{ critique: string }> {}
|
||||
export class AnalysisEvent extends WorkflowEvent<{ analysis: string }> {}
|
||||
|
||||
const generateJoke = async (_context: Context, ev: StartEvent) => {
|
||||
const prompt = `Write your best joke about ${ev.data.input}.`;
|
||||
const response = await llm.complete({ prompt });
|
||||
return new JokeEvent({ joke: response.text });
|
||||
};
|
||||
|
||||
const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
|
||||
const prompt = `Give a thorough critique of the following joke: ${ev.data.joke}`;
|
||||
const response = await llm.complete({ prompt });
|
||||
return new CritiqueEvent({ critique: response.text });
|
||||
};
|
||||
|
||||
const analyzeJoke = async (_context: Context, ev: JokeEvent) => {
|
||||
const prompt = `Give a thorough analysis of the following joke: ${ev.data.joke}`;
|
||||
const response = await llm.complete({ prompt });
|
||||
return new AnalysisEvent({ analysis: response.text });
|
||||
};
|
||||
|
||||
/**
 * Workflow step: combines the analysis and the critique of a joke into one
 * LLM-written report and ends the workflow with it.
 *
 * Returns nothing when `collectEvents` yields no batch (presumably because
 * the other branch has not delivered its event yet - confirm against
 * `Context.collectEvents`).
 */
const reportJoke = async (
  context: Context,
  ev: AnalysisEvent | CritiqueEvent,
) => {
  const collected = context.collectEvents(ev, [AnalysisEvent, CritiqueEvent]);
  if (!collected) return;

  // One labelled line per collected event; unknown event types contribute an
  // empty line.
  const details = collected
    .map((item) =>
      item instanceof AnalysisEvent
        ? `Analysis: ${item.data.analysis}`
        : item instanceof CritiqueEvent
          ? `Critique: ${item.data.critique}`
          : "",
    )
    .join("\n");

  const { text } = await llm.complete({
    prompt: `Based on the following information about a joke:\n${details}\nProvide a comprehensive report on the joke's quality and impact.`,
  });
  return new StopEvent({ result: text });
};
|
||||
|
||||
const jokeFlow = new Workflow();
|
||||
jokeFlow.addStep(StartEvent, generateJoke);
|
||||
jokeFlow.addStep(JokeEvent, critiqueJoke);
|
||||
jokeFlow.addStep(JokeEvent, analyzeJoke);
|
||||
jokeFlow.addStep([AnalysisEvent, CritiqueEvent], reportJoke);
|
||||
|
||||
// Usage
|
||||
/** Runs the joke workflow on the topic "pirates" and prints the final report. */
async function main() {
  const { data } = await jokeFlow.run("pirates");
  console.log(data.result);
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -0,0 +1,38 @@
|
||||
import {
|
||||
Context,
|
||||
StartEvent,
|
||||
StopEvent,
|
||||
Workflow,
|
||||
WorkflowEvent,
|
||||
} from "@llamaindex/core/workflow";
|
||||
import { OpenAI } from "llamaindex";
|
||||
|
||||
// Create LLM instance
|
||||
const llm = new OpenAI();
|
||||
|
||||
// Create a custom event type
|
||||
export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
|
||||
|
||||
/**
 * First workflow step: turns the run's input into a joke prompt, sends it to
 * the LLM and emits the completion text as a JokeEvent.
 */
const generateJoke = async (_context: Context, ev: StartEvent) => {
  const completion = await llm.complete({
    prompt: `Write your best joke about ${ev.data.input}.`,
  });
  const joke = completion.text;
  return new JokeEvent({ joke });
};
|
||||
|
||||
/**
 * Final workflow step: asks the LLM to critique the joke from the incoming
 * JokeEvent and stops the workflow with the critique as its result.
 */
const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
  const completion = await llm.complete({
    prompt: `Give a thorough critique of the following joke: ${ev.data.joke}`,
  });
  const result = completion.text;
  return new StopEvent({ result });
};
|
||||
|
||||
const jokeFlow = new Workflow({ verbose: true });
|
||||
jokeFlow.addStep(StartEvent, generateJoke);
|
||||
jokeFlow.addStep(JokeEvent, critiqueJoke);
|
||||
|
||||
// Usage
|
||||
/** Runs the joke workflow on the topic "pirates" and prints the critique. */
async function main() {
  const { data } = await jokeFlow.run("pirates");
  console.log(data.result);
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -0,0 +1,49 @@
|
||||
import {
|
||||
Context,
|
||||
StartEvent,
|
||||
StopEvent,
|
||||
Workflow,
|
||||
WorkflowEvent,
|
||||
} from "@llamaindex/core/workflow";
|
||||
import { OpenAI } from "llamaindex";
|
||||
|
||||
// Create LLM instance
|
||||
const llm = new OpenAI();
|
||||
|
||||
// Create custom event types
|
||||
export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
|
||||
export class MessageEvent extends WorkflowEvent<{ msg: string }> {}
|
||||
|
||||
/**
 * First workflow step: announces on the event stream which topic is being
 * worked on, then asks the LLM for a joke and emits it as a JokeEvent.
 */
const generateJoke = async (context: Context, ev: StartEvent) => {
  // Publish a progress message before the LLM call starts.
  const progress = new MessageEvent({
    msg: `Generating a joke about: ${ev.data.input}`,
  });
  context.writeEventToStream(progress);

  const { text } = await llm.complete({
    prompt: `Write your best joke about ${ev.data.input}.`,
  });
  return new JokeEvent({ joke: text });
};
|
||||
|
||||
/**
 * Final workflow step: announces the joke under review on the event stream,
 * then asks the LLM for a critique and stops the workflow with it.
 */
const critiqueJoke = async (context: Context, ev: JokeEvent) => {
  // Publish a progress message before the LLM call starts.
  const progress = new MessageEvent({
    msg: `Write a critique of this joke: ${ev.data.joke}`,
  });
  context.writeEventToStream(progress);

  const { text } = await llm.complete({
    prompt: `Give a thorough critique of the following joke: ${ev.data.joke}`,
  });
  return new StopEvent({ result: text });
};
|
||||
|
||||
const jokeFlow = new Workflow();
|
||||
jokeFlow.addStep(StartEvent, generateJoke);
|
||||
jokeFlow.addStep(JokeEvent, critiqueJoke);
|
||||
|
||||
// Usage
|
||||
/**
 * Runs the joke workflow on the topic "pirates", printing progress messages
 * as they are streamed and the final critique once the run completes.
 */
async function main() {
  // Start the run without awaiting it so the event stream can be drained
  // while the steps execute.
  const run = jokeFlow.run("pirates");
  for await (const event of jokeFlow.streamEvents()) {
    // Narrow at runtime instead of asserting the type: an event on the stream
    // that is not a MessageEvent may not carry a `msg`, and a blind cast
    // would print `undefined` (or throw if it has no `data`).
    if (event instanceof MessageEvent) {
      console.log(event.data.msg);
    }
  }
  const result = await run;
  console.log(result.data.result);
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -0,0 +1,37 @@
|
||||
import {
|
||||
Context,
|
||||
StartEvent,
|
||||
StopEvent,
|
||||
Workflow,
|
||||
} from "@llamaindex/core/workflow";
|
||||
|
||||
/**
 * Workflow step that simulates slow work: waits two seconds, then stops the
 * workflow with a fixed message. Neither argument is read.
 */
const longRunning = async (_context: Context, ev: StartEvent) => {
  const delayMs = 2000; // 2 seconds
  await new Promise((done) => {
    setTimeout(done, delayMs);
  });
  return new StopEvent({ result: "We waited 2 seconds" });
};
|
||||
|
||||
/**
 * Demonstrates a run that exceeds its time limit: the workflow is given a
 * timeout of 1 while its only step waits two seconds. The resulting error is
 * logged rather than rethrown.
 */
async function timeout() {
  const shortLived = new Workflow({ verbose: true, timeout: 1 });
  shortLived.addStep(StartEvent, longRunning);

  try {
    // Expected to time out before the step finishes.
    await shortLived.run("Let's start");
  } catch (failure) {
    console.error(failure);
  }
}
|
||||
|
||||
/**
 * Demonstrates the same step completing normally: with the timeout raised to
 * 3 seconds the two-second step finishes in time and its result is printed.
 */
async function notimeout() {
  const patient = new Workflow({ verbose: true, timeout: 3 });
  patient.addStep(StartEvent, longRunning);

  const { data } = await patient.run("Let's start");
  console.log(data.result);
}
|
||||
|
||||
/** Runs the timing-out scenario first, then the one that completes. */
async function main() {
  // Strictly sequential: the second scenario starts only after the first
  // has settled.
  for (const scenario of [timeout, notimeout]) {
    await scenario();
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
@@ -0,0 +1,53 @@
|
||||
import {
|
||||
Context,
|
||||
StartEvent,
|
||||
StopEvent,
|
||||
Workflow,
|
||||
WorkflowEvent,
|
||||
} from "@llamaindex/core/workflow";
|
||||
import { OpenAI } from "llamaindex";
|
||||
|
||||
// Create LLM instance
|
||||
const llm = new OpenAI();
|
||||
|
||||
// Create a custom event type
|
||||
export class JokeEvent extends WorkflowEvent<{ joke: string }> {}
|
||||
|
||||
/**
 * Workflow step: asks the LLM for a joke about the run's input and emits the
 * completion text as a JokeEvent.
 */
const generateJoke = async (_context: Context, ev: StartEvent) => {
  const topic = ev.data.input;
  const completion = await llm.complete({
    prompt: `Write your best joke about ${topic}.`,
  });
  return new JokeEvent({ joke: completion.text });
};
|
||||
|
||||
/**
 * Workflow step: asks the LLM to critique the joke from the incoming
 * JokeEvent and stops the workflow with the critique as its result.
 */
const critiqueJoke = async (_context: Context, ev: JokeEvent) => {
  const joke = ev.data.joke;
  const completion = await llm.complete({
    prompt: `Give a thorough critique of the following joke: ${joke}`,
  });
  return new StopEvent({ result: completion.text });
};
|
||||
|
||||
/**
 * Builds a deliberately mis-declared workflow with validation enabled and
 * logs the resulting error instead of letting it propagate.
 *
 * The first step is registered with `outputs: StopEvent` although
 * `generateJoke` returns a JokeEvent, so no step declares the JokeEvent that
 * `critiqueJoke` consumes.
 */
async function validateFails() {
  try {
    const brokenFlow = new Workflow({ verbose: true, validate: true });
    // Wrong on purpose: generateJoke does not produce a StopEvent.
    brokenFlow.addStep(StartEvent, generateJoke, { outputs: StopEvent });
    brokenFlow.addStep(JokeEvent, critiqueJoke, { outputs: StopEvent });
    await brokenFlow.run("pirates");
  } catch (failure) {
    console.error("Validation failed:", failure);
  }
}
|
||||
|
||||
/**
 * Builds the correctly declared workflow (each step's `outputs` matches what
 * the step returns), runs it with validation enabled and prints the critique.
 */
async function validate() {
  const checkedFlow = new Workflow({ verbose: true, validate: true });
  checkedFlow.addStep(StartEvent, generateJoke, { outputs: JokeEvent });
  checkedFlow.addStep(JokeEvent, critiqueJoke, { outputs: StopEvent });

  const { data } = await checkedFlow.run("pirates");
  console.log(data.result);
}
|
||||
|
||||
// Usage
|
||||
/** Runs the failing validation scenario first, then the passing one. */
async function main() {
  // Strictly sequential: the second scenario starts only after the first
  // has settled.
  for (const scenario of [validateFails, validate]) {
    await scenario();
  }
}
|
||||
|
||||
main().catch(console.error);
|
||||
+14
-12
@@ -19,26 +19,28 @@
|
||||
},
|
||||
"devDependencies": {
|
||||
"@changesets/cli": "^2.27.5",
|
||||
"@typescript-eslint/eslint-plugin": "^7.13.1",
|
||||
"eslint": "^8.57.0",
|
||||
"eslint-config-next": "^14.2.5",
|
||||
"@typescript-eslint/eslint-plugin": "^8.3.0",
|
||||
"eslint": "8.57.0",
|
||||
"eslint-config-next": "^14.2.7",
|
||||
"eslint-config-prettier": "^9.1.0",
|
||||
"eslint-config-turbo": "^2.0.5",
|
||||
"eslint-plugin-react": "7.34.3",
|
||||
"husky": "^9.0.11",
|
||||
"lint-staged": "^15.2.7",
|
||||
"madge": "^7.0.0",
|
||||
"prettier": "^3.3.2",
|
||||
"eslint-config-turbo": "^2.1.0",
|
||||
"eslint-plugin-react": "7.35.0",
|
||||
"husky": "^9.1.5",
|
||||
"lint-staged": "^15.2.9",
|
||||
"madge": "^8.0.0",
|
||||
"prettier": "^3.3.3",
|
||||
"prettier-plugin-organize-imports": "^4.0.0",
|
||||
"turbo": "^2.0.5",
|
||||
"typescript": "^5.5.3"
|
||||
"turbo": "^2.1.0",
|
||||
"typescript": "^5.5.4"
|
||||
},
|
||||
"packageManager": "pnpm@9.5.0",
|
||||
"pnpm": {
|
||||
"overrides": {
|
||||
"trim": "1.0.1",
|
||||
"@babel/traverse": "7.23.2",
|
||||
"protobufjs": "7.2.6"
|
||||
},
|
||||
"patchedDependencies": {
|
||||
"python-format-js@1.4.3": "patches/python-format-js@1.4.3.patch"
|
||||
}
|
||||
},
|
||||
"lint-staged": {
|
||||
|
||||
@@ -1,5 +1,12 @@
|
||||
# @llamaindex/autotool
|
||||
|
||||
## 3.0.0
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [11feef8]
|
||||
- llamaindex@0.6.0
|
||||
|
||||
## 2.0.1
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,5 +1,75 @@
|
||||
# @llamaindex/autotool-01-node-example
|
||||
|
||||
## 0.0.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [11feef8]
|
||||
- llamaindex@0.6.0
|
||||
- @llamaindex/autotool@3.0.0
|
||||
|
||||
## 0.0.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [7edeb1c]
|
||||
- llamaindex@0.5.27
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.0.7
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ffe0cd1]
|
||||
- Updated dependencies [ffe0cd1]
|
||||
- llamaindex@0.5.26
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.0.6
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4810364]
|
||||
- Updated dependencies [d3bc663]
|
||||
- llamaindex@0.5.25
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.0.5
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.5.24
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.0.4
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.5.23
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.0.3
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4648da6]
|
||||
- llamaindex@0.5.22
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.0.2
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae1149f]
|
||||
- Updated dependencies [2411c9f]
|
||||
- Updated dependencies [e8f229c]
|
||||
- Updated dependencies [11b3856]
|
||||
- Updated dependencies [83d7f41]
|
||||
- Updated dependencies [0148354]
|
||||
- Updated dependencies [1711f6d]
|
||||
- llamaindex@0.5.21
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## null
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -5,13 +5,13 @@
|
||||
"dependencies": {
|
||||
"@llamaindex/autotool": "workspace:*",
|
||||
"llamaindex": "workspace:*",
|
||||
"openai": "^4.52.5"
|
||||
"openai": "^4.57.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"tsx": "^4.15.6"
|
||||
"tsx": "^4.19.0"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
|
||||
},
|
||||
"version": null
|
||||
"version": "0.0.9"
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ const openai = new OpenAI();
|
||||
stream: false,
|
||||
});
|
||||
|
||||
const toolCalls = response.choices[0].message.tool_calls ?? [];
|
||||
const toolCalls = response.choices[0]!.message.tool_calls ?? [];
|
||||
for (const toolCall of toolCalls) {
|
||||
toolCall.function.name;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,109 @@
|
||||
# @llamaindex/autotool-02-next-example
|
||||
|
||||
## 0.1.53
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [11feef8]
|
||||
- llamaindex@0.6.0
|
||||
- @llamaindex/autotool@3.0.0
|
||||
|
||||
## 0.1.52
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [7edeb1c]
|
||||
- llamaindex@0.5.27
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.51
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ffe0cd1]
|
||||
- Updated dependencies [ffe0cd1]
|
||||
- llamaindex@0.5.26
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.50
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4810364]
|
||||
- Updated dependencies [d3bc663]
|
||||
- llamaindex@0.5.25
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.49
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.5.24
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.48
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- llamaindex@0.5.23
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.47
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4648da6]
|
||||
- llamaindex@0.5.22
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.46
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [ae1149f]
|
||||
- Updated dependencies [2411c9f]
|
||||
- Updated dependencies [e8f229c]
|
||||
- Updated dependencies [11b3856]
|
||||
- Updated dependencies [83d7f41]
|
||||
- Updated dependencies [0148354]
|
||||
- Updated dependencies [1711f6d]
|
||||
- llamaindex@0.5.21
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.45
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [d9d6c56]
|
||||
- Updated dependencies [22ff486]
|
||||
- Updated dependencies [eed0b04]
|
||||
- llamaindex@0.5.20
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.44
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [fcbf183]
|
||||
- llamaindex@0.5.19
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.43
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [8b66cf4]
|
||||
- llamaindex@0.5.18
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.42
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [c654398]
|
||||
- llamaindex@0.5.17
|
||||
- @llamaindex/autotool@2.0.1
|
||||
|
||||
## 0.1.41
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool-02-next-example",
|
||||
"private": true,
|
||||
"version": "0.1.41",
|
||||
"version": "0.1.53",
|
||||
"scripts": {
|
||||
"dev": "next dev",
|
||||
"build": "next build",
|
||||
@@ -10,28 +10,28 @@
|
||||
"dependencies": {
|
||||
"@llamaindex/autotool": "workspace:*",
|
||||
"@radix-ui/react-slot": "^1.1.0",
|
||||
"ai": "^3.2.1",
|
||||
"ai": "^3.3.21",
|
||||
"class-variance-authority": "^0.7.0",
|
||||
"dotenv": "^16.3.1",
|
||||
"llamaindex": "workspace:*",
|
||||
"lucide-react": "^0.407.0",
|
||||
"lucide-react": "^0.436.0",
|
||||
"next": "14.3.0-canary.51",
|
||||
"react": "^18.3.1",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-markdown": "^9.0.1",
|
||||
"react-syntax-highlighter": "^15.5.0",
|
||||
"sonner": "^1.5.0",
|
||||
"tailwind-merge": "^2.1.0"
|
||||
"tailwind-merge": "^2.5.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.12.11",
|
||||
"@types/react": "^18.3.3",
|
||||
"@types/node": "^22.5.1",
|
||||
"@types/react": "^18.3.5",
|
||||
"@types/react-dom": "^18.3.0",
|
||||
"@types/react-syntax-highlighter": "^15.5.11",
|
||||
"autoprefixer": "^10.4.16",
|
||||
"autoprefixer": "^10.4.20",
|
||||
"cross-env": "^7.0.3",
|
||||
"postcss": "^8.4.32",
|
||||
"tailwindcss": "^3.4.4",
|
||||
"typescript": "^5.5.3"
|
||||
"postcss": "^8.4.41",
|
||||
"tailwindcss": "^3.4.10",
|
||||
"typescript": "^5.5.4"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/autotool",
|
||||
"type": "module",
|
||||
"version": "2.0.1",
|
||||
"version": "3.0.0",
|
||||
"description": "auto transpile your JS function to LLM Agent compatible",
|
||||
"files": [
|
||||
"dist",
|
||||
@@ -45,13 +45,13 @@
|
||||
"dev": "bunchee --watch"
|
||||
},
|
||||
"dependencies": {
|
||||
"@swc/core": "^1.6.3",
|
||||
"jotai": "^2.8.3",
|
||||
"typedoc": "^0.26.4",
|
||||
"unplugin": "^1.10.1"
|
||||
"@swc/core": "^1.7.22",
|
||||
"jotai": "2.8.4",
|
||||
"typedoc": "^0.26.6",
|
||||
"unplugin": "^1.12.2"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"llamaindex": "^0.5.16",
|
||||
"llamaindex": "^0.6.0",
|
||||
"openai": "^4",
|
||||
"typescript": "^4"
|
||||
},
|
||||
@@ -67,16 +67,16 @@
|
||||
}
|
||||
},
|
||||
"devDependencies": {
|
||||
"@swc/types": "^0.1.8",
|
||||
"@swc/types": "^0.1.12",
|
||||
"@types/json-schema": "^7.0.15",
|
||||
"@types/node": "^20.12.11",
|
||||
"bunchee": "5.3.1",
|
||||
"@types/node": "^22.5.1",
|
||||
"bunchee": "5.3.2",
|
||||
"llamaindex": "workspace:*",
|
||||
"next": "14.2.5",
|
||||
"rollup": "^4.18.0",
|
||||
"tsx": "^4.15.6",
|
||||
"typescript": "^5.5.3",
|
||||
"vitest": "^2.0.2",
|
||||
"webpack": "^5.92.1"
|
||||
"next": "14.2.7",
|
||||
"rollup": "^4.21.2",
|
||||
"tsx": "^4.19.0",
|
||||
"typescript": "^5.5.4",
|
||||
"vitest": "^2.0.5",
|
||||
"webpack": "^5.94.0"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,11 +16,16 @@ const openaiToolsAtom = atom<ChatCompletionTool[]>((get) => {
|
||||
const metadata = get(toolMetadataAtom);
|
||||
return metadata.map(([metadata]) => ({
|
||||
type: "function",
|
||||
function: {
|
||||
parameters: metadata.parameters,
|
||||
name: metadata.name,
|
||||
description: metadata.description,
|
||||
},
|
||||
function: metadata.parameters
|
||||
? {
|
||||
parameters: metadata.parameters,
|
||||
name: metadata.name,
|
||||
description: metadata.description,
|
||||
}
|
||||
: {
|
||||
name: metadata.name,
|
||||
description: metadata.description,
|
||||
},
|
||||
}));
|
||||
});
|
||||
|
||||
|
||||
@@ -17,7 +17,7 @@ export type Info = {
|
||||
* @internal
|
||||
*/
|
||||
export type InfoString = {
|
||||
originalFunction?: string;
|
||||
originalFunction: string | undefined;
|
||||
parameterMapping: Record<string, number>;
|
||||
};
|
||||
|
||||
|
||||
@@ -1,5 +1,17 @@
|
||||
# @llamaindex/cloud
|
||||
|
||||
## 0.2.4
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 4810364: fix: bump version
|
||||
|
||||
## 0.2.3
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 0bf8d80: fix: bump version
|
||||
|
||||
## 0.2.2
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -4,6 +4,7 @@ export default defineConfig({
|
||||
// you can download this file to get the latest version of the OpenAPI document
|
||||
// @link https://api.cloud.llamaindex.ai/api/openapi.json
|
||||
input: "./openapi.json",
|
||||
client: "@hey-api/client-fetch",
|
||||
output: {
|
||||
path: "./src/client",
|
||||
format: "prettier",
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
{
|
||||
"name": "@llamaindex/cloud",
|
||||
"version": "0.2.2",
|
||||
"version": "0.2.4",
|
||||
"type": "module",
|
||||
"license": "MIT",
|
||||
"scripts": {
|
||||
"generate": "pnpm dlx @hey-api/openapi-ts@0.49.0",
|
||||
"generate": "pnpx @hey-api/openapi-ts@0.53.0",
|
||||
"build": "pnpm run generate && bunchee"
|
||||
},
|
||||
"files": [
|
||||
@@ -34,7 +34,8 @@
|
||||
"directory": "packages/cloud"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@hey-api/openapi-ts": "^0.48.0",
|
||||
"bunchee": "5.3.1"
|
||||
"@hey-api/client-fetch": "^0.2.4",
|
||||
"@hey-api/openapi-ts": "^0.53.0",
|
||||
"bunchee": "5.3.2"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,41 @@
|
||||
# @llamaindex/community
|
||||
|
||||
## 0.0.34
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [11feef8]
|
||||
- @llamaindex/core@0.2.0
|
||||
|
||||
## 0.0.33
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [711c814]
|
||||
- @llamaindex/core@0.1.12
|
||||
|
||||
## 0.0.32
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4648da6]
|
||||
- @llamaindex/env@0.1.10
|
||||
- @llamaindex/core@0.1.11
|
||||
|
||||
## 0.0.31
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [0148354]
|
||||
- @llamaindex/core@0.1.10
|
||||
|
||||
## 0.0.30
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [e27e7dd]
|
||||
- @llamaindex/core@0.1.9
|
||||
|
||||
## 0.0.29
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/community",
|
||||
"description": "Community package for LlamaIndexTS",
|
||||
"version": "0.0.29",
|
||||
"version": "0.0.34",
|
||||
"type": "module",
|
||||
"types": "dist/type/index.d.ts",
|
||||
"main": "dist/cjs/index.js",
|
||||
@@ -42,11 +42,11 @@
|
||||
"dev": "bunchee --watch"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^20.14.2",
|
||||
"bunchee": "5.3.1"
|
||||
"@types/node": "^22.5.1",
|
||||
"bunchee": "5.3.2"
|
||||
},
|
||||
"dependencies": {
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.613.0",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.642.0",
|
||||
"@llamaindex/core": "workspace:*",
|
||||
"@llamaindex/env": "workspace:*"
|
||||
}
|
||||
|
||||
@@ -28,9 +28,9 @@ export const mergeNeighboringSameRoleMessages = (
|
||||
): AnthropicMessage[] => {
|
||||
return messages.reduce(
|
||||
(result: AnthropicMessage[], current: AnthropicMessage, index: number) => {
|
||||
if (index > 0 && messages[index - 1].role === current.role) {
|
||||
result[result.length - 1].content = [
|
||||
...result[result.length - 1].content,
|
||||
if (index > 0 && messages[index - 1]!.role === current.role) {
|
||||
result[result.length - 1]!.content = [
|
||||
...result[result.length - 1]!.content,
|
||||
...current.content,
|
||||
];
|
||||
} else {
|
||||
@@ -128,7 +128,7 @@ export const mapChatMessagesToAnthropicMessages = <
|
||||
);
|
||||
})
|
||||
.filter((message: AnthropicMessage) => {
|
||||
const content = message.content[0];
|
||||
const content = message.content[0]!;
|
||||
if (content.type === "text" && !content.text) return false;
|
||||
if (content.type === "image" && !content.source.data) return false;
|
||||
if (content.type === "image" && message.role === "assistant")
|
||||
@@ -151,12 +151,12 @@ export const extractDataUrlComponents = (
|
||||
} => {
|
||||
const parts = dataUrl.split(";base64,");
|
||||
|
||||
if (parts.length !== 2 || !parts[0].startsWith("data:")) {
|
||||
if (parts.length !== 2 || !parts[0]!.startsWith("data:")) {
|
||||
throw new Error("Invalid data URL");
|
||||
}
|
||||
|
||||
const mimeType = parts[0].slice(5);
|
||||
const base64 = parts[1];
|
||||
const mimeType = parts[0]!.slice(5);
|
||||
const base64 = parts[1]!;
|
||||
|
||||
return {
|
||||
mimeType,
|
||||
|
||||
@@ -153,12 +153,15 @@ export const TOOL_CALL_MODELS = [
|
||||
|
||||
const getProvider = (model: string): Provider => {
|
||||
const providerName = model.split(".")[0];
|
||||
if (!providerName) {
|
||||
throw new Error(`Model ${model} is not supported`);
|
||||
}
|
||||
if (!(providerName in PROVIDERS)) {
|
||||
throw new Error(
|
||||
`Provider ${providerName} for model ${model} is not supported`,
|
||||
);
|
||||
}
|
||||
return PROVIDERS[providerName];
|
||||
return PROVIDERS[providerName]!;
|
||||
};
|
||||
|
||||
export type BedrockModelParams = {
|
||||
|
||||
@@ -34,7 +34,7 @@ export class MetaProvider extends Provider<MetaStreamEvent> {
|
||||
const result = this.getResultFromResponse(response);
|
||||
if (!result.generation.trim().startsWith(TOKENS.TOOL_CALL)) return [];
|
||||
const tool = JSON.parse(
|
||||
result.generation.trim().split(TOKENS.TOOL_CALL)[1],
|
||||
result.generation.trim().split(TOKENS.TOOL_CALL)[1]!,
|
||||
);
|
||||
return [
|
||||
{
|
||||
|
||||
@@ -1,5 +1,38 @@
|
||||
# @llamaindex/core
|
||||
|
||||
## 0.2.0
|
||||
|
||||
### Minor Changes
|
||||
|
||||
- 11feef8: Add workflows
|
||||
|
||||
## 0.1.12
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 711c814: fix: patch `python-format-js`
|
||||
|
||||
## 0.1.11
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- Updated dependencies [4648da6]
|
||||
- @llamaindex/env@0.1.10
|
||||
|
||||
## 0.1.10
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- 0148354: refactor: prompt system
|
||||
|
||||
Add `PromptTemplate` module with strong type check.
|
||||
|
||||
## 0.1.9
|
||||
|
||||
### Patch Changes
|
||||
|
||||
- e27e7dd: chore: bump `natural` to 8.0.1
|
||||
|
||||
## 0.1.8
|
||||
|
||||
### Patch Changes
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@llamaindex/core",
|
||||
"type": "module",
|
||||
"version": "0.1.8",
|
||||
"version": "0.2.0",
|
||||
"description": "LlamaIndex Core Module",
|
||||
"exports": {
|
||||
"./node-parser": {
|
||||
@@ -115,6 +115,48 @@
|
||||
"types": "./dist/utils/index.d.ts",
|
||||
"default": "./dist/utils/index.js"
|
||||
}
|
||||
},
|
||||
"./prompts": {
|
||||
"require": {
|
||||
"types": "./dist/prompts/index.d.cts",
|
||||
"default": "./dist/prompts/index.cjs"
|
||||
},
|
||||
"import": {
|
||||
"types": "./dist/prompts/index.d.ts",
|
||||
"default": "./dist/prompts/index.js"
|
||||
},
|
||||
"default": {
|
||||
"types": "./dist/prompts/index.d.ts",
|
||||
"default": "./dist/prompts/index.js"
|
||||
}
|
||||
},
|
||||
"./indices": {
|
||||
"require": {
|
||||
"types": "./dist/indices/index.d.cts",
|
||||
"default": "./dist/indices/index.cjs"
|
||||
},
|
||||
"import": {
|
||||
"types": "./dist/indices/index.d.ts",
|
||||
"default": "./dist/indices/index.js"
|
||||
},
|
||||
"default": {
|
||||
"types": "./dist/indices/index.d.ts",
|
||||
"default": "./dist/indices/index.js"
|
||||
}
|
||||
},
|
||||
"./workflow": {
|
||||
"require": {
|
||||
"types": "./dist/workflow/index.d.cts",
|
||||
"default": "./dist/workflow/index.cjs"
|
||||
},
|
||||
"import": {
|
||||
"types": "./dist/workflow/index.d.ts",
|
||||
"default": "./dist/workflow/index.js"
|
||||
},
|
||||
"default": {
|
||||
"types": "./dist/workflow/index.d.ts",
|
||||
"default": "./dist/workflow/index.js"
|
||||
}
|
||||
}
|
||||
},
|
||||
"files": [
|
||||
@@ -130,13 +172,14 @@
|
||||
"url": "https://github.com/himself65/LlamaIndexTS.git"
|
||||
},
|
||||
"devDependencies": {
|
||||
"ajv": "^8.16.0",
|
||||
"bunchee": "5.3.1",
|
||||
"natural": "^7.1.0"
|
||||
"ajv": "^8.17.1",
|
||||
"bunchee": "5.3.2",
|
||||
"natural": "^8.0.1",
|
||||
"python-format-js": "^1.4.3"
|
||||
},
|
||||
"dependencies": {
|
||||
"@llamaindex/env": "workspace:*",
|
||||
"@types/node": "^20.14.9",
|
||||
"@types/node": "^22.5.1",
|
||||
"zod": "^3.23.8"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ export abstract class BaseEmbedding extends TransformComponent {
|
||||
const embeddings = await this.getTextEmbeddingsBatch(texts, options);
|
||||
|
||||
for (let i = 0; i < nodes.length; i++) {
|
||||
nodes[i].embedding = embeddings[i];
|
||||
nodes[i]!.embedding = embeddings[i];
|
||||
}
|
||||
|
||||
return nodes;
|
||||
@@ -120,7 +120,7 @@ export async function batchEmbeddings<T>(
|
||||
const curBatch: T[] = [];
|
||||
|
||||
for (let i = 0; i < queue.length; i++) {
|
||||
curBatch.push(queue[i]);
|
||||
curBatch.push(queue[i]!);
|
||||
if (i == queue.length - 1 || curBatch.length == chunkSize) {
|
||||
const embeddings = await embedFunc(curBatch);
|
||||
|
||||
|
||||
@@ -35,20 +35,20 @@ export function similarity(
|
||||
function norm(x: number[]): number {
|
||||
let result = 0;
|
||||
for (let i = 0; i < x.length; i++) {
|
||||
result += x[i] * x[i];
|
||||
result += x[i]! * x[i]!;
|
||||
}
|
||||
return Math.sqrt(result);
|
||||
}
|
||||
|
||||
switch (mode) {
|
||||
case SimilarityType.EUCLIDEAN: {
|
||||
const difference = embedding1.map((x, i) => x - embedding2[i]);
|
||||
const difference = embedding1.map((x, i) => x - embedding2[i]!);
|
||||
return -norm(difference);
|
||||
}
|
||||
case SimilarityType.DOT_PRODUCT: {
|
||||
let result = 0;
|
||||
for (let i = 0; i < embedding1.length; i++) {
|
||||
result += embedding1[i] * embedding2[i];
|
||||
result += embedding1[i]! * embedding2[i]!;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
import { path } from "@llamaindex/env";
|
||||
|
||||
//#region llm
|
||||
export const DEFAULT_CONTEXT_WINDOW = 3900;
|
||||
export const DEFAULT_NUM_OUTPUTS = 256;
|
||||
export const DEFAULT_CHUNK_SIZE = 1024;
|
||||
export const DEFAULT_CHUNK_OVERLAP = 20;
|
||||
export const DEFAULT_CHUNK_OVERLAP_RATIO = 0.1;
|
||||
export const DEFAULT_PADDING = 5;
|
||||
//#endregion
|
||||
//#region storage
|
||||
export const DEFAULT_COLLECTION = "data";
|
||||
export const DEFAULT_PERSIST_DIR = path.join("./storage");
|
||||
export const DEFAULT_INDEX_STORE_PERSIST_FILENAME = "index_store.json";
|
||||
export const DEFAULT_DOC_STORE_PERSIST_FILENAME = "doc_store.json";
|
||||
export const DEFAULT_VECTOR_STORE_PERSIST_FILENAME = "vector_store.json";
|
||||
export const DEFAULT_GRAPH_STORE_PERSIST_FILENAME = "graph_store.json";
|
||||
export const DEFAULT_NAMESPACE = "docstore";
|
||||
export const DEFAULT_IMAGE_VECTOR_NAMESPACE = "images";
|
||||
//#endregion
|
||||
//#region llama cloud
|
||||
export const DEFAULT_PROJECT_NAME = "Default";
|
||||
export const DEFAULT_BASE_URL = "https://api.cloud.llamaindex.ai";
|
||||
//#endregion
|
||||
@@ -1,3 +1,4 @@
|
||||
export * from "./constants";
|
||||
export { Settings } from "./settings";
|
||||
export { CallbackManager } from "./settings/callback-manager";
|
||||
export type {
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { Tokenizer } from "@llamaindex/env";
|
||||
import type { LLM } from "../llms";
|
||||
import {
|
||||
type CallbackManager,
|
||||
getCallbackManager,
|
||||
@@ -10,6 +11,7 @@ import {
|
||||
setChunkSize,
|
||||
withChunkSize,
|
||||
} from "./settings/chunk-size";
|
||||
import { getLLM, setLLM, withLLM } from "./settings/llm";
|
||||
import {
|
||||
getTokenizer,
|
||||
setTokenizer,
|
||||
@@ -17,6 +19,15 @@ import {
|
||||
} from "./settings/tokenizer";
|
||||
|
||||
export const Settings = {
|
||||
get llm() {
|
||||
return getLLM();
|
||||
},
|
||||
set llm(llm) {
|
||||
setLLM(llm);
|
||||
},
|
||||
withLLM<Result>(llm: LLM, fn: () => Result): Result {
|
||||
return withLLM(llm, fn);
|
||||
},
|
||||
get tokenizer() {
|
||||
return getTokenizer();
|
||||
},
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
import { AsyncLocalStorage } from "@llamaindex/env";
|
||||
import type { LLM } from "../../llms";
|
||||
|
||||
const llmAsyncLocalStorage = new AsyncLocalStorage<LLM>();
|
||||
let globalLLM: LLM | undefined;
|
||||
|
||||
/**
 * Resolves the LLM for the current execution context.
 *
 * An LLM bound by `withLLM` for the running async scope takes precedence; the
 * instance stored by `setLLM` is only the fallback. Consulting the global
 * first would make `withLLM` a no-op as soon as a global LLM had been set.
 *
 * @returns The scoped LLM if one is active, otherwise the global LLM.
 * @throws Error when neither a scoped nor a global LLM is available.
 */
export function getLLM(): LLM {
  const currentLLM = llmAsyncLocalStorage.getStore() ?? globalLLM;
  if (!currentLLM) {
    throw new Error(
      "Cannot find LLM, please set `Settings.llm = ...` on the top of your code",
    );
  }
  return currentLLM;
}
|
||||
|
||||
/**
 * Stores `llm` in the module-level `globalLLM` slot that `getLLM` reads.
 *
 * @param llm - The LLM instance to register globally.
 */
export function setLLM(llm: LLM): void {
  globalLLM = llm;
}
|
||||
|
||||
/**
 * Invokes `fn` with `llm` installed as the store of the LLM async-local
 * storage for the duration of the call.
 *
 * @param llm - The LLM to bind for the call.
 * @param fn - The callback to execute.
 * @returns Whatever `fn` returns.
 */
export function withLLM<Result>(llm: LLM, fn: () => Result): Result {
  const outcome = llmAsyncLocalStorage.run(llm, fn);
  return outcome;
}
|
||||
@@ -0,0 +1,5 @@
|
||||
export {
|
||||
PromptHelper,
|
||||
getBiggestPrompt,
|
||||
type PromptHelperOptions,
|
||||
} from "./prompt-helper";
|
||||
+44
-37
@@ -1,31 +1,45 @@
|
||||
import { SentenceSplitter } from "@llamaindex/core/node-parser";
|
||||
import { type Tokenizer, tokenizers } from "@llamaindex/env";
|
||||
import type { SimplePrompt } from "./Prompt.js";
|
||||
import {
|
||||
DEFAULT_CHUNK_OVERLAP_RATIO,
|
||||
DEFAULT_CONTEXT_WINDOW,
|
||||
DEFAULT_NUM_OUTPUTS,
|
||||
DEFAULT_PADDING,
|
||||
} from "./constants.js";
|
||||
} from "../global";
|
||||
import { SentenceSplitter } from "../node-parser";
|
||||
import type { PromptTemplate } from "../prompts";
|
||||
|
||||
export function getEmptyPromptTxt(prompt: SimplePrompt) {
|
||||
return prompt({});
|
||||
/**
|
||||
* Get the empty prompt text given a prompt.
|
||||
*/
|
||||
function getEmptyPromptTxt(prompt: PromptTemplate) {
|
||||
return prompt.format({
|
||||
...Object.fromEntries(
|
||||
[...prompt.templateVars.keys()].map((key) => [key, ""]),
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Get biggest empty prompt size from a list of prompts.
|
||||
* Used to calculate the maximum size of inputs to the LLM.
|
||||
* @param prompts
|
||||
* @returns
|
||||
*/
|
||||
export function getBiggestPrompt(prompts: SimplePrompt[]) {
|
||||
export function getBiggestPrompt(prompts: PromptTemplate[]): PromptTemplate {
|
||||
const emptyPromptTexts = prompts.map(getEmptyPromptTxt);
|
||||
const emptyPromptLengths = emptyPromptTexts.map((text) => text.length);
|
||||
const maxEmptyPromptLength = Math.max(...emptyPromptLengths);
|
||||
const maxEmptyPromptIndex = emptyPromptLengths.indexOf(maxEmptyPromptLength);
|
||||
return prompts[maxEmptyPromptIndex];
|
||||
return prompts[maxEmptyPromptIndex]!;
|
||||
}
|
||||
|
||||
export type PromptHelperOptions = {
|
||||
contextWindow?: number;
|
||||
numOutput?: number;
|
||||
chunkOverlapRatio?: number;
|
||||
chunkSizeLimit?: number;
|
||||
tokenizer?: Tokenizer;
|
||||
separator?: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* A collection of helper functions for working with prompts.
|
||||
*/
|
||||
@@ -33,19 +47,19 @@ export class PromptHelper {
|
||||
contextWindow = DEFAULT_CONTEXT_WINDOW;
|
||||
numOutput = DEFAULT_NUM_OUTPUTS;
|
||||
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO;
|
||||
chunkSizeLimit?: number;
|
||||
chunkSizeLimit: number | undefined;
|
||||
tokenizer: Tokenizer;
|
||||
separator = " ";
|
||||
|
||||
// eslint-disable-next-line max-params
|
||||
constructor(
|
||||
contextWindow = DEFAULT_CONTEXT_WINDOW,
|
||||
numOutput = DEFAULT_NUM_OUTPUTS,
|
||||
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO,
|
||||
chunkSizeLimit?: number,
|
||||
tokenizer?: Tokenizer,
|
||||
separator = " ",
|
||||
) {
|
||||
constructor(options: PromptHelperOptions = {}) {
|
||||
const {
|
||||
contextWindow = DEFAULT_CONTEXT_WINDOW,
|
||||
numOutput = DEFAULT_NUM_OUTPUTS,
|
||||
chunkOverlapRatio = DEFAULT_CHUNK_OVERLAP_RATIO,
|
||||
chunkSizeLimit,
|
||||
tokenizer,
|
||||
separator = " ",
|
||||
} = options;
|
||||
this.contextWindow = contextWindow;
|
||||
this.numOutput = numOutput;
|
||||
this.chunkOverlapRatio = chunkOverlapRatio;
|
||||
@@ -59,7 +73,7 @@ export class PromptHelper {
|
||||
* @param prompt
|
||||
* @returns
|
||||
*/
|
||||
private getAvailableContextSize(prompt: SimplePrompt) {
|
||||
private getAvailableContextSize(prompt: PromptTemplate) {
|
||||
const emptyPromptText = getEmptyPromptTxt(prompt);
|
||||
const promptTokens = this.tokenizer.encode(emptyPromptText);
|
||||
const numPromptTokens = promptTokens.length;
|
||||
@@ -69,16 +83,12 @@ export class PromptHelper {
|
||||
|
||||
/**
|
||||
* Find the maximum size of each chunk given a prompt.
|
||||
* @param prompt
|
||||
* @param numChunks
|
||||
* @param padding
|
||||
* @returns
|
||||
*/
|
||||
private getAvailableChunkSize(
|
||||
prompt: SimplePrompt,
|
||||
prompt: PromptTemplate,
|
||||
numChunks = 1,
|
||||
padding = 5,
|
||||
) {
|
||||
): number {
|
||||
const availableContextSize = this.getAvailableContextSize(prompt);
|
||||
|
||||
const result = Math.floor(availableContextSize / numChunks) - padding;
|
||||
@@ -92,13 +102,9 @@ export class PromptHelper {
|
||||
|
||||
/**
|
||||
* Creates a text splitter with the correct chunk sizes and overlaps given a prompt.
|
||||
* @param prompt
|
||||
* @param numChunks
|
||||
* @param padding
|
||||
* @returns
|
||||
*/
|
||||
getTextSplitterGivenPrompt(
|
||||
prompt: SimplePrompt,
|
||||
prompt: PromptTemplate,
|
||||
numChunks = 1,
|
||||
padding = DEFAULT_PADDING,
|
||||
) {
|
||||
@@ -107,18 +113,19 @@ export class PromptHelper {
|
||||
throw new Error("Got 0 as available chunk size");
|
||||
}
|
||||
const chunkOverlap = this.chunkOverlapRatio * chunkSize;
|
||||
return new SentenceSplitter({ chunkSize, chunkOverlap });
|
||||
return new SentenceSplitter({
|
||||
chunkSize,
|
||||
chunkOverlap,
|
||||
separator: this.separator,
|
||||
tokenizer: this.tokenizer,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Repack resplits the strings based on the optimal text splitter.
|
||||
* @param prompt
|
||||
* @param textChunks
|
||||
* @param padding
|
||||
* @returns
|
||||
*/
|
||||
repack(
|
||||
prompt: SimplePrompt,
|
||||
prompt: PromptTemplate,
|
||||
textChunks: string[],
|
||||
padding = DEFAULT_PADDING,
|
||||
) {
|
||||
@@ -103,7 +103,7 @@ export type LLMMetadata = {
|
||||
model: string;
|
||||
temperature: number;
|
||||
topP: number;
|
||||
maxTokens?: number;
|
||||
maxTokens?: number | undefined;
|
||||
contextWindow: number;
|
||||
tokenizer: Tokenizers | undefined;
|
||||
};
|
||||
@@ -141,7 +141,7 @@ export interface LLMCompletionParamsStreaming extends LLMCompletionParamsBase {
|
||||
|
||||
export interface LLMCompletionParamsNonStreaming
|
||||
extends LLMCompletionParamsBase {
|
||||
stream?: false | null;
|
||||
stream?: false | null | undefined;
|
||||
}
|
||||
|
||||
export type MessageContentTextDetail = {
|
||||
|
||||
@@ -122,7 +122,7 @@ export abstract class MetadataAwareTextSplitter extends TextSplitter {
|
||||
throw new TypeError("`texts` and `metadata` must have the same length");
|
||||
}
|
||||
return texts.flatMap((text, i) =>
|
||||
this.splitTextMetadataAware(text, metadata[i]),
|
||||
this.splitTextMetadataAware(text, metadata[i]!),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -35,8 +35,8 @@ export class MarkdownNodeParser extends NodeParser {
|
||||
}
|
||||
metadata = this.updateMetadata(
|
||||
metadata,
|
||||
headerMatch[2],
|
||||
headerMatch[1].trim().length,
|
||||
headerMatch[2]!,
|
||||
headerMatch[1]!.trim().length,
|
||||
);
|
||||
currentSection = `${headerMatch[2]}\n`;
|
||||
} else {
|
||||
@@ -63,7 +63,7 @@ export class MarkdownNodeParser extends NodeParser {
|
||||
for (let i = 1; i < newHeaderLevel; i++) {
|
||||
const key = `Header_${i}`;
|
||||
if (key in headersMetadata) {
|
||||
updatedHeaders[key] = headersMetadata[key];
|
||||
updatedHeaders[key] = headersMetadata[key]!;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,10 +76,10 @@ export class MarkdownNodeParser extends NodeParser {
|
||||
node: TextNode,
|
||||
metadata: Metadata,
|
||||
): TextNode {
|
||||
const newNode = buildNodeFromSplits([textSplit], node, undefined)[0];
|
||||
const newNode = buildNodeFromSplits([textSplit], node, undefined)[0]!;
|
||||
|
||||
if (this.includeMetadata) {
|
||||
newNode.metadata = { ...newNode.metadata, ...metadata };
|
||||
newNode.metadata = { ...newNode!.metadata, ...metadata };
|
||||
}
|
||||
|
||||
return newNode;
|
||||
|
||||
@@ -168,9 +168,9 @@ export class SentenceSplitter extends MetadataAwareTextSplitter {
|
||||
let lastIndex = lastChunk.length - 1;
|
||||
while (
|
||||
lastIndex >= 0 &&
|
||||
currentChunkLength + lastChunk[lastIndex][1] <= this.chunkOverlap
|
||||
currentChunkLength + lastChunk[lastIndex]![1] <= this.chunkOverlap
|
||||
) {
|
||||
const [text, length] = lastChunk[lastIndex];
|
||||
const [text, length] = lastChunk[lastIndex]!;
|
||||
currentChunkLength += length;
|
||||
currentChunk.unshift([text, length]);
|
||||
lastIndex -= 1;
|
||||
@@ -178,7 +178,7 @@ export class SentenceSplitter extends MetadataAwareTextSplitter {
|
||||
};
|
||||
|
||||
while (splits.length > 0) {
|
||||
const curSplit = splits[0];
|
||||
const curSplit = splits[0]!;
|
||||
if (curSplit.tokenSize > chunkSize) {
|
||||
throw new Error("Single token exceeded chunk size");
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
+1
@@ -1,4 +1,5 @@
|
||||
declare class SentenceTokenizer {
|
||||
constructor(abbreviations?: string[]);
|
||||
tokenize(text: string): string[];
|
||||
}
|
||||
|
||||
@@ -0,0 +1,222 @@
|
||||
var __getOwnPropNames = Object.getOwnPropertyNames;
|
||||
var __commonJS = (cb, mod) =>
|
||||
function __require() {
|
||||
return (
|
||||
mod ||
|
||||
(0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod),
|
||||
mod.exports
|
||||
);
|
||||
};
|
||||
|
||||
// lib/natural/tokenizers/tokenizer.js
|
||||
var require_tokenizer = __commonJS({
|
||||
"lib/natural/tokenizers/tokenizer.js"(exports, module) {
|
||||
"use strict";
|
||||
var Tokenizer = class {
|
||||
trim(array) {
|
||||
while (array[array.length - 1] === "") {
|
||||
array.pop();
|
||||
}
|
||||
while (array[0] === "") {
|
||||
array.shift();
|
||||
}
|
||||
return array;
|
||||
}
|
||||
};
|
||||
module.exports = Tokenizer;
|
||||
},
|
||||
});
|
||||
|
||||
// lib/natural/tokenizers/sentence_tokenizer.js
|
||||
var require_sentence_tokenizer = __commonJS({
|
||||
"lib/natural/tokenizers/sentence_tokenizer.js"(exports, module) {
|
||||
var Tokenizer = require_tokenizer();
|
||||
var NUM = "NUMBER";
|
||||
var DELIM = "DELIM";
|
||||
var URI = "URI";
|
||||
var ABBREV = "ABBREV";
|
||||
var DEBUG = false;
|
||||
function generateUniqueCode(base, index) {
|
||||
return `{{${base}_${index}}}`;
|
||||
}
|
||||
function escapeRegExp(string) {
|
||||
return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
||||
}
|
||||
var SentenceTokenizer = class extends Tokenizer {
|
||||
constructor(abbreviations) {
|
||||
super();
|
||||
if (abbreviations) {
|
||||
this.abbreviations = abbreviations;
|
||||
} else {
|
||||
this.abbreviations = [];
|
||||
}
|
||||
this.replacementMap = null;
|
||||
this.replacementCounter = 0;
|
||||
}
|
||||
replaceUrisWithPlaceholders(text) {
|
||||
const urlPattern =
|
||||
/(https?:\/\/\S+|www\.\S+|ftp:\/\/\S+|(mailto:)?[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}|file:\/\/\S+)/gi;
|
||||
const modifiedText = text.replace(urlPattern, (match) => {
|
||||
const placeholder = generateUniqueCode(
|
||||
URI,
|
||||
this.replacementCounter++,
|
||||
);
|
||||
this.replacementMap.set(placeholder, match);
|
||||
return placeholder;
|
||||
});
|
||||
return modifiedText;
|
||||
}
|
||||
replaceAbbreviations(text) {
|
||||
if (this.abbreviations.length === 0) {
|
||||
return text;
|
||||
}
|
||||
const pattern = new RegExp(
|
||||
`(${this.abbreviations.map((abbrev) => escapeRegExp(abbrev)).join("|")})`,
|
||||
"gi",
|
||||
);
|
||||
const replacedText = text.replace(pattern, (match) => {
|
||||
const code = generateUniqueCode(ABBREV, this.replacementCounter++);
|
||||
this.replacementMap.set(code, match);
|
||||
return code;
|
||||
});
|
||||
return replacedText;
|
||||
}
|
||||
replaceDelimitersWithPlaceholders(text) {
|
||||
const delimiterPattern = /([.?!… ]*)([.?!…])(["'”’)}\]]?)/g;
|
||||
const modifiedText = text.replace(
|
||||
delimiterPattern,
|
||||
(match, p1, p2, p3) => {
|
||||
const placeholder = generateUniqueCode(
|
||||
DELIM,
|
||||
this.replacementCounter++,
|
||||
);
|
||||
this.delimiterMap.set(placeholder, p1 + p2 + p3);
|
||||
return placeholder;
|
||||
},
|
||||
);
|
||||
return modifiedText;
|
||||
}
|
||||
splitOnPlaceholders(text, placeholders) {
|
||||
if (this.delimiterMap.size === 0) {
|
||||
return [text];
|
||||
}
|
||||
const keys = Array.from(this.delimiterMap.keys());
|
||||
const pattern = new RegExp(`(${keys.map(escapeRegExp).join("|")})`);
|
||||
const parts = text.split(pattern);
|
||||
const sentences = [];
|
||||
for (let i = 0; i < parts.length; i += 2) {
|
||||
const sentence = parts[i];
|
||||
const placeholder = parts[i + 1] || "";
|
||||
sentences.push(sentence + placeholder);
|
||||
}
|
||||
return sentences;
|
||||
}
|
||||
replaceNumbersWithCode(text) {
|
||||
const numberPattern = /\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b/g;
|
||||
const replacedText = text.replace(numberPattern, (match) => {
|
||||
const code = generateUniqueCode(NUM, this.replacementCounter++);
|
||||
this.replacementMap.set(code, match);
|
||||
return code;
|
||||
});
|
||||
return replacedText;
|
||||
}
|
||||
revertReplacements(text) {
|
||||
let originalText = text;
|
||||
for (const [
|
||||
placeholder,
|
||||
replacement,
|
||||
] of this.replacementMap.entries()) {
|
||||
const pattern = new RegExp(escapeRegExp(placeholder), "g");
|
||||
originalText = originalText.replace(pattern, replacement);
|
||||
}
|
||||
return originalText;
|
||||
}
|
||||
revertDelimiters(text) {
|
||||
let originalText = text;
|
||||
for (const [placeholder, replacement] of this.delimiterMap.entries()) {
|
||||
const pattern = new RegExp(escapeRegExp(placeholder), "g");
|
||||
originalText = originalText.replace(pattern, replacement);
|
||||
}
|
||||
return originalText;
|
||||
}
|
||||
tokenize(text) {
|
||||
this.replacementCounter = 0;
|
||||
this.replacementMap = /* @__PURE__ */ new Map();
|
||||
this.delimiterMap = /* @__PURE__ */ new Map();
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"---Start of sentence tokenization-----------------------",
|
||||
);
|
||||
DEBUG && console.log("Original input: >>>" + text + "<<<");
|
||||
const result1 = this.replaceAbbreviations(text);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"Phase 1: replacing abbreviations: " + JSON.stringify(result1),
|
||||
);
|
||||
const result2 = this.replaceUrisWithPlaceholders(result1);
|
||||
DEBUG &&
|
||||
console.log("Phase 2: replacing URIs: " + JSON.stringify(result2));
|
||||
const result3 = this.replaceNumbersWithCode(result2);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"Phase 3: replacing numbers with placeholders: " +
|
||||
JSON.stringify(result3),
|
||||
);
|
||||
const result4 = this.replaceDelimitersWithPlaceholders(result3);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"Phase 4: replacing delimiters with placeholders: " +
|
||||
JSON.stringify(result4),
|
||||
);
|
||||
const sentences = this.splitOnPlaceholders(result4);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"Phase 5: splitting into sentences on placeholders: " +
|
||||
JSON.stringify(sentences),
|
||||
);
|
||||
const newSentences = sentences.map((s) => {
|
||||
const s1 = this.revertReplacements(s);
|
||||
return this.revertDelimiters(s1);
|
||||
});
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"Phase 6: replacing back abbreviations, URIs, numbers and delimiters: " +
|
||||
JSON.stringify(newSentences),
|
||||
);
|
||||
const trimmedSentences = this.trim(newSentences);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"Phase 7: trimming array of empty sentences: " +
|
||||
JSON.stringify(trimmedSentences),
|
||||
);
|
||||
const trimmedSentences2 = trimmedSentences.map((sent) => sent.trim());
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"Phase 8: trimming sentences from surrounding whitespace: " +
|
||||
JSON.stringify(trimmedSentences2),
|
||||
);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"---End of sentence tokenization--------------------------",
|
||||
);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"---Replacement map---------------------------------------",
|
||||
);
|
||||
DEBUG && console.log([...this.replacementMap.entries()]);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"---Delimiter map-----------------------------------------",
|
||||
);
|
||||
DEBUG && console.log([...this.delimiterMap.entries()]);
|
||||
DEBUG &&
|
||||
console.log(
|
||||
"---------------------------------------------------------",
|
||||
);
|
||||
return trimmedSentences2;
|
||||
}
|
||||
};
|
||||
module.exports = SentenceTokenizer;
|
||||
},
|
||||
});
|
||||
export default require_sentence_tokenizer();
|
||||
@@ -1,11 +1,11 @@
|
||||
import type { TextSplitter } from "./base";
|
||||
import SentenceTokenizerNew from "./sentence-tokenizer-parser.js";
|
||||
import SentenceTokenizer from "./sentence_tokenizer";
|
||||
|
||||
export type TextSplitterFn = (text: string) => string[];
|
||||
|
||||
const truncateText = (text: string, textSplitter: TextSplitter): string => {
|
||||
const chunks = textSplitter.splitText(text);
|
||||
return chunks[0];
|
||||
return chunks[0] ?? text;
|
||||
};
|
||||
|
||||
const splitTextKeepSeparator = (text: string, separator: string): string[] => {
|
||||
@@ -31,11 +31,17 @@ export const splitByChar = (): TextSplitterFn => {
|
||||
return (text: string) => text.split("");
|
||||
};
|
||||
|
||||
let sentenceTokenizer: SentenceTokenizerNew | null = null;
|
||||
let sentenceTokenizer: SentenceTokenizer | null = null;
|
||||
|
||||
export const splitBySentenceTokenizer = (): TextSplitterFn => {
|
||||
if (!sentenceTokenizer) {
|
||||
sentenceTokenizer = new SentenceTokenizerNew();
|
||||
sentenceTokenizer = new SentenceTokenizer([
|
||||
"i.e.",
|
||||
"etc.",
|
||||
"vs.",
|
||||
"Inc.",
|
||||
"A.S.A.P.",
|
||||
]);
|
||||
}
|
||||
const tokenizer = sentenceTokenizer;
|
||||
return (text: string) => {
|
||||
|
||||
@@ -0,0 +1,225 @@
|
||||
import format from "python-format-js";
|
||||
import type { ChatMessage } from "../llms";
|
||||
import type { BaseOutputParser, Metadata } from "../schema";
|
||||
import { objectEntries } from "../utils";
|
||||
import { PromptType } from "./prompt-type";
|
||||
|
||||
/**
 * Computes the value of a single template variable from the full set of
 * options passed to the template.
 */
type MappingFn<TemplatesVar extends string[] = string[]> = (
  options: Record<TemplatesVar[number], string>,
) => string;

/**
 * Own-property test that ignores members inherited from `Object.prototype`
 * (`constructor`, `toString`, ...), which `in` and plain indexing would see.
 */
const hasOwnKey = (target: object, key: PropertyKey): boolean =>
  Object.prototype.hasOwnProperty.call(target, key);

/**
 * Construction options shared by every prompt template.
 */
export type BasePromptTemplateOptions<
  TemplatesVar extends readonly string[],
  Vars extends readonly string[],
> = {
  metadata?: Metadata;
  templateVars?:
    | TemplatesVar
    // loose type for better type inference
    | readonly string[];
  options?: Partial<Record<TemplatesVar[number] | (string & {}), string>>;
  outputParser?: BaseOutputParser | undefined;
  templateVarMappings?: Partial<
    Record<Vars[number] | (string & {}), TemplatesVar[number] | (string & {})>
  >;
  functionMappings?: Partial<
    Record<TemplatesVar[number] | (string & {}), MappingFn>
  >;
};

/**
 * Base class for prompt templates. It stores the template variables, the
 * partially applied options, an optional output parser and the two mapping
 * tables used to rename or compute variables before formatting.
 */
export abstract class BasePromptTemplate<
  const TemplatesVar extends readonly string[] = string[],
  const Vars extends readonly string[] = string[],
> {
  metadata: Metadata = {};
  templateVars: Set<string> = new Set();
  options: Partial<Record<TemplatesVar[number] | (string & {}), string>> = {};
  outputParser: BaseOutputParser | undefined;
  templateVarMappings: Partial<
    Record<Vars[number] | (string & {}), TemplatesVar[number] | (string & {})>
  > = {};
  functionMappings: Partial<
    Record<TemplatesVar[number] | (string & {}), MappingFn>
  > = {};

  /**
   * @param options Initial settings; every field that is omitted keeps the
   *   default declared on the class.
   */
  protected constructor(
    options: BasePromptTemplateOptions<TemplatesVar, Vars>,
  ) {
    const {
      metadata,
      templateVars,
      outputParser,
      templateVarMappings,
      functionMappings,
    } = options;
    if (metadata) {
      this.metadata = metadata;
    }
    if (templateVars) {
      this.templateVars = new Set(templateVars);
    }
    if (options.options) {
      this.options = options.options;
    }
    this.outputParser = outputParser;
    if (templateVarMappings) {
      this.templateVarMappings = templateVarMappings;
    }
    if (functionMappings) {
      this.functionMappings = functionMappings;
    }
  }

  /**
   * Rename option keys according to `templateVarMappings`; keys without a
   * mapping are kept as they are.
   *
   * Only own entries of the mapping table are honoured, so a variable called
   * e.g. `constructor` is not renamed to an inherited `Object.prototype` member.
   */
  protected mapTemplateVars(
    options: Record<TemplatesVar[number] | (string & {}), string>,
  ) {
    const templateVarMappings = this.templateVarMappings;
    return Object.fromEntries(
      objectEntries(options).map(([k, v]) => [
        (hasOwnKey(templateVarMappings, k) && templateVarMappings[k]) || k,
        v,
      ]),
    );
  }

  /**
   * Build the option set in which every key of `functionMappings` holds the
   * value computed by its mapping function, and every other key keeps the
   * value supplied in `options`.
   */
  protected mapFunctionVars(
    options: Record<TemplatesVar[number] | (string & {}), string>,
  ) {
    const functionMappings = this.functionMappings;
    const newOptions = {} as Record<TemplatesVar[number], string>;
    for (const [k, v] of objectEntries(functionMappings)) {
      newOptions[k] = v!(options);
    }

    for (const [k, v] of objectEntries(options)) {
      // Own-property check: `k in newOptions` would also be true for names
      // inherited from Object.prototype and silently drop those options.
      if (!hasOwnKey(newOptions, k)) {
        newOptions[k] = v;
      }
    }

    return newOptions;
  }

  /**
   * Apply function mappings first, then template-variable renaming.
   */
  protected mapAllVars(
    options: Record<TemplatesVar[number] | (string & {}), string>,
  ): Record<string, string> {
    const newOptions = this.mapFunctionVars(options);
    return this.mapTemplateVars(newOptions);
  }

  /** Return a template with `options` pre-applied. */
  abstract partialFormat(
    options: Partial<Record<TemplatesVar[number] | (string & {}), string>>,
  ): BasePromptTemplate<TemplatesVar, Vars>;

  /** Render the template to a string. */
  abstract format(
    options?: Partial<Record<TemplatesVar[number] | (string & {}), string>>,
  ): string;

  /** Render the template to chat messages. */
  abstract formatMessages(
    options?: Partial<Record<TemplatesVar[number] | (string & {}), string>>,
  ): ChatMessage[];

  /** The raw template text. */
  abstract get template(): string;
}
|
||||
|
||||
/**
 * Union of tuples covering every ordering of the members of the union
 * `Members`. `Current` defaults to `Members` and drives the distribution.
 */
type Permutation<Members, Current = Members> = [Members] extends [never]
  ? []
  : Current extends Current
    ? [Current, ...Permutation<Exclude<Members, Current>>]
    : never;

/**
 * Concatenate the string members of tuple `Parts`, inserting `Glue` between
 * consecutive members.
 */
type Join<Parts extends any[], Glue extends string> = Parts extends [
  infer Head,
  ...infer Tail,
]
  ? Tail["length"] extends 0
    ? `${Head & string}`
    : `${Head & string}${Glue}${Join<Tail, Glue>}`
  : never;

/** Wrap a variable name in braces: `name` becomes `{name}`. */
type WrapStringWithBracket<Name extends string> = `{${Name}}`;

/**
 * String type of a template that mentions every variable in `Var` as
 * `{name}`, in any order, with arbitrary text around and between them.
 * Falls back to plain `string` when `Var` is empty or not a fixed-length tuple.
 */
export type StringTemplate<Var extends readonly string[]> =
  Var["length"] extends 0
    ? string
    : Var["length"] extends number
      ? number extends Var["length"]
        ? string
        : `${string}${Join<Permutation<WrapStringWithBracket<Var[number]>>, `${string}`>}${string}`
      : never;
|
||||
|
||||
/**
 * Options accepted by `PromptTemplate`: the base options plus the template
 * text and an optional prompt type.
 */
export type PromptTemplateOptions<
  TemplatesVar extends readonly string[],
  Vars extends readonly string[],
  Template extends StringTemplate<TemplatesVar>,
> = BasePromptTemplateOptions<TemplatesVar, Vars> & {
  template: Template;
  promptType?: PromptType;
};

/**
 * Prompt template backed by a single format string.
 */
export class PromptTemplate<
  const TemplatesVar extends readonly string[] = string[],
  const Vars extends readonly string[] = string[],
  const Template extends
    StringTemplate<TemplatesVar> = StringTemplate<TemplatesVar>,
> extends BasePromptTemplate<TemplatesVar, Vars> {
  #template: Template;
  promptType: PromptType;

  /**
   * @param options Template text, optional prompt type (defaults to
   *   `PromptType.custom`) and the base template options.
   */
  constructor(options: PromptTemplateOptions<TemplatesVar, Vars, Template>) {
    const { template, promptType, ...baseOptions } = options;
    super(baseOptions);
    this.promptType = promptType ?? PromptType.custom;
    this.#template = template;
  }

  /**
   * Create a new template carrying this template's settings, with `options`
   * layered over the options already stored on it.
   */
  partialFormat(
    options: Partial<Record<TemplatesVar[number] | (string & {}), string>>,
  ): PromptTemplate<TemplatesVar, Vars, Template> {
    const copy = new PromptTemplate({
      template: this.template,
      templateVars: [...this.templateVars],
      options: this.options,
      outputParser: this.outputParser,
      templateVarMappings: this.templateVarMappings,
      functionMappings: this.functionMappings,
      metadata: this.metadata,
      promptType: this.promptType,
    });

    // Later keys win, so the newly supplied options override stored ones.
    copy.options = {
      ...copy.options,
      ...options,
    };

    return copy;
  }

  /**
   * Render the template. Stored options are merged with `options`, passed
   * through the variable mappings and substituted into the template; the
   * output parser, when present, post-processes the rendered text.
   */
  format(
    options?: Partial<Record<TemplatesVar[number] | (string & {}), string>>,
  ): string {
    const merged = {
      ...this.options,
      ...options,
    } as Record<TemplatesVar[number], string>;

    const rendered = format(this.template, this.mapAllVars(merged));

    return this.outputParser ? this.outputParser.format(rendered) : rendered;
  }

  /**
   * Render the template and wrap the result in a single `user` message.
   */
  formatMessages(
    options?: Partial<Record<TemplatesVar[number] | (string & {}), string>>,
  ): ChatMessage[] {
    const content = this.format(options);
    return [{ role: "user", content }];
  }

  /** The raw template text supplied at construction. */
  get template(): Template {
    return this.#template;
  }
}
|
||||
@@ -0,0 +1,33 @@
|
||||
export { BasePromptTemplate, PromptTemplate } from "./base";
|
||||
export type {
|
||||
BasePromptTemplateOptions,
|
||||
PromptTemplateOptions,
|
||||
StringTemplate,
|
||||
} from "./base";
|
||||
export { PromptMixin, type ModuleRecord, type PromptsRecord } from "./mixin";
|
||||
export {
|
||||
anthropicSummaryPrompt,
|
||||
anthropicTextQaPrompt,
|
||||
defaultChoiceSelectPrompt,
|
||||
defaultCondenseQuestionPrompt,
|
||||
defaultContextSystemPrompt,
|
||||
defaultKeywordExtractPrompt,
|
||||
defaultQueryKeywordExtractPrompt,
|
||||
defaultRefinePrompt,
|
||||
defaultSubQuestionPrompt,
|
||||
defaultSummaryPrompt,
|
||||
defaultTextQAPrompt,
|
||||
defaultTreeSummarizePrompt,
|
||||
} from "./prompt";
|
||||
export type {
|
||||
ChoiceSelectPrompt,
|
||||
CondenseQuestionPrompt,
|
||||
ContextSystemPrompt,
|
||||
KeywordExtractPrompt,
|
||||
QueryKeywordExtractPrompt,
|
||||
RefinePrompt,
|
||||
SubQuestionPrompt,
|
||||
SummaryPrompt,
|
||||
TextQAPrompt,
|
||||
TreeSummarizePrompt,
|
||||
} from "./prompt";
|
||||
@@ -0,0 +1,79 @@
|
||||
import { objectEntries } from "../utils";
|
||||
import type { BasePromptTemplate } from "./base";
|
||||
|
||||
/** Prompts keyed by name; nested module prompts use `module:key` names. */
export type PromptsRecord = Record<string, BasePromptTemplate>;
/** Sub-modules keyed by name. */
export type ModuleRecord = Record<string, PromptMixin>;

/**
 * Mixin for components that own prompts and may contain sub-modules that own
 * prompts too. Prompts of a sub-module are addressed as `moduleName:promptKey`,
 * and the scheme nests (`outer:inner:promptKey`).
 */
export abstract class PromptMixin {
  /**
   * Ensure that neither prompt keys nor module keys contain `:`, which is
   * reserved as the module/key separator.
   *
   * @throws {Error} When a key contains `:`.
   */
  validatePrompts(promptsDict: PromptsRecord, moduleDict: ModuleRecord): void {
    for (const key of Object.keys(promptsDict)) {
      if (key.includes(":")) {
        throw new Error(`Prompt key ${key} cannot contain ':'.`);
      }
    }

    for (const key of Object.keys(moduleDict)) {
      if (key.includes(":")) {
        throw new Error(`Module key ${key} cannot contain ':'.`);
      }
    }
  }

  /**
   * Collect this module's prompts together with the prompts of all
   * sub-modules, the latter prefixed with `moduleName:`.
   */
  getPrompts(): PromptsRecord {
    const promptsDict: PromptsRecord = this._getPrompts();

    const moduleDict = this._getPromptModules();

    this.validatePrompts(promptsDict, moduleDict);

    const allPrompts: PromptsRecord = { ...promptsDict };

    for (const [module_name, prompt_module] of objectEntries(moduleDict)) {
      for (const [key, prompt] of objectEntries(prompt_module.getPrompts())) {
        allPrompts[`${module_name}:${key}`] = prompt;
      }
    }
    return allPrompts;
  }

  /**
   * Apply `prompts` to this module and route every `moduleName:rest` entry to
   * the matching sub-module under the key `rest`.
   *
   * @throws {Error} When an entry names a sub-module that does not exist.
   */
  updatePrompts(prompts: PromptsRecord): void {
    const promptModules = this._getPromptModules();

    this._updatePrompts(prompts);

    const subPrompts = new Map<string, PromptsRecord>();

    for (const [key, prompt] of Object.entries(prompts)) {
      // Split on the FIRST separator only: the remainder may itself be a
      // `module:key` path addressed to a nested sub-module.
      const separatorIndex = key.indexOf(":");
      if (separatorIndex === -1) {
        continue;
      }
      const moduleName = key.slice(0, separatorIndex);
      const subKey = key.slice(separatorIndex + 1);

      let bucket = subPrompts.get(moduleName);
      if (!bucket) {
        bucket = {};
        subPrompts.set(moduleName, bucket);
      }
      bucket[subKey] = prompt;
    }

    for (const [moduleName, subPromptDict] of subPrompts) {
      const moduleToUpdate = promptModules[moduleName];
      if (!moduleToUpdate) {
        throw new Error(`Module ${moduleName} not found.`);
      }

      moduleToUpdate.updatePrompts(subPromptDict);
    }
  }

  /** Return the prompts owned directly by this module. */
  protected abstract _getPrompts(): PromptsRecord;
  /** Apply prompt updates to this module. */
  protected abstract _updatePrompts(prompts: PromptsRecord): void;

  /**
   *
   * Return a dictionary of sub-modules within the current module
   * that also implement PromptMixin (so that their prompts can also be get/set).
   *
   * Can be blank if no sub-modules.
   */
  protected abstract _getPromptModules(): ModuleRecord;
}
|
||||
@@ -0,0 +1,64 @@
|
||||
import { z } from "zod";
|
||||
|
||||
/**
 * Prompt type identifiers: constant-style names mapped to the string values
 * accepted by the schema below.
 */
const promptType = {
  SUMMARY: "summary",
  TREE_INSERT: "insert",
  TREE_SELECT: "tree_select",
  TREE_SELECT_MULTIPLE: "tree_select_multiple",
  QUESTION_ANSWER: "text_qa",
  REFINE: "refine",
  KEYWORD_EXTRACT: "keyword_extract",
  QUERY_KEYWORD_EXTRACT: "query_keyword_extract",
  SCHEMA_EXTRACT: "schema_extract",
  TEXT_TO_SQL: "text_to_sql",
  TEXT_TO_GRAPH_QUERY: "text_to_graph_query",
  TABLE_CONTEXT: "table_context",
  KNOWLEDGE_TRIPLET_EXTRACT: "knowledge_triplet_extract",
  SIMPLE_INPUT: "simple_input",
  PANDAS: "pandas",
  JSON_PATH: "json_path",
  SINGLE_SELECT: "single_select",
  MULTI_SELECT: "multi_select",
  VECTOR_STORE_QUERY: "vector_store_query",
  SUB_QUESTION: "sub_question",
  SQL_RESPONSE_SYNTHESIS: "sql_response_synthesis",
  SQL_RESPONSE_SYNTHESIS_V2: "sql_response_synthesis_v2",
  CONVERSATION: "conversation",
  DECOMPOSE: "decompose",
  CHOICE_SELECT: "choice_select",
  CUSTOM: "custom",
  RANKGPT_RERANK: "rankgpt_rerank",
} as const;

/**
 * Zod enum over every value of `promptType`. The list is spelled out so that
 * the literal tuple type is preserved; it must stay in sync with the object
 * above.
 */
const promptTypeSchema = z.enum([
  promptType.SUMMARY,
  promptType.TREE_INSERT,
  promptType.TREE_SELECT,
  promptType.TREE_SELECT_MULTIPLE,
  promptType.QUESTION_ANSWER,
  promptType.REFINE,
  promptType.KEYWORD_EXTRACT,
  promptType.QUERY_KEYWORD_EXTRACT,
  promptType.SCHEMA_EXTRACT,
  promptType.TEXT_TO_SQL,
  promptType.TEXT_TO_GRAPH_QUERY,
  promptType.TABLE_CONTEXT,
  promptType.KNOWLEDGE_TRIPLET_EXTRACT,
  promptType.SIMPLE_INPUT,
  promptType.PANDAS,
  promptType.JSON_PATH,
  promptType.SINGLE_SELECT,
  promptType.MULTI_SELECT,
  promptType.VECTOR_STORE_QUERY,
  promptType.SUB_QUESTION,
  promptType.SQL_RESPONSE_SYNTHESIS,
  promptType.SQL_RESPONSE_SYNTHESIS_V2,
  promptType.CONVERSATION,
  promptType.DECOMPOSE,
  promptType.CHOICE_SELECT,
  promptType.CUSTOM,
  promptType.RANKGPT_RERANK,
]);

/** Runtime lookup object exposed by the schema's `enum` property. */
export const PromptType = promptTypeSchema.enum;
/** Union of all prompt type string values. */
export type PromptType = z.infer<typeof promptTypeSchema>;
|
||||
@@ -0,0 +1,253 @@
|
||||
import type { ChatMessage, ToolMetadata } from "../llms";
|
||||
import { PromptTemplate } from "./base";
|
||||
|
||||
export type TextQAPrompt = PromptTemplate<["context", "query"]>;
|
||||
export type SummaryPrompt = PromptTemplate<["context"]>;
|
||||
export type RefinePrompt = PromptTemplate<
|
||||
["query", "existingAnswer", "context"]
|
||||
>;
|
||||
export type TreeSummarizePrompt = PromptTemplate<["context", "query"]>;
|
||||
export type ChoiceSelectPrompt = PromptTemplate<["context", "query"]>;
|
||||
export type SubQuestionPrompt = PromptTemplate<["toolsStr", "queryStr"]>;
|
||||
export type CondenseQuestionPrompt = PromptTemplate<
|
||||
["chatHistory", "question"]
|
||||
>;
|
||||
export type ContextSystemPrompt = PromptTemplate<["context"]>;
|
||||
export type KeywordExtractPrompt = PromptTemplate<["context"]>;
|
||||
export type QueryKeywordExtractPrompt = PromptTemplate<["question"]>;
|
||||
|
||||
export const defaultTextQAPrompt: TextQAPrompt = new PromptTemplate({
|
||||
templateVars: ["context", "query"],
|
||||
template: `Context information is below.
|
||||
---------------------
|
||||
{context}
|
||||
---------------------
|
||||
Given the context information and not prior knowledge, answer the query.
|
||||
Query: {query}
|
||||
Answer:`,
|
||||
});
|
||||
|
||||
export const anthropicTextQaPrompt: TextQAPrompt = new PromptTemplate({
|
||||
templateVars: ["context", "query"],
|
||||
template: `Context information:
|
||||
<context>
|
||||
{context}
|
||||
</context>
|
||||
Given the context information and not prior knowledge, answer the query.
|
||||
Query: {query}`,
|
||||
});
|
||||
|
||||
export const defaultSummaryPrompt: SummaryPrompt = new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: `Write a summary of the following. Try to use only the information provided. Try to include as many key details as possible.
|
||||
|
||||
|
||||
{context}
|
||||
|
||||
|
||||
SUMMARY:"""
|
||||
`,
|
||||
});
|
||||
|
||||
export const anthropicSummaryPrompt: SummaryPrompt = new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: `Summarize the following text. Try to use only the information provided. Try to include as many key details as possible.
|
||||
<original-text>
|
||||
{context}
|
||||
</original-text>
|
||||
|
||||
SUMMARY:
|
||||
`,
|
||||
});
|
||||
|
||||
export const defaultRefinePrompt: RefinePrompt = new PromptTemplate({
|
||||
templateVars: ["query", "existingAnswer", "context"],
|
||||
template: `The original query is as follows: {query}
|
||||
We have provided an existing answer: {existingAnswer}
|
||||
We have the opportunity to refine the existing answer (only if needed) with some more context below.
|
||||
------------
|
||||
{context}
|
||||
------------
|
||||
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
|
||||
Refined Answer:`,
|
||||
});
|
||||
|
||||
export const defaultTreeSummarizePrompt = new PromptTemplate({
|
||||
templateVars: ["context", "query"],
|
||||
template: `Context information from multiple sources is below.
|
||||
---------------------
|
||||
{context}
|
||||
---------------------
|
||||
Given the information from multiple sources and not prior knowledge, answer the query.
|
||||
Query: {query}
|
||||
Answer:`,
|
||||
});
|
||||
|
||||
export const defaultChoiceSelectPrompt = new PromptTemplate({
|
||||
templateVars: ["context", "query"],
|
||||
template: `A list of documents is shown below. Each document has a number next to it along
|
||||
with a summary of the document. A question is also provided.
|
||||
Respond with the numbers of the documents
|
||||
you should consult to answer the question, in order of relevance, as well
|
||||
as the relevance score. The relevance score is a number from 1-10 based on
|
||||
how relevant you think the document is to the question.
|
||||
Do not include any documents that are not relevant to the question.
|
||||
Example format:
|
||||
Document 1:
|
||||
<summary of document 1>
|
||||
|
||||
Document 2:
|
||||
<summary of document 2>
|
||||
|
||||
...
|
||||
|
||||
Document 10:\n<summary of document 10>
|
||||
|
||||
Question: <question>
|
||||
Answer:
|
||||
Doc: 9, Relevance: 7
|
||||
Doc: 3, Relevance: 4
|
||||
Doc: 7, Relevance: 3
|
||||
|
||||
Let's try this now:
|
||||
|
||||
{context}
|
||||
Question: {query}
|
||||
Answer:`,
|
||||
});
|
||||
|
||||
/**
 * Serializes tool metadata into a pretty-printed JSON object string.
 *
 * Each tool contributes one entry keyed by its `name`, with its `description`
 * as the value. When two tools share a name, the later description wins while
 * the key keeps the position of its first occurrence.
 *
 * @param tools - Tools to describe.
 * @returns JSON text of the name-to-description mapping, indented by 4 spaces.
 */
export function buildToolsText(tools: ToolMetadata[]): string {
  const toolsObj: Record<string, string> = {};
  for (const tool of tools) {
    toolsObj[tool.name] = tool.description;
  }

  return JSON.stringify(toolsObj, null, 4);
}
|
||||
|
||||
const exampleTools: ToolMetadata[] = [
|
||||
{
|
||||
name: "uber_10k",
|
||||
description: "Provides information about Uber financials for year 2021",
|
||||
},
|
||||
{
|
||||
name: "lyft_10k",
|
||||
description: "Provides information about Lyft financials for year 2021",
|
||||
},
|
||||
];
|
||||
|
||||
const exampleQueryStr = `Compare and contrast the revenue growth and EBITDA of Uber and Lyft for year 2021`;
|
||||
|
||||
const exampleOutput = [
|
||||
{
|
||||
subQuestion: "What is the revenue growth of Uber",
|
||||
toolName: "uber_10k",
|
||||
},
|
||||
{
|
||||
subQuestion: "What is the EBITDA of Uber",
|
||||
toolName: "uber_10k",
|
||||
},
|
||||
{
|
||||
subQuestion: "What is the revenue growth of Lyft",
|
||||
toolName: "lyft_10k",
|
||||
},
|
||||
{
|
||||
subQuestion: "What is the EBITDA of Lyft",
|
||||
toolName: "lyft_10k",
|
||||
},
|
||||
] as const;
|
||||
|
||||
export const defaultSubQuestionPrompt: SubQuestionPrompt = new PromptTemplate({
|
||||
templateVars: ["toolsStr", "queryStr"],
|
||||
template: `Given a user question, and a list of tools, output a list of relevant sub-questions that when composed can help answer the full user question:
|
||||
|
||||
# Example 1
|
||||
<Tools>
|
||||
\`\`\`json
|
||||
${buildToolsText(exampleTools)}
|
||||
\`\`\`
|
||||
|
||||
<User Question>
|
||||
${exampleQueryStr}
|
||||
|
||||
<Output>
|
||||
\`\`\`json
|
||||
${JSON.stringify(exampleOutput, null, 4)}
|
||||
\`\`\`
|
||||
|
||||
# Example 2
|
||||
<Tools>
|
||||
\`\`\`json
|
||||
{toolsStr}
|
||||
\`\`\`
|
||||
|
||||
<User Question>
|
||||
{queryStr}
|
||||
|
||||
<Output>
|
||||
`,
|
||||
});
|
||||
|
||||
export const defaultCondenseQuestionPrompt = new PromptTemplate({
|
||||
templateVars: ["chatHistory", "question"],
|
||||
template: `Given a conversation (between Human and Assistant) and a follow up message from Human, rewrite the message to be a standalone question that captures all relevant context from the conversation.
|
||||
|
||||
<Chat History>
|
||||
{chatHistory}
|
||||
|
||||
<Follow Up Message>
|
||||
{question}
|
||||
|
||||
<Standalone question>
|
||||
`,
|
||||
});
|
||||
|
||||
/**
 * Renders a list of chat messages as plain text, one line per message.
 *
 * A message whose role is `"user"` is prefixed with `Human: `; every other
 * role is prefixed with `Assistant: `. Lines are joined with `\n`, and an
 * empty list yields an empty string.
 *
 * NOTE(review): `message.content` is interpolated into a template string, so
 * non-string content would be rendered through its default string coercion —
 * confirm callers only pass textual content.
 *
 * @param messages - Messages to render, in the order they should appear.
 * @returns The formatted history string.
 */
export function messagesToHistoryStr(messages: ChatMessage[]): string {
  return messages
    .map((message) => {
      const speaker = message.role === "user" ? "Human" : "Assistant";
      return `${speaker}: ${message.content}`;
    })
    .join("\n");
}
|
||||
|
||||
export const defaultContextSystemPrompt: ContextSystemPrompt =
|
||||
new PromptTemplate({
|
||||
templateVars: ["context"],
|
||||
template: `Context information is below.
|
||||
---------------------
|
||||
{context}
|
||||
---------------------`,
|
||||
});
|
||||
|
||||
export const defaultKeywordExtractPrompt: KeywordExtractPrompt =
|
||||
new PromptTemplate({
|
||||
templateVars: ["maxKeywords", "context"],
|
||||
template: `
|
||||
Some text is provided below. Given the text, extract up to {maxKeywords} keywords from the text. Avoid stopwords.
|
||||
---------------------
|
||||
{context}
|
||||
---------------------
|
||||
Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'
|
||||
`,
|
||||
}).partialFormat({
|
||||
maxKeywords: "10",
|
||||
});
|
||||
|
||||
export const defaultQueryKeywordExtractPrompt = new PromptTemplate({
|
||||
templateVars: ["maxKeywords", "question"],
|
||||
template: `(
|
||||
"A question is provided below. Given the question, extract up to {maxKeywords} "
|
||||
"keywords from the text. Focus on extracting the keywords that we can use "
|
||||
"to best lookup answers to the question. Avoid stopwords."
|
||||
"---------------------"
|
||||
"{question}"
|
||||
"---------------------"
|
||||
"Provide keywords in the following comma-separated format: 'KEYWORDS: <keywords>'"
|
||||
)`,
|
||||
}).partialFormat({
|
||||
maxKeywords: "10",
|
||||
});
|
||||
@@ -1,4 +1,5 @@
|
||||
export * from "./node";
|
||||
export { FileReader, TransformComponent, type BaseReader } from "./type";
|
||||
export type { BaseOutputParser } from "./type/base-output-parser";
|
||||
export { EngineResponse } from "./type/engine-response";
|
||||
export * from "./zod";
|
||||
|
||||
@@ -38,13 +38,15 @@ export type RelatedNodeType<T extends Metadata = Metadata> =
|
||||
| RelatedNodeInfo<T>[];
|
||||
|
||||
export type BaseNodeParams<T extends Metadata = Metadata> = {
|
||||
id_?: string;
|
||||
metadata?: T;
|
||||
excludedEmbedMetadataKeys?: string[];
|
||||
excludedLlmMetadataKeys?: string[];
|
||||
relationships?: Partial<Record<NodeRelationship, RelatedNodeType<T>>>;
|
||||
hash?: string;
|
||||
embedding?: number[];
|
||||
id_?: string | undefined;
|
||||
metadata?: T | undefined;
|
||||
excludedEmbedMetadataKeys?: string[] | undefined;
|
||||
excludedLlmMetadataKeys?: string[] | undefined;
|
||||
relationships?:
|
||||
| Partial<Record<NodeRelationship, RelatedNodeType<T>>>
|
||||
| undefined;
|
||||
hash?: string | undefined;
|
||||
embedding?: number[] | undefined;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -58,7 +60,7 @@ export abstract class BaseNode<T extends Metadata = Metadata> {
|
||||
* Set to a UUID by default.
|
||||
*/
|
||||
id_: string;
|
||||
embedding?: number[];
|
||||
embedding: number[] | undefined;
|
||||
|
||||
// Metadata fields
|
||||
metadata: T;
|
||||
@@ -198,11 +200,11 @@ export abstract class BaseNode<T extends Metadata = Metadata> {
|
||||
|
||||
export type TextNodeParams<T extends Metadata = Metadata> =
|
||||
BaseNodeParams<T> & {
|
||||
text?: string;
|
||||
textTemplate?: string;
|
||||
startCharIdx?: number;
|
||||
endCharIdx?: number;
|
||||
metadataSeparator?: string;
|
||||
text?: string | undefined;
|
||||
textTemplate?: string | undefined;
|
||||
startCharIdx?: number | undefined;
|
||||
endCharIdx?: number | undefined;
|
||||
metadataSeparator?: string | undefined;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -418,7 +420,7 @@ export class ImageDocument<T extends Metadata = Metadata> extends ImageNode<T> {
|
||||
*/
|
||||
export interface NodeWithScore<T extends Metadata = Metadata> {
|
||||
node: BaseNode<T>;
|
||||
score?: number;
|
||||
score?: number | undefined;
|
||||
}
|
||||
|
||||
export enum ModalityType {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user