Compare commits

...

27 Commits

Author SHA1 Message Date
github-actions[bot] 7a2485cca2 Release 0.11.12 (#2050)
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-07-02 11:41:55 +07:00
Marcus Schiesser 1329186a23 docs: clarify how to run docs 2025-07-02 11:33:48 +07:00
dependabot[bot] 5d6e7384f5 chore(deps-dev): bump @modelcontextprotocol/server-filesystem from 2025.3.28 to 2025.7.1 (#2055) 2025-07-02 11:26:18 +07:00
allen f2dfd305fb implement bm25 retriever (#2045)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-02 11:22:47 +07:00
Huu Le 3cd8a573df feat: update interpreter to always upload all files in the configured directory (#2057) 2025-07-02 10:57:04 +07:00
Laurie Voss 09c6077f6e Import path for llamaparsereader (#2056) 2025-07-01 16:51:25 -07:00
Logan 14cc65b4e3 add google analytics (#2053)
Co-authored-by: Alex Yang <himself65@outlook.com>
2025-07-01 11:18:14 -07:00
Marcus Schiesser c544d8f67c docs: review and update memory doc 2025-07-01 15:10:43 +07:00
Huu Le d578889e21 feat: new memory api (#2028)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-01 09:30:49 +07:00
Marcus Schiesser 9f745d1941 chore: revert to wrong opus change 2025-07-01 09:07:46 +07:00
Alex Yang f292e94dcd fix: change default claude model (#2052) 2025-06-30 15:19:40 -07:00
Marcus Schiesser 0fcc92f632 fix: sentence splitter must not trim whitespaces (#2046) 2025-06-30 17:32:04 +07:00
Marcus Schiesser 515a8b9111 fix: error logging for fromPersistPath (#2049) 2025-06-30 13:41:13 +07:00
github-actions[bot] 7e8efc6284 Release @llamaindex/tools@0.1.2 (#2048) 2025-06-30 11:40:54 +07:00
Wassim Chegham 0fcf65126d chore: export type MCPClientOptions (#2047)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-06-28 10:55:07 +07:00
github-actions[bot] a50acf634c Release 0.11.11 (#2044)
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-06-27 14:51:09 +07:00
Thuc Pham 7039e1a214 chore: migrate to @google/genai SDK (#2038)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-06-27 12:09:26 +07:00
github-actions[bot] 785d010cd3 Release 0.11.10 (#2037)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
2025-06-26 14:29:33 +07:00
Marcus Schiesser b878032131 fix release step 2025-06-26 14:18:56 +07:00
Marcus Schiesser f7ec293a0f chore: Update workflow-core (#2042) 2025-06-26 14:03:03 +07:00
jerinthomascarmel 49a5e0a8cf feat(readers): add ExcelReader for parsing Excel files (run-llama#1959) (#2033)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
Co-authored-by: leehuwuj <leehuwuj@gmail.com>
2025-06-26 11:15:19 +07:00
Logan 118924799a Rename llama-flow -> workflows in docs (#2040) 2025-06-25 15:52:04 -07:00
allen ec8f673dae support filter to supabase vector search (#2036) 2025-06-25 16:17:54 +07:00
github-actions[bot] 85039a5360 Release @llamaindex/tools@0.1.0 (#2034) 2025-06-24 12:32:24 +07:00
Marcus Schiesser d7305edb53 fix changesets 2025-06-24 12:26:09 +07:00
Huu Le 096bf2bda1 feat: Add support for StreamableHTTP MCP Client (#2032) 2025-06-24 11:40:34 +07:00
jerinthomascarmel c5846bd7dc feat(readers): add XMLReader for parsing XML files (#1846) (#2031)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-06-24 10:46:32 +07:00
243 changed files with 7368 additions and 2340 deletions
+1 -1
View File
@@ -25,7 +25,7 @@ Make sure you have Node.js LTS (Long-term Support) installed. You can check your
```shell
node -v
# v20.x.x
# v22.x.x
```
### Use pnpm
+44
View File
@@ -1,5 +1,49 @@
# @llamaindex/doc
## 0.2.32
### Patch Changes
- Updated dependencies [d578889]
- Updated dependencies [0fcc92f]
- Updated dependencies [515a8b9]
- @llamaindex/core@0.6.13
- llamaindex@0.11.12
- @llamaindex/cloud@4.0.17
- @llamaindex/node-parser@2.0.13
- @llamaindex/openai@0.4.7
- @llamaindex/readers@3.1.12
- @llamaindex/workflow@1.1.13
## 0.2.31
### Patch Changes
- Updated dependencies [7039e1a]
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
- @llamaindex/core@0.6.12
- @llamaindex/cloud@4.0.16
- @llamaindex/node-parser@2.0.12
- @llamaindex/openai@0.4.6
- @llamaindex/readers@3.1.11
- @llamaindex/workflow@1.1.12
## 0.2.30
### Patch Changes
- Updated dependencies [f7ec293]
- @llamaindex/workflow@1.1.11
- llamaindex@0.11.10
## 0.2.29
### Patch Changes
- Updated dependencies [c5846bd]
- @llamaindex/readers@3.1.10
## 0.2.28
### Patch Changes
+1 -1
View File
@@ -111,7 +111,7 @@ Key build process:
**Content Sources:**
- Local MDX files in `src/content/docs/`
- External docs from `@llama-flow/docs` package
- External docs from `@llamaindex/workflow-docs` package
- Generated API docs from TypeScript source
### Development Notes
+2
View File
@@ -3,6 +3,8 @@
This is a Next.js application generated with
[Create Fumadocs](https://github.com/fuma-nama/fumadocs).
> Note: Before running the development server, make sure to build the whole project first, see [CONTRIBUTING.md](../../CONTRIBUTING.md) for more details.
Run development server:
```bash
+2 -2
View File
@@ -23,8 +23,8 @@ const config = {
permanent: true,
},
{
source: "/docs/llamaflow/:path*.mdx",
destination: "/docs/llamaflow/:path*",
source: "/docs/workflows/:path*.mdx",
destination: "/docs/workflows/:path*",
permanent: true,
},
];
+4 -3
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/doc",
"version": "0.2.28",
"version": "0.2.32",
"private": true,
"scripts": {
"postinstall": "fumadocs-mdx",
@@ -15,7 +15,6 @@
"dependencies": {
"@huggingface/transformers": "^3.5.0",
"@icons-pack/react-simple-icons": "^10.1.0",
"@llama-flow/docs": "0.0.8",
"@llamaindex/chat-ui-docs": "^0.0.5",
"@llamaindex/cloud": "workspace:*",
"@llamaindex/core": "workspace:*",
@@ -23,8 +22,10 @@
"@llamaindex/openai": "workspace:*",
"@llamaindex/readers": "workspace:*",
"@llamaindex/workflow": "workspace:*",
"@llamaindex/workflow-docs": "0.1.1",
"@mdx-js/mdx": "^3.1.0",
"@monaco-editor/react": "^4.7.0",
"@next/third-parties": "^15.3.4",
"@number-flow/react": "^0.3.4",
"@radix-ui/react-dialog": "^1.1.2",
"@radix-ui/react-icons": "^1.3.2",
@@ -69,7 +70,7 @@
"twoslash": "^0.3.1",
"use-stick-to-bottom": "^1.0.42",
"web-tree-sitter": "^0.24.4",
"zod": "^3.23.8"
"zod": "^3.25.67"
},
"devDependencies": {
"@next/env": "^15.3.0",
+1 -1
View File
@@ -13,7 +13,7 @@ const INTERNAL_LINK_REGEX = /(?:(?:\]\(|\bhref=["'])\/docs\/([^")]+))/g;
// This captures relative links like [text](./path) or ![alt](../images/image.png)
const RELATIVE_LINK_REGEX = /(?:\]\()(?:\s*)(?:\.\.?)\//g;
const ALLOWED_LINKS = ["/docs/llamaflow", "/docs/chat-ui"];
const ALLOWED_LINKS = ["/docs/workflows", "/docs/chat-ui"];
interface LinkValidationResult {
file: string;
+2 -2
View File
@@ -11,9 +11,9 @@ import remarkMath from "remark-math";
export const docs = defineDocs({
dir: [
"./src/content/docs",
"./node_modules/@llama-flow/docs",
"./node_modules/@llamaindex/workflow-docs",
"./node_modules/@llamaindex/chat-ui-docs",
// NOTE: When adding external docs (like chat-ui or llama-flow above),
// NOTE: When adding external docs (like chat-ui or workflow-docs above),
// make sure to also update:
// 1. scripts/validate-links.mts - add to ALLOWED_LINKS array
// 2. next.config.mjs - add redirect for .mdx files
+2
View File
@@ -1,5 +1,6 @@
import { AIProvider } from "@/actions";
import { TooltipProvider } from "@/components/ui/tooltip";
import { GoogleAnalytics } from "@next/third-parties/google";
import { RootProvider } from "fumadocs-ui/provider";
import { Inter } from "next/font/google";
import type { ReactNode } from "react";
@@ -39,6 +40,7 @@ export default function Layout({ children }: { children: ReactNode }) {
</AIProvider>
</TooltipProvider>
</body>
<GoogleAnalytics gaId="G-NB9B8LW9W5" />
</html>
);
}
@@ -74,12 +74,21 @@ const server = mcp({
args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
verbose: true,
});
// or by SSE
// or by StreamableHTTP transport
const server = mcp({
url: "http://localhost:8000/mcp",
verbose: true,
});
// if your MCP server is not using StreamableHTTP transport, you can also use SSE transport
// by setting useSSETransport to true.
// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
const server = mcp({
url: "http://localhost:8000/mcp",
useSSETransport: true,
verbose: true,
});
// 3. Get tools from MCP server
const tools = await server.tools();
@@ -9,10 +9,13 @@ Workflows are designed to be flexible and can be used to build agents, RAG flows
To use workflows install this package:
```package-install
npm i @llamaindex/workflow
npm i @llamaindex/workflow-core
```
This package is a stable, production-ready version of our [llama-flow](/docs/llamaflow) project.
This contains the core functionality for the workflow system. You can read more about the core concepts in the [workflow-core](/docs/workflows) section.
While you can still reference the llama-flow documentation for detailed information about the underlying concepts, we recommend using the `@llamaindex/workflow` package for all new projects to ensure stability and long-term availability.
In contrast, the `@llamaindex/workflow` package contains more utilities, such as prebuilt agents.
```package-install
npm i @llamaindex/workflow
```
@@ -0,0 +1,182 @@
---
title: Memory
description: Manage conversation history and context with agents
---
## Concept
Memory is a core component of agentic systems. It allows you to store and retrieve information from the past.
In LlamaIndexTS, you can create memory by using the `createMemory` function. This function will return a `Memory` object, which you can then use to store and retrieve information.
As the agent runs, it will make calls to `add()` to store information, and `get()` to retrieve information.
## Usage
A `Memory` object has both short-term memory (i.e. a FIFO queue of messages) and optionally long-term memory (i.e. extracting information over time).
`get()` always returns all messages stored in the memory. The longer the agent runs, the more likely it is that these messages will exceed the context window of the agent. To avoid this, the agent uses the `getLLM` method to get the last X messages that fit into the context window.
### Configuring Memory for an Agent
Here we're creating a memory with a static block (read more about [memory blocks](#long-term-memory)) that contains some information about the user.
```ts twoslash
import { openai } from "@llamaindex/openai";
import { agent } from "@llamaindex/workflow";
import { createMemory, staticBlock } from "llamaindex";
const llm = openai({ model: "gpt-4.1-mini" });
// Create memory with predefined context
const memory = createMemory({
memoryBlocks: [
staticBlock({
content:
"The user is a software engineer who loves TypeScript and LlamaIndex.",
}),
],
});
// Create an agent with the memory
const workflow = agent({
name: "assistant",
llm,
memory,
});
const result = await workflow.run("What is my name?");
console.log("Response:", result.data.result);
```
### Using Vercel format
You can also put messages in Vercel format directly to the memory:
```ts
await memory.add({
id: "1",
createdAt: new Date(),
role: "user",
content: "Hello!",
options: {
parts: [
{
type: "file",
data: "base64...",
mimeType: "image/png",
},
],
},
});
```
If you call `get`, messages are usually retrieved in the LlamaIndexTS format (type `ChatMessage`). If you specify the `type` parameter using `get`, you can return the messages in different formats. E.g.: using `type: "vercel"`, you can return the messages in Vercel format:
```ts
const messages = await memory.get({ type: "vercel" });
console.log(messages);
```
## Customizing Memory
### Short-Term Memory
The `Memory` object will store all the messages that are added to the `Memory` object. Unless you call `clear()`, no messages are removed from the memory. This is the short-term memory (usually you will store the memory of one user session there) which is augmented by the long-term memory.
Calling `getLLM` will retrieve messages from long-term memory and ensure that the given `tokenLimit` is not reached. These are the messages that you will send to the LLM.
For initialization, you call `createMemory` with the following options:
- `tokenLimit`: Maximum tokens for memory retrieval using `getLLM` (default: 30000).
- `shortTermTokenLimitRatio`: Ratio of tokens for short-term vs long-term memory (default: 0.7)
- `customAdapters`: Custom message adapters for different message formats. LlamaIndex (`ChatMessageAdapter`) and Vercel (`VercelMessageAdapter`) are built-in adapters.
- `memoryBlocks`: Memory blocks for long-term storage, see [Long-Term Memory](#long-term-memory)
Example:
```ts
const memory = createMemory({
tokenLimit: 40000,
shortTermTokenLimitRatio: 0.5,
});
```
### Long-Term Memory
Long-term memory is represented as `Memory Block` objects. These objects contain information that is from previous user sessions or from the beginning of the current conversation. When memory is retrieved (by calling `getLLM`), the short-term and long-term memories are merged together within the given `tokenLimit`.
Currently, there are two predefined memory blocks:
- `staticBlock`: A memory block that stores a static piece of information.
- `factExtractionBlock`: A memory block that extracts facts from the chat history.
This sounds a bit complicated, but it's actually quite simple. Let's look at an example:
```ts
import { createMemory, factExtractionBlock, staticBlock } from "llamaindex";
const memoryBlocks= [
staticBlock({
id: "core_info",
content: "My name is Logan, and I live in Saskatoon. I work at LlamaIndex.",
}),
factExtractionBlock({
id: "user-extracted_info",
priority: 1,
llm: llm,
maxFacts: 50,
}),
];
```
Here, we've setup two memory blocks:
- `core_info`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
- `user-extracted_info`: A fact extraction memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
You'll also notice that we've set the `priority` for the `factExtractionBlock` block. This is used to determine the handling when the memory blocks content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.
- `priority=0`: This block will always be kept in memory (`staticBlocks` always have priority 0.)
- `priority=1, 2, 3, etc`: This determines the order in which memory blocks are truncated when the memory exceeds the token limit, to help the overall short-term memory + long-term memory content be less than or equal to the `tokenLimit`.
Now, let's pass these blocks into the `createMemory` function:
```ts
const memory = createMemory({
tokenLimit: 40000,
memoryBlocks: memoryBlocks,
});
```
When memory is retrieved (using `getLLM`), the short-term and long-term memories are merged together. The `Memory` object will ensure that the short-term memory + long-term memory content is less than or equal to the `tokenLimit`. If it is longer, messages are retrieved in the following order:
1. StaticMemoryBlock (information always included)
2. LongTermMemoryBlock (depending on priority)
3. ShortTermMemoryBlock
4. Transient messages
The amount of short-term memory included is specified by the `shortTermTokenLimitRatio`. If it's set to `0.7`, 70% of the `tokenLimit` is used for short-term memory (not including the static memory block).
## Persistence with Snapshots
Save and restore memory state:
```ts twoslash
import { createMemory, loadMemory } from "llamaindex";
const memory = createMemory();
// Add some messages
await memory.add({ role: "user", content: "Hello!" });
// Create snapshot
const snapshot = memory.snapshot();
// Later, restore from the snapshot
const restoredMemory = loadMemory(snapshot);
```
## Examples
Want to learn more about the Memory class? Check out our code examples on [GitHub](https://github.com/run-llama/LlamaIndexTS/tree/main/examples/agents/memory).
@@ -1,4 +1,11 @@
{
"title": "Data",
"pages": ["index", "readers", "data_index", "ingestion_pipeline", "stores"]
"pages": [
"index",
"memory",
"readers",
"data_index",
"ingestion_pipeline",
"stores"
]
}
@@ -28,11 +28,12 @@ embedding vector(1536)
);
```
-- Create a function for similarity search
-- Create a function for similarity search with filtering support
```sql
create function match_documents (
query_embedding vector(1536),
match_count int
match_count int,
filter jsonb DEFAULT '{}'
) returns table (
id uuid,
content text,
@@ -52,6 +53,7 @@ metadata,
embedding,
1 - (embedding <=> query_embedding) as similarity
from documents
where metadata @> filter
order by embedding <=> query_embedding
limit match_count;
end;
@@ -96,6 +98,7 @@ const index = await VectorStoreIndex.fromDocuments(documents, {
```ts
const queryEngine = index.asQueryEngine();
// Basic query without filters
const response = await queryEngine.query({
query: "What is in the document?",
});
@@ -104,6 +107,32 @@ const response = await queryEngine.query({
console.log(response.toString());
```
## Query with filters
You can filter documents based on metadata when querying:
```ts
import { FilterOperator, MetadataFilters } from "llamaindex";
// Create a filter for documents with author = "Jane Smith"
const filters: MetadataFilters = {
filters: [
{
key: "author",
value: "Jane Smith",
operator: FilterOperator.EQ,
},
],
};
// Query with filters
const filteredResponse = await vectorStore.query({
queryEmbedding: await embedModel.getQueryEmbedding("What is vector search?"),
similarityTopK: 5,
filters,
});
```
## Full code
```ts
@@ -11,58 +11,130 @@ npm i llamaindex @llamaindex/google
## Usage
```ts
import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import { Settings } from "llamaindex";
Settings.llm = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
});
```
## Usage with Proxy
```ts
import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
import { Settings } from "llamaindex";
Settings.llm = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
requestOptions: {
baseUrl: <YOUR_PROXY_URL> // optional, but useful for custom endpoints
}
Settings.llm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH,
});
```
### Usage with Vertex AI
To use Gemini via Vertex AI you can use `GeminiVertexSession`.
GeminiVertexSession accepts the env variables: `GOOGLE_VERTEX_LOCATION` and `GOOGLE_VERTEX_PROJECT`
To use Gemini via Vertex AI, you can specify the vertex configuration:
```ts
import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
const gemini = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
session: new GeminiVertexSession({
location: "us-central1", // optional if provided by GOOGLE_VERTEX_LOCATION env variable
project: "project1", // optional if provided by GOOGLE_VERTEX_PROJECT env variable
googleAuthOptions: {...}, // optional, but useful for production. It accepts all values from `GoogleAuthOptions`
}),
const llm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH,
vertex: {
project: "your-cloud-project", // required for Vertex AI
location: "us-central1", // required for Vertex AI
},
});
```
[GoogleAuthOptions](https://github.com/googleapis/google-auth-library-nodejs/blob/main/src/auth/googleauth.ts)
To authenticate for local development:
```bash
npm i @google-cloud/vertexai
gcloud auth application-default login
```
To authenticate for production you'll have to use a [service account](https://cloud.google.com/docs/authentication/). `googleAuthOptions` has `credentials` which might be useful for you.
## Multimodal Usage
Gemini supports multimodal inputs including text, images, audio, and video:
```ts
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import fs from "fs";
const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
const result = await llm.chat({
messages: [
{
role: "user",
content: [
{
type: "text",
text: "What's in this image?",
},
{
type: "image",
data: fs.readFileSync("./image.jpg").toString("base64"),
mimeType: "image/jpeg",
},
],
},
],
});
```
## Tool Calling
Gemini supports function calling with tools:
```ts
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import { tool } from "llamaindex";
import { z } from "zod";
const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
const result = await llm.chat({
messages: [
{
content: "What's the weather in Tokyo?",
role: "user",
},
],
tools: [
tool({
name: "weather",
description: "Get the weather",
parameters: z.object({
location: z.string().describe("The location to get the weather for"),
}),
execute: ({ location }) => {
return `The weather in ${location} is sunny and hot`;
},
}),
],
});
```
## Live API (Real-time Conversations)
For real-time audio/video conversations using [Gemini Live API](https://ai.google.dev/gemini-api/docs/live).
The Live API is running directly in the frontend. That's why you have to generate an ephemeral key first on the server side and pass it to the frontend.
To use the Live API, make sure to pass `apiVersion: "v1alpha"` to the `httpOptions`.
```ts
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
// Server-side: Generate ephemeral key
const serverLlm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
httpOptions: { apiVersion: "v1alpha" },
});
const ephemeralKey = await serverLlm.live.getEphemeralKey();
// Client-side: Use ephemeral key for Live API
const llm = gemini({
apiKey: ephemeralKey,
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
voiceName: "Zephyr",
httpOptions: { apiVersion: "v1alpha" },
});
const session = await llm.live.connect();
```
## Load and index documents
For this example, we will use a single document. In a real-world scenario, you would have multiple documents to index.
@@ -90,11 +162,11 @@ const results = await queryEngine.query({
## Full Example
```ts
import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import { Document, VectorStoreIndex, Settings } from "llamaindex";
Settings.llm = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
Settings.llm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH,
});
async function main() {
@@ -104,9 +176,7 @@ async function main() {
const index = await VectorStoreIndex.fromDocuments([document]);
// Create a query engine
const queryEngine = index.asQueryEngine({
retriever,
});
const queryEngine = index.asQueryEngine();
const query = "What is the meaning of life?";
@@ -11,6 +11,7 @@ A retriever in LlamaIndex is what is used to fetch `Node`s from an index using a
- [KeywordTableLLMRetriever](/docs/api/classes/KeywordTableLLMRetriever) uses an LLM to extract keywords from the query and retrieve relevant nodes based on keyword matches.
- [KeywordTableSimpleRetriever](/docs/api/classes/KeywordTableSimpleRetriever) uses a basic frequency-based approach to extract keywords and retrieve nodes.
- [KeywordTableRAKERetriever](/docs/api/classes/KeywordTableRAKERetriever) uses the RAKE (Rapid Automatic Keyword Extraction) algorithm to extract keywords from the query, focusing on co-occurrence and context for keyword-based retrieval.
- [Bm25Retriever](/docs/api/classes/Bm25Retriever) uses the BM25 ranking algorithm to score nodes against the terms of the query and retrieve the most relevant ones.
```typescript
const retriever = vectorIndex.asRetriever({
+1 -1
View File
@@ -1,3 +1,3 @@
{
"pages": ["llamaindex", "api", "llamaflow", "chat-ui"]
"pages": ["llamaindex", "api", "workflows", "chat-ui"]
}
+1 -1
View File
@@ -4,7 +4,7 @@
"tasks": {
"build": {
"inputs": [
"node_modules/@llama-flow/docs/**",
"node_modules/@llamaindex/workflow-docs/**",
"node_modules/@llamaindex/chat-ui-docs/**",
"src/**/*.ts",
"src/**/*.tsx",
@@ -1,5 +1,25 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.173
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.0.172
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.0.171
### Patch Changes
- llamaindex@0.11.10
## 0.0.170
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.170",
"version": "0.0.173",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,17 @@
# @llamaindex/llama-parse-browser-test
## 0.0.72
### Patch Changes
- @llamaindex/cloud@4.0.17
## 0.0.71
### Patch Changes
- @llamaindex/cloud@4.0.16
## 0.0.70
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llama-parse-browser-test",
"private": true,
"version": "0.0.70",
"version": "0.0.72",
"type": "module",
"scripts": {
"dev": "vite",
+20
View File
@@ -1,5 +1,25 @@
# @llamaindex/next-agent-test
## 0.1.173
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.1.172
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.1.171
### Patch Changes
- llamaindex@0.11.10
## 0.1.170
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.170",
"version": "0.1.173",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,25 @@
# test-edge-runtime
## 0.1.172
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.1.171
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.1.170
### Patch Changes
- llamaindex@0.11.10
## 0.1.169
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.169",
"version": "0.1.172",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,36 @@
# @llamaindex/next-node-runtime
## 0.1.41
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
- @llamaindex/huggingface@0.1.17
- @llamaindex/readers@3.1.12
## 0.1.40
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
- @llamaindex/huggingface@0.1.16
- @llamaindex/readers@3.1.11
## 0.1.39
### Patch Changes
- llamaindex@0.11.10
## 0.1.38
### Patch Changes
- Updated dependencies [c5846bd]
- @llamaindex/readers@3.1.10
## 0.1.37
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.1.37",
"version": "0.1.41",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,25 @@
# vite-import-llamaindex
## 0.0.39
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.0.38
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.0.37
### Patch Changes
- llamaindex@0.11.10
## 0.0.36
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "vite-import-llamaindex",
"private": true,
"version": "0.0.36",
"version": "0.0.39",
"type": "module",
"scripts": {
"build": "vite build",
@@ -1,5 +1,25 @@
# @llamaindex/waku-query-engine-test
## 0.0.173
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 0.0.172
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 0.0.171
### Patch Changes
- llamaindex@0.11.10
## 0.0.170
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.170",
"version": "0.0.173",
"type": "module",
"private": true,
"scripts": {
+1 -1
View File
@@ -10,7 +10,7 @@ import { mockLLMEvent } from "./utils.js";
let llm: LLM;
beforeEach(async () => {
Settings.llm = new Anthropic({
model: "claude-3-opus",
model: "claude-3.5-sonnet",
});
llm = Settings.llm;
});
+1 -1
View File
@@ -7,7 +7,7 @@
"dependencies": {
"@llamaindex/workflow": "1.1.1",
"llamaindex": "0.10.5",
"zod": "^3.23.8"
"zod": "^3.25.67"
},
"devDependencies": {
"tsx": "^4.19.1",
+1 -1
View File
@@ -27,6 +27,6 @@
"pg": "^8.12.0",
"pgvector": "0.2.0",
"tsx": "^4.19.3",
"zod": "^3.24.2"
"zod": "^3.25.67"
}
}
+117
View File
@@ -1,5 +1,122 @@
# examples
## 0.3.26
### Patch Changes
- Updated dependencies [d578889]
- Updated dependencies [0fcc92f]
- Updated dependencies [515a8b9]
- Updated dependencies [3cd8a57]
- Updated dependencies [f2dfd30]
- @llamaindex/core@0.6.13
- llamaindex@0.11.12
- @llamaindex/tools@0.1.3
- @llamaindex/bm25-retriever@0.0.2
- @llamaindex/cloud@4.0.17
- @llamaindex/node-parser@2.0.13
- @llamaindex/anthropic@0.3.15
- @llamaindex/assemblyai@0.1.12
- @llamaindex/clip@0.0.63
- @llamaindex/cohere@0.0.27
- @llamaindex/deepinfra@0.0.63
- @llamaindex/discord@0.1.12
- @llamaindex/google@0.3.12
- @llamaindex/huggingface@0.1.17
- @llamaindex/jinaai@0.0.23
- @llamaindex/mistral@0.1.13
- @llamaindex/mixedbread@0.0.27
- @llamaindex/notion@0.1.12
- @llamaindex/ollama@0.1.13
- @llamaindex/openai@0.4.7
- @llamaindex/perplexity@0.0.20
- @llamaindex/portkey-ai@0.0.55
- @llamaindex/replicate@0.0.55
- @llamaindex/astra@0.0.27
- @llamaindex/azure@0.1.24
- @llamaindex/chroma@0.0.27
- @llamaindex/elastic-search@0.1.13
- @llamaindex/firestore@1.0.20
- @llamaindex/milvus@0.1.22
- @llamaindex/mongodb@0.0.28
- @llamaindex/pinecone@0.1.13
- @llamaindex/postgres@0.0.56
- @llamaindex/qdrant@0.1.23
- @llamaindex/supabase@0.1.13
- @llamaindex/upstash@0.0.27
- @llamaindex/weaviate@0.0.28
- @llamaindex/vercel@0.1.13
- @llamaindex/voyage-ai@1.0.19
- @llamaindex/readers@3.1.12
- @llamaindex/workflow@1.1.13
- @llamaindex/deepseek@0.0.23
- @llamaindex/fireworks@0.0.23
- @llamaindex/groq@0.0.78
- @llamaindex/together@0.0.23
- @llamaindex/vllm@0.0.49
- @llamaindex/xai@0.0.10
## 0.3.25
### Patch Changes
- Updated dependencies [7039e1a]
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
- @llamaindex/core@0.6.12
- @llamaindex/google@0.3.11
- @llamaindex/cloud@4.0.16
- @llamaindex/node-parser@2.0.12
- @llamaindex/anthropic@0.3.14
- @llamaindex/assemblyai@0.1.11
- @llamaindex/clip@0.0.62
- @llamaindex/cohere@0.0.26
- @llamaindex/deepinfra@0.0.62
- @llamaindex/discord@0.1.11
- @llamaindex/huggingface@0.1.16
- @llamaindex/jinaai@0.0.22
- @llamaindex/mistral@0.1.12
- @llamaindex/mixedbread@0.0.26
- @llamaindex/notion@0.1.11
- @llamaindex/ollama@0.1.12
- @llamaindex/openai@0.4.6
- @llamaindex/perplexity@0.0.19
- @llamaindex/portkey-ai@0.0.54
- @llamaindex/replicate@0.0.54
- @llamaindex/astra@0.0.26
- @llamaindex/azure@0.1.23
- @llamaindex/chroma@0.0.26
- @llamaindex/elastic-search@0.1.12
- @llamaindex/firestore@1.0.19
- @llamaindex/milvus@0.1.21
- @llamaindex/mongodb@0.0.27
- @llamaindex/pinecone@0.1.12
- @llamaindex/postgres@0.0.55
- @llamaindex/qdrant@0.1.22
- @llamaindex/supabase@0.1.12
- @llamaindex/upstash@0.0.26
- @llamaindex/weaviate@0.0.27
- @llamaindex/vercel@0.1.12
- @llamaindex/voyage-ai@1.0.18
- @llamaindex/readers@3.1.11
- @llamaindex/tools@0.1.1
- @llamaindex/workflow@1.1.12
- @llamaindex/deepseek@0.0.22
- @llamaindex/fireworks@0.0.22
- @llamaindex/groq@0.0.77
- @llamaindex/together@0.0.22
- @llamaindex/vllm@0.0.48
- @llamaindex/xai@0.0.9
## 0.3.24
### Patch Changes
- Updated dependencies [096bf2b]
- Updated dependencies [c5846bd]
- @llamaindex/tools@0.1.0
- @llamaindex/readers@3.1.10
## 0.3.23
### Patch Changes
+12 -3
View File
@@ -6,15 +6,24 @@ async function main() {
// Create an MCP server for filesystem tools
const server = mcp({
command: "npx",
args: ["-y", "@modelcontextprotocol/server-filesystem", "."],
args: ["-y", "@modelcontextprotocol/server-filesystem@latest", "."],
verbose: true,
});
// You can also connect to the MCP server using SSE
// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse
//
// You can also connect to a remote MCP server using:
// 1. StreamableHTTP transport (recommended)
// See: https://modelcontextprotocol.io/docs/concepts/transports#streamable-http
// const server = mcp({
// url: "http://localhost:8000/mcp",
// verbose: true,
// });
// 2. Or using SSE transport (will be deprecated soon)
// See: https://modelcontextprotocol.io/docs/concepts/transports#server-sent-events-sse-deprecated
// const server = mcp({
// url: "http://localhost:8000/mcp",
// useSSETransport: true,
// verbose: true,
// });
try {
// Create an agent that uses the MCP tools
+36
View File
@@ -0,0 +1,36 @@
import { openai } from "@llamaindex/openai";
import { agent } from "@llamaindex/workflow";
import { createMemory, staticBlock } from "llamaindex";
/**
 * Simple example: an agent with predefined memory.
 *
 * Builds a memory that contains a single static block describing the user,
 * passes that memory to an agent workflow, and sends two prompts through the
 * same workflow, printing each response.
 */
async function simpleAgentMemoryExample() {
  console.log("=== Simple Agent Memory Example ===");

  // Memory seeded with one static block of background information.
  const memory = createMemory({
    memoryBlocks: [
      staticBlock({
        content:
          "The user is a software engineer who loves TypeScript and LlamaIndex.",
      }),
    ],
  });

  // Create agent workflow
  const workflow = agent({
    name: "assistant",
    llm: openai({ model: "gpt-4.1-nano" }),
    memory,
  });

  // Test - the first prompt introduces the name "John"; the second prompt
  // asks for it back through the same workflow (and therefore the same memory).
  console.log("\n--- Testing Memory Context ---");
  const result = await workflow.run("Hi, my name is John. Do you know me?");
  console.log("Assistant Response:", result.data.result);

  const result2 = await workflow.run("What is my name?");
  console.log("Assistant Response:", result2.data.result);
}
// Run the example
simpleAgentMemoryExample().catch(console.error);
+58
View File
@@ -0,0 +1,58 @@
import { openai } from "@llamaindex/openai";
import { createMemory } from "llamaindex";
/**
 * Example: basic memory usage with the `createMemory` factory.
 *
 * Stores three chat messages in a memory created with a 30-token limit, prints
 * what `getLLM()` returns when no LLM is supplied, and then prints what
 * `getLLM(llm)` returns when an OpenAI LLM instance is supplied.
 */
async function basicMemoryExample() {
  console.log("\n=== Example: Basic Memory Usage with Factory ===");

  const memory = createMemory({ tokenLimit: 30 });

  // Add messages to memory
  await memory.add({
    role: "user",
    content: "Hi, my name is John and I'm a software engineer.",
  });
  await memory.add({
    role: "assistant",
    content: "Hello John! Nice to meet you. How can I help you today?",
  });
  await memory.add({
    role: "user",
    content: "I love working with TypeScript and React.",
  });

  // Not every message is returned here because the token limit is set to 30.
  const limitedMessages = await memory.getLLM();
  console.log(
    `\nLLM messages (${limitedMessages.length} messages) limited by a small token limit:`,
  );
  for (const [position, entry] of limitedMessages.entries()) {
    console.log(`${position + 1}. ${entry.role}: ${entry.content}`);
  }

  // When an LLM is passed to getLLM, the LLM's window size is used as the
  // limit instead of the token limit configured above.
  const llm = openai({ model: "gpt-4.1-mini" });
  const windowMessages = await memory.getLLM(llm);

  // Now all messages are returned because the model's window size is much larger.
  console.log(
    `\nLLM messages with LLM (${windowMessages.length} messages) limited by LLM window size:`,
  );
  for (const [position, entry] of windowMessages.entries()) {
    console.log(`${position + 1}. ${entry.role}: ${entry.content}`);
  }
}
/**
 * Entry point: prints a banner, runs the basic memory example, and logs any
 * error the example throws instead of letting it propagate.
 */
async function main() {
  console.log("🧠 Basic Memory Factory Examples");
  console.log("===============================");

  try {
    await basicMemoryExample();
  } catch (err) {
    console.error("Error running basic memory examples:", err);
  }
}

// Last-resort handler for anything that escapes main().
main().catch(console.error);
+101
View File
@@ -0,0 +1,101 @@
import { openai } from "@llamaindex/openai";
import { createMemory, factExtractionBlock } from "llamaindex";
// Configure OpenAI
const llm = openai({ model: "gpt-4.1-mini" });
/**
 * Example: memory with fact extraction.
 *
 * Creates a memory configured with one `factExtractionBlock`, feeds an
 * eight-turn conversation into it one message at a time, and prints the
 * messages returned by `getLLM(llm)`.
 */
async function factExtractionMemoryExample() {
  console.log("\n=== Memory with Fact Extraction ===");

  // Create memory with a fact extraction block
  const memory = createMemory([], {
    tokenLimit: 100,
    shortTermTokenLimitRatio: 0.7, // 70% for short-term, 30% for long-term
    memoryBlocks: [
      factExtractionBlock({
        id: "user-facts",
        priority: 5,
        llm: llm,
        maxFacts: 10,
        isLongTerm: true,
      }),
    ],
  });

  // Simulated conversation that contains several facts about the user
  const conversationTurns = [
    {
      role: "user",
      content: "Hi, I'm Sarah and I work as a data scientist at Google.",
    },
    {
      role: "assistant",
      content:
        "Hello Sarah! It's great to meet you. Data science at Google must be exciting!",
    },
    {
      role: "user",
      content:
        "Yes, I specialize in machine learning and natural language processing.",
    },
    {
      role: "assistant",
      content: "That's impressive! ML and NLP are fascinating fields.",
    },
    {
      role: "user",
      content:
        "I have a PhD in Computer Science from Stanford, and I love hiking on weekends.",
    },
    {
      role: "assistant",
      content:
        "Wow, Stanford PhD! And hiking is a great way to unwind from tech work.",
    },
    {
      role: "user",
      content: "I also have two cats named Whiskers and Mittens.",
    },
    {
      role: "assistant",
      content:
        "Cats make wonderful companions! Whiskers and Mittens are cute names.",
    },
  ];

  // Add the turns sequentially so they are stored in conversation order
  console.log("Adding conversation to memory...");
  for (const turn of conversationTurns) {
    await memory.add(turn);
  }

  // Get messages - facts should be extracted and included
  const extracted = await memory.getLLM(llm);
  console.log("\nMessages with extracted facts:");
  for (const [position, entry] of extracted.entries()) {
    console.log(`${position + 1}. ${entry.role ?? "unknown"}: ${entry.content}`);
  }

  // Sample output:
  // Messages with extracted facts:
  // 1. assistant: Cats make wonderful companions! Whiskers and Mittens are cute names.
  // 2. user: I also have two cats named Whiskers and Mittens.
  // 3. assistant: Wow, Stanford PhD! And hiking is a great way to unwind from tech work.
  // 4. memory: Sarah works as a data scientist at Google
  // Sarah specializes in machine learning and natural language processing
  // Sarah has a PhD in Computer Science from Stanford
  // Sarah enjoys hiking on weekends
}
/**
 * Entry point: prints a banner, runs the fact extraction example, and logs any
 * error the example throws instead of letting it propagate.
 */
async function main() {
  console.log("🧠 Fact Extraction Memory Example");
  console.log("=================================");

  try {
    await factExtractionMemoryExample();
  } catch (err) {
    console.error("Error running fact extraction memory example:", err);
  }
}

// Last-resort handler for anything that escapes main().
main().catch(console.error);
+62
View File
@@ -0,0 +1,62 @@
import { openai } from "@llamaindex/openai";
import { createMemory, staticBlock } from "llamaindex";
// Configure OpenAI
const llm = openai({ model: "gpt-4.1-mini" });
/**
 * Example: memory with static blocks.
 *
 * Creates a memory with a 30-token limit and one static block, adds three
 * chat messages, and prints the messages returned by `getLLM(llm)`.
 */
async function staticMemoryBlockExample() {
  console.log("\n=== Memory with Static Blocks ===");
  console.log("- Memory always include static block");
  console.log("- Memory cut off the messages within token limit\n");

  // Create memory with a static block
  const memory = createMemory([], {
    tokenLimit: 30, // A small token limit which is not enough for the whole conversation below
    memoryBlocks: [
      staticBlock({
        content:
          "The user's name is John and he is a software engineer who loves TypeScript and LlamaIndex.",
      }),
    ],
  });

  // Add some messages to the memory
  await memory.add({
    role: "user",
    content: "What do you know about me?",
  });
  await memory.add({
    role: "assistant",
    content:
      "Based on our conversation, I know you're John, a software engineer who enjoys working with TypeScript and LlamaIndex!",
  });
  await memory.add({
    role: "user",
    content: "Which language does LlamaIndex support?",
  });

  // Get messages:
  // - the static block is always included
  // - only the last message is included because of the token limit set above
  const retrieved = await memory.getLLM(llm);
  for (const [position, entry] of retrieved.entries()) {
    console.log(`${position + 1}. ${entry.role}: ${entry.content}`);
  }

  // Messages with static block:
  // 1. user: The user's name is John and he is a software engineer who loves TypeScript and LlamaIndex.
  // 2. user: Which language does LlamaIndex support?
}
/**
 * Entry point: runs the static memory block example and logs any error the
 * example throws instead of letting it propagate.
 */
async function main() {
  try {
    await staticMemoryBlockExample();
  } catch (err) {
    console.error("Error running static memory blocks example:", err);
  }
}

// Last-resort handler for anything that escapes main().
main().catch(console.error);
+72
View File
@@ -0,0 +1,72 @@
<?xml version="1.0" encoding="UTF-8"?>
<company name="MidSizeCorp" founded="2008">
<division name="Engineering" head="Dana White">
<department name="Frontend" lead="Alex Kim">
<team name="Web">
<employee id="E01">
<name>Jordan Lee</name>
<role>Lead Developer</role>
<projects>
<project code="PRJ101" status="active">
<title>User Portal</title>
<deadline>2025-08-01</deadline>
<tasks>
<task id="T1011">
<description>Implement login page</description>
<due>2025-05-10</due>
</task>
<task id="T1012">
<description>Design dashboard</description>
<due>2025-05-20</due>
</task>
</tasks>
</project>
</projects>
</employee>
<employee id="E02">
<name>Riley Chen</name>
<role>UI Designer</role>
</employee>
</team>
<team name="Mobile">
<employee id="E03">
<name>Sam Patel</name>
<role>iOS Developer</role>
</employee>
</team>
</department>
<department name="Backend" lead="Morgan Reed">
<team name="API">
<employee id="E04">
<name>Taylor Jones</name>
<role>API Engineer</role>
</employee>
</team>
<team name="Database">
<employee id="E05">
<name>Casey Nguyen</name>
<role>DB Administrator</role>
</employee>
</team>
</department>
</division>
<division name="Marketing" head="Pat Morgan">
<department name="Digital" lead="Alex Rivera">
<team name="Content">
<employee id="M01">
<name>Charlie Brooks</name>
<role>Content Strategist</role>
</employee>
</team>
</department>
</division>
<headquarters location="Chicago, USA">
<address>
<street>789 Lake Shore Drive</street>
<city>Chicago</city>
<zip>60601</zip>
</address>
</headquarters>
</company>
Binary file not shown.
+1 -1
View File
@@ -59,7 +59,7 @@ async function main() {
const anthropic = new Anthropic({
apiKey: process.env.ANTHROPIC_API_KEY,
model: "claude-3-opus",
model: "claude-3.5-sonnet",
});
// Create a ReActAgent with the function tools
@@ -61,7 +61,7 @@ async function main() {
// Create a ReActAgent with the function tools
const agent = new ReActAgent({
llm: new Anthropic({
model: "claude-3-opus",
model: "claude-3.5-sonnet",
}),
tools: [functionTool, functionTool2],
});
@@ -1,5 +1,5 @@
import { Anthropic } from "@llamaindex/anthropic";
import { ChatMemoryBuffer, SimpleChatEngine } from "llamaindex";
import { createMemory, SimpleChatEngine } from "llamaindex";
import { stdin as input, stdout as output } from "node:process";
import readline from "node:readline/promises";
@@ -9,14 +9,12 @@ import readline from "node:readline/promises";
model: "claude-3-7-sonnet",
});
// chatHistory will store all the messages in the conversation
const chatHistory = new ChatMemoryBuffer({
chatHistory: [
{
content: "You want to talk in rhymes.",
role: "system",
},
],
});
const chatHistory = createMemory([
{
content: "You want to talk in rhymes.",
role: "system",
},
]);
const chatEngine = new SimpleChatEngine({
llm,
memory: chatHistory,
+56 -8
View File
@@ -1,14 +1,16 @@
import { Gemini, GEMINI_MODEL } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
import fs from "fs";
import { tool } from "llamaindex";
import { z } from "zod";
(async () => {
if (!process.env.GOOGLE_API_KEY) {
throw new Error("Please set the GOOGLE_API_KEY environment variable.");
}
const gemini = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO_1_5,
});
const result = await gemini.chat({
const llm = gemini({ model: GEMINI_MODEL.GEMINI_2_0_FLASH });
// normal chat
const result = await llm.chat({
messages: [
{ content: "You want to talk in rhymes.", role: "system" },
{
@@ -18,10 +20,10 @@ import fs from "fs";
},
],
});
console.log(result);
console.log("\n normal chat: \n", result);
// chat with file
const resultWithFile = await gemini.chat({
const resultWithFile = await llm.chat({
messages: [
{
role: "user",
@@ -39,6 +41,52 @@ import fs from "fs";
},
],
});
console.log("\n chat with file: \n", resultWithFile);
console.log(resultWithFile);
// chat with image base64
const resultWithImageFile = await llm.chat({
messages: [
{
role: "user",
content: [
{
type: "text",
text: "What's in this image?",
},
{
type: "image",
data: fs
.readFileSync("./multimodal/data/60.jpg")
.toString("base64"),
mimeType: "image/png",
},
],
},
],
});
console.log("\n chat with image base64: \n", resultWithImageFile);
// chat with tool
const resultWithTool = await llm.chat({
messages: [
{
content: "What's the weather in Tokyo?",
role: "user",
},
],
tools: [
tool({
name: "weather",
description: "Get the weather",
parameters: z.object({
location: z.string().describe("The location to get the weather for"),
}),
execute: ({ location }) => {
console.log("weather", location);
return `The weather in ${location} is sunny and hot`;
},
}),
],
});
console.log("\n chat with tool: \n", resultWithTool.message.options); // should have toolCall
})();
+8 -5
View File
@@ -1,11 +1,14 @@
import { Gemini, GEMINI_MODEL, GeminiVertexSession } from "@llamaindex/google";
import { gemini, GEMINI_MODEL } from "@llamaindex/google";
(async () => {
const gemini = new Gemini({
model: GEMINI_MODEL.GEMINI_PRO,
session: new GeminiVertexSession(),
const llm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH,
vertex: {
project: "your-cloud-project", // update to your cloud project
location: "us-central1",
},
});
const result = await gemini.chat({
const result = await llm.chat({
messages: [
{ content: "You want to talk in rhymes.", role: "system" },
{
+10
View File
@@ -16,9 +16,19 @@ async function main() {
console.log("🚀 Initializing Gemini Live API example...");
// Server-side (token creation):
const serverllm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to generate ephemeral key
});
const ephemeralKey = await serverllm.live.getEphemeralKey();
// Client-side (Live API connection):
const llm = gemini({
apiKey: ephemeralKey, // use ephemeral key for client-side
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
voiceName: "Zephyr",
httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to init client with ephemeral key
});
console.log("📡 Connecting to Gemini Live session...");
+11 -1
View File
@@ -3,8 +3,18 @@ import { liveEvents } from "llamaindex";
import { saveWavFile } from "./util";
async function main() {
const llm = gemini({
// Server-side (token creation):
const serverllm = gemini({
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to generate ephemeral key
});
const ephemeralKey = await serverllm.live.getEphemeralKey();
// Client-side (Live API connection):
const llm = gemini({
apiKey: ephemeralKey, // use ephemeral key for client-side
model: GEMINI_MODEL.GEMINI_2_0_FLASH_LIVE,
httpOptions: { apiVersion: "v1alpha" }, // must use v1alpha to init client with ephemeral key
});
const session = await llm.live.connect();
+48 -47
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/examples",
"version": "0.3.23",
"version": "0.3.26",
"private": true,
"scripts": {
"lint": "eslint .",
@@ -11,51 +11,52 @@
"@azure/cosmos": "^4.1.1",
"@azure/identity": "^4.4.1",
"@azure/search-documents": "^12.1.0",
"@llamaindex/anthropic": "^0.3.13",
"@llamaindex/assemblyai": "^0.1.10",
"@llamaindex/astra": "^0.0.25",
"@llamaindex/azure": "^0.1.22",
"@llamaindex/chroma": "^0.0.25",
"@llamaindex/clip": "^0.0.61",
"@llamaindex/cloud": "^4.0.15",
"@llamaindex/cohere": "^0.0.25",
"@llamaindex/core": "^0.6.11",
"@llamaindex/deepinfra": "^0.0.61",
"@llamaindex/deepseek": "^0.0.21",
"@llamaindex/discord": "^0.1.10",
"@llamaindex/elastic-search": "^0.1.11",
"@llamaindex/anthropic": "^0.3.15",
"@llamaindex/assemblyai": "^0.1.12",
"@llamaindex/astra": "^0.0.27",
"@llamaindex/azure": "^0.1.24",
"@llamaindex/bm25-retriever": "^0.0.2",
"@llamaindex/chroma": "^0.0.27",
"@llamaindex/clip": "^0.0.63",
"@llamaindex/cloud": "^4.0.17",
"@llamaindex/cohere": "^0.0.27",
"@llamaindex/core": "^0.6.13",
"@llamaindex/deepinfra": "^0.0.63",
"@llamaindex/deepseek": "^0.0.23",
"@llamaindex/discord": "^0.1.12",
"@llamaindex/elastic-search": "^0.1.13",
"@llamaindex/env": "^0.1.30",
"@llamaindex/firestore": "^1.0.18",
"@llamaindex/fireworks": "^0.0.21",
"@llamaindex/google": "^0.3.10",
"@llamaindex/groq": "^0.0.76",
"@llamaindex/huggingface": "^0.1.15",
"@llamaindex/jinaai": "^0.0.21",
"@llamaindex/milvus": "^0.1.20",
"@llamaindex/mistral": "^0.1.11",
"@llamaindex/mixedbread": "^0.0.25",
"@llamaindex/mongodb": "^0.0.26",
"@llamaindex/node-parser": "^2.0.11",
"@llamaindex/notion": "^0.1.10",
"@llamaindex/ollama": "^0.1.11",
"@llamaindex/openai": "^0.4.5",
"@llamaindex/perplexity": "^0.0.18",
"@llamaindex/pinecone": "^0.1.11",
"@llamaindex/portkey-ai": "^0.0.53",
"@llamaindex/postgres": "^0.0.54",
"@llamaindex/qdrant": "^0.1.21",
"@llamaindex/readers": "^3.1.9",
"@llamaindex/replicate": "^0.0.53",
"@llamaindex/supabase": "^0.1.10",
"@llamaindex/together": "^0.0.21",
"@llamaindex/tools": "^0.0.17",
"@llamaindex/upstash": "^0.0.25",
"@llamaindex/vercel": "^0.1.11",
"@llamaindex/vllm": "^0.0.47",
"@llamaindex/voyage-ai": "^1.0.17",
"@llamaindex/weaviate": "^0.0.26",
"@llamaindex/workflow": "^1.1.10",
"@llamaindex/xai": "workspace:^0.0.8",
"@llamaindex/firestore": "^1.0.20",
"@llamaindex/fireworks": "^0.0.23",
"@llamaindex/google": "^0.3.12",
"@llamaindex/groq": "^0.0.78",
"@llamaindex/huggingface": "^0.1.17",
"@llamaindex/jinaai": "^0.0.23",
"@llamaindex/milvus": "^0.1.22",
"@llamaindex/mistral": "^0.1.13",
"@llamaindex/mixedbread": "^0.0.27",
"@llamaindex/mongodb": "^0.0.28",
"@llamaindex/node-parser": "^2.0.13",
"@llamaindex/notion": "^0.1.12",
"@llamaindex/ollama": "^0.1.13",
"@llamaindex/openai": "^0.4.7",
"@llamaindex/perplexity": "^0.0.20",
"@llamaindex/pinecone": "^0.1.13",
"@llamaindex/portkey-ai": "^0.0.55",
"@llamaindex/postgres": "^0.0.56",
"@llamaindex/qdrant": "^0.1.23",
"@llamaindex/readers": "^3.1.12",
"@llamaindex/replicate": "^0.0.55",
"@llamaindex/supabase": "^0.1.13",
"@llamaindex/together": "^0.0.23",
"@llamaindex/tools": "^0.1.3",
"@llamaindex/upstash": "^0.0.27",
"@llamaindex/vercel": "^0.1.13",
"@llamaindex/vllm": "^0.0.49",
"@llamaindex/voyage-ai": "^1.0.19",
"@llamaindex/weaviate": "^0.0.28",
"@llamaindex/workflow": "^1.1.13",
"@llamaindex/xai": "workspace:^0.0.10",
"@notionhq/client": "^2.2.15",
"@pinecone-database/pinecone": "^4.0.0",
"@vercel/postgres": "^0.10.0",
@@ -64,11 +65,11 @@
"commander": "^12.1.0",
"dotenv": "^16.4.5",
"js-tiktoken": "^1.0.14",
"llamaindex": "^0.11.9",
"llamaindex": "^0.11.12",
"mongodb": "6.7.0",
"postgres": "^3.4.4",
"wikipedia": "^2.1.2",
"zod": "^3.23.8"
"zod": "^3.25.67"
},
"devDependencies": {
"@types/node": "^22.9.0",
+8 -13
View File
@@ -2,11 +2,7 @@ import { stdin as input, stdout as output } from "node:process";
import readline from "node:readline/promises";
import { OpenAI } from "@llamaindex/openai";
import {
ChatSummaryMemoryBuffer,
Settings,
SimpleChatEngine,
} from "llamaindex";
import { createMemory, Settings, SimpleChatEngine } from "llamaindex";
if (process.env.NODE_ENV === "development") {
Settings.callbackManager.on("llm-end", (event) => {
@@ -15,10 +11,13 @@ if (process.env.NODE_ENV === "development") {
}
async function main() {
// Set maxTokens to 75% of the context window size of 4096
// This will trigger the summarizer once the chat history reaches 25% of the context window size (1024 tokens)
const llm = new OpenAI({ model: "gpt-3.5-turbo", maxTokens: 4096 * 0.75 });
const chatHistory = new ChatSummaryMemoryBuffer({ llm });
const llm = new OpenAI({ model: "gpt-3.5-turbo" });
const chatHistory = createMemory([
{
content: "You are a helpful assistant.",
role: "system",
},
]);
const chatEngine = new SimpleChatEngine({ llm });
const rl = readline.createInterface({ input, output });
@@ -29,10 +28,6 @@ async function main() {
chatHistory,
stream: true,
});
if (chatHistory.getLastSummary()) {
// Print the summary of the conversation so far that is produced by the SummaryChatHistory
console.log(`Summary: ${chatHistory.getLastSummary()?.content}`);
}
for await (const chunk of stream) {
process.stdout.write(chunk.response);
}
+4 -1
View File
@@ -15,11 +15,14 @@
"start:llamaparse-json": "node --import tsx ./src/llamaparse-json.ts",
"start:discord": "node --import tsx ./src/discord.ts",
"start:json": "node --import tsx ./src/json.ts",
"start:obsidian": "node --import tsx ./src/obsidian.ts"
"start:obsidian": "node --import tsx ./src/obsidian.ts",
"start:xml": "node --import tsx ./src/xml.ts",
"start:excel": "node --import tsx ./src/excel.ts"
},
"dependencies": {
"@llamaindex/cloud": "workspace:* || ^2.0.24",
"@llamaindex/readers": "workspace:* || ^1.0.25",
"@llamaindex/excel": "workspace:*",
"llamaindex": "workspace:* || ^0.8.37"
},
"devDependencies": {
+20
View File
@@ -0,0 +1,20 @@
import { ExcelReader } from "@llamaindex/excel";
/**
 * Loads the sample Excel sheet with `ExcelReader` and prints the text of every
 * returned document, each followed by a separator line.
 */
async function main() {
  // Load the Excel file
  const reader = new ExcelReader({
    sheetSpecifier: 0,
    concatRows: true,
    fieldSeparator: ",",
    keyValueSeparator: ":",
  });
  const documents = await reader.loadData("../data/sample_excel_sheet.xls");

  for (const doc of documents) {
    console.log(doc.text);
    console.log("----");
  }
}

main().catch(console.error);
+1 -1
View File
@@ -1,4 +1,4 @@
import { LlamaParseReader } from "@llamaindex/cloud";
import { LlamaParseReader } from "@llamaindex/cloud/reader";
import { openai, OpenAIEmbedding } from "@llamaindex/openai";
import { Settings, VectorStoreIndex } from "llamaindex";
@@ -1,4 +1,4 @@
import { LlamaParseReader } from "@llamaindex/cloud";
import { LlamaParseReader } from "@llamaindex/cloud/reader";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import { VectorStoreIndex } from "llamaindex";
+16
View File
@@ -0,0 +1,16 @@
import { XMLReader } from "@llamaindex/readers/xml";
/**
 * Loads the sample company XML file with `XMLReader` and prints the text of
 * every returned document, each followed by a separator line.
 */
async function main() {
  // Load the XML file
  const reader = new XMLReader({
    splitLevel: 2,
  });
  const documents = await reader.loadData("../data/company.xml");

  for (const doc of documents) {
    console.log(doc.text);
    console.log("----");
  }
}

main().catch(console.error);
+14
View File
@@ -0,0 +1,14 @@
# BM25 Retriever
In this guide, we introduce a BM25 retriever that searches documents using the BM25 method. BM25 (Best Matching 25) is a ranking function that extends TF-IDF by taking term frequency saturation and document length into account. It ranks documents based on how often the query terms occur and how rare they are across the corpus.
## Setup
1. `cd` into the `examples` directory
2. Run `npm i`
## Example
```bash
npx tsx ./retrievers/bm25/example.ts
```
+33
View File
@@ -0,0 +1,33 @@
import { Bm25Retriever } from "@llamaindex/bm25-retriever";
import { OpenAIEmbedding } from "@llamaindex/openai";
import { PDFReader } from "@llamaindex/readers/pdf";
import { MetadataMode, Settings, VectorStoreIndex } from "llamaindex";
Settings.embedModel = new OpenAIEmbedding();
/**
 * BM25 retrieval example.
 *
 * Reads a PDF, builds a `VectorStoreIndex` from its documents, creates a
 * `Bm25Retriever` over the index's doc store, runs one query, and prints the
 * score and content of each returned result.
 */
async function main() {
  // Load PDF
  const pdfReader = new PDFReader();
  const documents = await pdfReader.loadData("./data/brk-2022.pdf");

  // Split text and create embeddings. Store them in a VectorStoreIndex
  const index = await VectorStoreIndex.fromDocuments(documents);

  // The retriever is given the index's doc store and a topK of 3
  const retriever = new Bm25Retriever({
    docStore: index.docStore,
    topK: 3,
  });

  // Query the data
  const results = await retriever.retrieve({
    query: "What mistakes did Warren E. Buffett make?",
  });

  // Output response
  for (const hit of results) {
    console.log(`Score: ${hit.score}`);
    console.log(`Text: ${hit.node.getContent(MetadataMode.NONE)}`);
  }
}

main().catch(console.error);
+1 -8
View File
@@ -1,8 +1,4 @@
import {
GEMINI_EMBEDDING_MODEL,
GeminiEmbedding,
GeminiSession,
} from "@llamaindex/google";
import { GEMINI_EMBEDDING_MODEL, GeminiEmbedding } from "@llamaindex/google";
import { QdrantVectorStore } from "@llamaindex/qdrant";
import {
Document,
@@ -12,9 +8,6 @@ import {
const embedding = new GeminiEmbedding({
model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
session: new GeminiSession({
apiKey: process.env.GEMINI_API_KEY,
}),
});
async function main() {
+20
View File
@@ -1,5 +1,25 @@
# @llamaindex/autotool
## 8.0.12
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
## 8.0.11
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
## 8.0.10
### Patch Changes
- llamaindex@0.11.10
## 8.0.9
### Patch Changes
@@ -1,5 +1,28 @@
# @llamaindex/autotool-01-node-example
## 0.0.120
### Patch Changes
- Updated dependencies [515a8b9]
- llamaindex@0.11.12
- @llamaindex/autotool@8.0.12
## 0.0.119
### Patch Changes
- Updated dependencies [7039e1a]
- llamaindex@0.11.11
- @llamaindex/autotool@8.0.11
## 0.0.118
### Patch Changes
- llamaindex@0.11.10
- @llamaindex/autotool@8.0.10
## 0.0.117
### Patch Changes
@@ -13,5 +13,5 @@
"scripts": {
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
},
"version": "0.0.117"
"version": "0.0.120"
}
+1 -1
View File
@@ -6,7 +6,7 @@
"url": "git+https://github.com/run-llama/LlamaIndexTS.git",
"directory": "packages/autotool"
},
"version": "8.0.9",
"version": "8.0.12",
"description": "auto transpile your JS function to LLM Agent compatible",
"files": [
"dist",
+17
View File
@@ -1,5 +1,22 @@
# @llamaindex/cloud
## 4.0.17
### Patch Changes
- Updated dependencies [d578889]
- Updated dependencies [0fcc92f]
- Updated dependencies [515a8b9]
- @llamaindex/core@0.6.13
## 4.0.16
### Patch Changes
- Updated dependencies [7039e1a]
- Updated dependencies [7039e1a]
- @llamaindex/core@0.6.12
## 4.0.15
### Patch Changes
+2 -2
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloud",
"version": "4.0.15",
"version": "4.0.17",
"type": "module",
"license": "MIT",
"scripts": {
@@ -79,6 +79,6 @@
},
"dependencies": {
"p-retry": "^6.2.1",
"zod": "^3.25.7"
"zod": "^3.25.67"
}
}
+15
View File
@@ -1,5 +1,20 @@
# @llamaindex/core
## 0.6.13
### Patch Changes
- d578889: Add new memory API
- 0fcc92f: Fix: split sentences must not trim whitespaces
- 515a8b9: Fix: logging for fromPersistPath
## 0.6.12
### Patch Changes
- 7039e1a: Internal cleanup of base64 encoding
- 7039e1a: chore: migrate to @google/genai SDK
## 0.6.11
### Patch Changes
+3 -3
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/core",
"type": "module",
"version": "0.6.11",
"version": "0.6.13",
"description": "LlamaIndex Core Module",
"exports": {
"./agent": {
@@ -312,7 +312,7 @@
"@llamaindex/env": "workspace:*",
"@types/node": "^22.9.0",
"magic-bytes.js": "^1.10.0",
"zod": "^3.23.8",
"zod-to-json-schema": "^3.23.3"
"zod": "^3.25.67",
"zod-to-json-schema": "^3.24.6"
}
}
+2
View File
@@ -152,6 +152,7 @@ export type AgentParamsBase<
/**
* Worker will schedule tasks and handle the task execution
* @deprecated Use agent instead.
*/
export abstract class AgentWorker<
AI extends LLM,
@@ -250,6 +251,7 @@ export abstract class AgentWorker<
/**
* Runner will manage the task execution and provide a high-level API for the user
* @deprecated Use agent instead.
*/
export abstract class AgentRunner<
AI extends LLM,
+3
View File
@@ -62,6 +62,9 @@ export class LLMAgentWorker extends AgentWorker<LLM> {
taskHandler = AgentRunner.defaultTaskHandler;
}
/**
* @deprecated Use agent instead.
*/
export class LLMAgent extends AgentRunner<LLM> {
constructor(params: LLMAgentParams<LLM>) {
validateAgentParams(params);
+2 -4
View File
@@ -1,5 +1,5 @@
import type { ChatMessage, MessageContent } from "../llms";
import type { BaseMemory } from "../memory";
import type { Memory } from "../memory";
import { EngineResponse } from "../schema";
export interface BaseChatEngineParams<
@@ -9,9 +9,7 @@ export interface BaseChatEngineParams<
/**
* Optional chat history if you want to customize the chat history.
*/
chatHistory?:
| ChatMessage<AdditionalMessageOptions>[]
| BaseMemory<AdditionalMessageOptions>;
chatHistory?: ChatMessage<AdditionalMessageOptions>[] | Memory;
}
export interface StreamingChatEngineParams<
@@ -1,7 +1,7 @@
import { wrapEventCaller } from "../decorator";
import { Settings } from "../global";
import type { ChatMessage, LLM, MessageContent, MessageType } from "../llms";
import { BaseMemory, ChatMemoryBuffer } from "../memory";
import { Memory, createMemory } from "../memory";
import type { BaseNodePostprocessor } from "../postprocessor";
import {
type ContextSystemPrompt,
@@ -23,7 +23,7 @@ import type { ContextGenerator } from "./type";
export type ContextChatEngineOptions = {
retriever: BaseRetriever;
chatModel?: LLM | undefined;
chatHistory?: ChatMessage[] | undefined;
chatHistory?: ChatMessage[] | Memory | undefined;
contextSystemPrompt?: ContextSystemPrompt | undefined;
nodePostprocessors?: BaseNodePostprocessor[] | undefined;
systemPrompt?: string | undefined;
@@ -37,18 +37,21 @@ export type ContextChatEngineOptions = {
*/
export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
chatModel: LLM;
memory: BaseMemory;
memory: Memory;
contextGenerator: ContextGenerator & PromptMixin;
systemPrompt?: string | undefined;
get chatHistory() {
return this.memory.getMessages();
return this.memory.getLLM();
}
constructor(init: ContextChatEngineOptions) {
super();
this.chatModel = init.chatModel ?? Settings.llm;
this.memory = new ChatMemoryBuffer({ chatHistory: init?.chatHistory });
this.memory =
init?.chatHistory instanceof Memory
? init.chatHistory
: createMemory(init?.chatHistory ?? []);
this.contextGenerator = new DefaultContextGenerator({
retriever: init.retriever,
contextSystemPrompt: init?.contextSystemPrompt,
@@ -87,12 +90,9 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
): Promise<EngineResponse | AsyncIterable<EngineResponse>> {
const { message, stream } = params;
const chatHistory = params.chatHistory
? new ChatMemoryBuffer({
chatHistory:
params.chatHistory instanceof BaseMemory
? await params.chatHistory.getMessages()
: params.chatHistory,
})
? params.chatHistory instanceof Memory
? params.chatHistory
: createMemory(params.chatHistory)
: this.memory;
const requestMessages = await this.prepareRequestMessages(
message,
@@ -110,7 +110,7 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
initialValue: "",
reducer: (accumulator, part) => (accumulator += part.delta),
finished: (accumulator) => {
chatHistory.put({ content: accumulator, role: "assistant" });
void chatHistory.add({ content: accumulator, role: "assistant" });
},
}),
(r) => EngineResponse.fromChatResponseChunk(r, requestMessages.nodes),
@@ -120,26 +120,26 @@ export class ContextChatEngine extends PromptMixin implements BaseChatEngine {
messages: requestMessages.messages,
additionalChatOptions: params.chatOptions as object,
});
chatHistory.put(response.message);
await chatHistory.add(response.message);
return EngineResponse.fromChatResponse(response, requestMessages.nodes);
}
reset() {
this.memory.reset();
async reset() {
await this.memory.clear();
}
private async prepareRequestMessages(
message: MessageContent,
chatHistory: BaseMemory,
chatHistory: Memory,
) {
chatHistory.put({
await chatHistory.add({
content: message,
role: "user",
});
const textOnly = extractText(message);
const context = await this.contextGenerator.generate(textOnly);
const systemMessage = this.prependSystemPrompt(context.message);
const messages = await chatHistory.getMessages([systemMessage]);
const messages = await chatHistory.getLLM(this.chatModel, [systemMessage]);
return { nodes: context.nodes, messages };
}
@@ -1,5 +1,5 @@
import type { LLM } from "../llms";
import { BaseMemory, ChatMemoryBuffer } from "../memory";
import { createMemory, Memory } from "../memory";
import { EngineResponse } from "../schema";
import { streamConverter, streamReducer } from "../utils";
import type {
@@ -16,20 +16,16 @@ import { Settings } from "../global";
*/
export class SimpleChatEngine implements BaseChatEngine {
memory: BaseMemory;
memory: Memory;
llm: LLM;
get chatHistory() {
return this.memory.getMessages();
return this.memory.getLLM();
}
constructor(init?: Partial<SimpleChatEngine>) {
this.llm = init?.llm ?? Settings.llm;
this.memory =
init?.memory ??
new ChatMemoryBuffer({
llm: this.llm,
});
this.memory = init?.memory ?? createMemory();
}
chat(params: NonStreamingChatEngineParams): Promise<EngineResponse>;
@@ -43,19 +39,15 @@ export class SimpleChatEngine implements BaseChatEngine {
const { message, stream } = params;
const chatHistory = params.chatHistory
? new ChatMemoryBuffer({
llm: this.llm,
chatHistory:
params.chatHistory instanceof BaseMemory
? await params.chatHistory.getMessages()
: params.chatHistory,
})
? params.chatHistory instanceof Memory
? params.chatHistory
: createMemory(params.chatHistory)
: this.memory;
chatHistory.put({ content: message, role: "user" });
await chatHistory.add({ content: message, role: "user" });
if (stream) {
const stream = await this.llm.chat({
messages: await chatHistory.getMessages(),
messages: await chatHistory.getLLM(this.llm),
stream: true,
});
return streamConverter(
@@ -64,7 +56,7 @@ export class SimpleChatEngine implements BaseChatEngine {
initialValue: "",
reducer: (accumulator, part) => accumulator + part.delta,
finished: (accumulator) => {
chatHistory.put({ content: accumulator, role: "assistant" });
void chatHistory.add({ content: accumulator, role: "assistant" });
},
}),
EngineResponse.fromChatResponseChunk,
@@ -73,13 +65,13 @@ export class SimpleChatEngine implements BaseChatEngine {
const response = await this.llm.chat({
stream: false,
messages: await chatHistory.getMessages(),
messages: await chatHistory.getLLM(this.llm),
});
chatHistory.put(response.message);
await chatHistory.add(response.message);
return EngineResponse.fromChatResponse(response);
}
reset() {
this.memory.reset();
async reset() {
await this.memory.clear();
}
}
+7
View File
@@ -0,0 +1,7 @@
import type { MemoryMessage } from "../types";
/**
* Contract for converting between an external message format `T` and the
* `MemoryMessage` representation used by the memory.
*/
export interface MessageAdapter<T, TMessageOptions extends object = object> {
// Convert a memory message into the external format `T`.
fromMemory(message: MemoryMessage<TMessageOptions>): T;
// Convert an external message of type `T` into a memory message.
toMemory(message: T): MemoryMessage<TMessageOptions>;
// Type guard: reports whether an arbitrary value is a message of type `T`.
isCompatible(message: unknown): message is T;
}
+43
View File
@@ -0,0 +1,43 @@
import { randomUUID } from "@llamaindex/env";
import type { ChatMessage } from "../../llms";
import type { MemoryMessage } from "../types";
import { type MessageAdapter } from "./base";
/**
 * Adapter between LlamaIndex `ChatMessage` objects and memory messages.
 */
export class ChatMessageAdapter<
  AdditionalMessageOptions extends object = object,
> implements
    MessageAdapter<
      ChatMessage<AdditionalMessageOptions>,
      AdditionalMessageOptions
    >
{
  /**
   * Project a memory message onto the plain chat message fields
   * (`content`, `role`, `options`), dropping the memory-only metadata.
   */
  fromMemory(
    message: MemoryMessage<AdditionalMessageOptions>,
  ): ChatMessage<AdditionalMessageOptions> {
    const { content, role, options } = message;
    return { content, role, options };
  }

  /**
   * Wrap a chat message as a memory message. A fresh `id` and `createdAt`
   * are generated as defaults; fields already present on `message` win.
   */
  toMemory(
    message: ChatMessage<AdditionalMessageOptions>,
  ): MemoryMessage<AdditionalMessageOptions> {
    const generated = { id: randomUUID(), createdAt: new Date() };
    return { ...generated, ...message };
  }

  /**
   * A value is accepted when it is a non-null object with a truthy `role`
   * and a `content` property.
   */
  isCompatible(
    message: unknown,
  ): message is ChatMessage<AdditionalMessageOptions> {
    if (typeof message !== "object" || message === null) {
      return false;
    }
    if (!("role" in message) || !message.role) {
      return false;
    }
    return "content" in message;
  }
}
@@ -0,0 +1,3 @@
export * from "./base";
export * from "./chat";
export * from "./vercel";
+198
View File
@@ -0,0 +1,198 @@
import type {
ChatMessage,
MessageContent,
MessageContentDetail,
} from "../../llms";
import { extractText } from "../../utils";
import type { MemoryMessage } from "../types";
import type { MessageAdapter } from "./base";
// UIMessage from the vercel/ai package (external).
// Declared locally so that this module does not need to import the package.
export type VercelMessage = {
id: string;
role: "system" | "user" | "assistant" | "data";
// Plain-text form of the message.
content: string;
createdAt?: Date | undefined;
annotations?: Array<unknown> | undefined;
// Structured message parts; each part is discriminated by its `type` field.
parts: Array<{ type: string; [key: string]: unknown }>;
};
/**
 * Adapter for converting between memory messages and Vercel UI messages.
 */
export class VercelMessageAdapter<
  AdditionalMessageOptions extends object = object,
> implements MessageAdapter<VercelMessage, AdditionalMessageOptions>
{
  /**
   * Convert a memory message to the Vercel UI message format.
   *
   * Roles without a Vercel equivalent are mapped: `memory` becomes `system`,
   * `developer` (and anything unknown) becomes `user`.
   *
   * @param memoryMessage - The memory message to convert.
   * @returns The equivalent Vercel UI message.
   */
  fromMemory(memoryMessage: MemoryMessage<object>): VercelMessage {
    const parts = this.convertMessageContentToVercelParts(
      memoryMessage.content,
    );

    // Convert role to UI message role
    let role: VercelMessage["role"];
    switch (memoryMessage.role) {
      case "system":
      case "user":
      case "assistant":
        role = memoryMessage.role;
        break;
      case "memory":
        role = "system";
        break;
      case "developer":
        role = "user";
        break;
      default:
        role = "user"; // Default fallback, should not happen
    }

    return {
      id: memoryMessage.id,
      role,
      content: extractText(memoryMessage.content),
      parts,
      createdAt: memoryMessage.createdAt,
      annotations: memoryMessage.annotations,
    };
  }

  /**
   * Convert a Vercel UI message to a memory message.
   *
   * The content is built from `parts`; when no part can be converted, the
   * plain `content` string of the UI message is used instead.
   *
   * @param uiMessage - The Vercel UI message to convert.
   * @returns The equivalent memory message.
   */
  toMemory(uiMessage: VercelMessage): MemoryMessage<AdditionalMessageOptions> {
    // Convert UI message role to MessageType
    let role: ChatMessage["role"];
    switch (uiMessage.role) {
      case "system":
      case "user":
      case "assistant":
        role = uiMessage.role;
        break;
      case "data":
        role = "user"; // Map data role to user
        break;
      default:
        role = "user"; // Default fallback, should not happen
    }

    const content = this.convertVercelPartsToMessageContent(uiMessage.parts);

    return {
      id: uiMessage.id,
      content: content ?? uiMessage.content,
      role,
      createdAt: uiMessage.createdAt,
      annotations: uiMessage.annotations,
    };
  }

  /**
   * Validate that a value matches the VercelMessage structure.
   *
   * `parts` must be an array because `toMemory` iterates over it; a value
   * with a missing or non-array `parts` is rejected so that another adapter
   * can handle it instead of failing during conversion.
   */
  isCompatible(message: unknown): message is VercelMessage {
    return !!(
      message &&
      typeof message === "object" &&
      "role" in message &&
      "content" in message &&
      "parts" in message &&
      Array.isArray(message.parts)
    );
  }

  /**
   * Convert UI parts to MessageContent.
   *
   * Only parts that carry usable data are converted: `file` parts with string
   * `data`/`mimeType`, and any other part that has a string `text`. Parts
   * without text (e.g. tool invocations or step markers) are skipped rather
   * than turned into text details with an undefined `text`.
   *
   * @returns The converted content, or `null` when nothing could be converted.
   */
  private convertVercelPartsToMessageContent(
    parts: VercelMessage["parts"],
  ): MessageContent | null {
    const details: MessageContentDetail[] = [];

    for (const part of parts) {
      if (part.type === "file") {
        if (typeof part.data === "string" && typeof part.mimeType === "string") {
          details.push({
            type: "file",
            data: part.data,
            mimeType: part.mimeType,
          });
        }
        continue;
      }
      // For other part types, keep whatever text they carry
      if (typeof part.text === "string") {
        details.push({
          type: "text",
          text: part.text,
        });
      }
    }

    if (details.length === 0) {
      return null;
    }

    // If only one text detail, return as string
    const [first] = details;
    if (details.length === 1 && first?.type === "text") {
      return first.text;
    }
    return details;
  }

  /**
   * Convert MessageContent to UI parts.
   *
   * Text stays text, image URLs are rendered as a text placeholder, and
   * binary details (audio/video/image/file) become `file` parts.
   */
  private convertMessageContentToVercelParts(
    content: MessageContent,
  ): VercelMessage["parts"] {
    if (typeof content === "string") {
      return [
        {
          type: "text",
          text: content,
        },
      ];
    }

    const parts: VercelMessage["parts"] = [];
    for (const detail of content) {
      switch (detail.type) {
        case "text":
          parts.push({
            type: "text",
            text: detail.text,
          });
          break;
        case "image_url":
          parts.push({
            type: "text",
            text: `[Image URL: ${detail.image_url.url}]`,
          });
          break;
        case "audio":
        case "video":
        case "image":
        case "file":
          parts.push({
            type: "file",
            data: detail.data,
            // Preserve the real MIME type so the part survives a round trip
            // through toMemory(); fall back to the detail kind when absent.
            mimeType:
              "mimeType" in detail && typeof detail.mimeType === "string"
                ? detail.mimeType
                : detail.type,
          });
          break;
        default:
          // For unknown types, create a text representation
          parts.push({
            type: "text",
            text: JSON.stringify(detail),
          });
      }
    }
    return parts;
  }
}
+50
View File
@@ -0,0 +1,50 @@
import { randomUUID } from "@llamaindex/env";
import type { MemoryMessage } from "../types";
/**
* Options shared by all memory blocks.
*/
export type MemoryBlockOptions = {
/**
* The id of the memory block.
* When omitted, BaseMemoryBlock generates one of the form `memory-block-<uuid>`.
*/
id?: string;
/**
* The priority of the memory block.
* Note: if priority is 0, the block content is always included in the memory context.
*/
priority: number;
/**
* Whether the memory block is long term.
* Default is true.
*/
isLongTerm?: boolean;
};
/**
 * Common base for memory blocks: holds the block identity and scheduling
 * attributes, and leaves reading and writing to subclasses.
 */
export abstract class BaseMemoryBlock<
  TAdditionalMessageOptions extends object = object,
> {
  public readonly id: string;
  public readonly priority: number;
  public readonly isLongTerm: boolean;

  constructor(options: MemoryBlockOptions) {
    const { id, priority, isLongTerm } = options;
    // A generated id is only needed when the caller did not supply one.
    this.id = id ?? `memory-block-${randomUUID()}`;
    this.priority = priority;
    this.isLongTerm = isLongTerm ?? true;
  }

  /**
   * Read the current content of the block.
   *
   * @returns The block content as a list of memory messages.
   */
  abstract get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]>;

  /**
   * Hand messages to the block for storage.
   *
   * @param messages - The messages to store.
   */
  abstract put(
    messages: MemoryMessage<TAdditionalMessageOptions>[],
  ): Promise<void>;
}
+153
View File
@@ -0,0 +1,153 @@
import type { LLM, MessageType } from "../../llms";
import type { MemoryMessage } from "../types";
import { BaseMemoryBlock, type MemoryBlockOptions } from "./base";
const DEFAULT_EXTRACTION_PROMPT = `
You are a precise fact extraction system designed to identify key information from conversations.
CONVERSATION SEGMENT:
{{conversation}}
EXISTING FACTS:
{{existing_facts}}
INSTRUCTIONS:
1. Review the conversation segment provided above.
2. Extract specific, concrete facts the user has disclosed or important information discovered
3. Focus on factual information like preferences, personal details, requirements, constraints, or context
4. Do not include opinions, summaries, or interpretations - only extract explicit information
5. Do not duplicate facts that are already in the existing facts list
Respond with the new facts from the conversation segment using the following JSON format:
{
"facts": ["fact1", "fact2", "fact3", ...]
}
`;
// Prompt used to condense the stored facts once they exceed the configured maximum.
// Placeholders are written without inner spaces ({{existing_facts}}, {{max_facts}})
// so that they match the literal strings substituted when the prompt is filled in.
const DEFAULT_SUMMARY_PROMPT = `
You are a precise fact condensing system designed to summarize facts in a concise manner.
EXISTING FACTS:
{{existing_facts}}
INSTRUCTIONS:
1. Review the current list of existing facts
2. Condense the facts into a more concise list, less than {{max_facts}} facts
3. Focus on factual information like preferences, personal details, requirements, constraints, or context
4. Do not include opinions, summaries, or interpretations - only extract explicit information
5. Do not duplicate facts that are already in the existing facts list
Respond with the condensed facts using the following JSON format:
{
"facts": ["fact1", "fact2", "fact3", ...]
}
`;
/**
* The options for the fact extraction memory block.
*/
export type FactExtractionMemoryBlockOptions = {
/**
* The fact extraction model to use.
*/
llm: LLM;
/**
* The maximum number of facts to keep.
* When the stored facts exceed this number, the summary prompt is used to condense them.
*/
maxFacts: number;
/**
* The prompt to use for fact extraction.
* The placeholders `{{conversation}}` and `{{existing_facts}}` are substituted.
*/
extractionPrompt?: string;
/**
* The prompt to use for fact summary.
* The placeholders `{{existing_facts}}` and `{{max_facts}}` are substituted.
*/
summaryPrompt?: string;
} & MemoryBlockOptions & {
// A fact extraction block can only be declared as a long-term block.
isLongTerm?: true;
};
/**
 * A memory block that stores facts extracted from conversations.
 *
 * Each `put()` asks the LLM for new facts found in the given messages. When
 * the number of stored facts exceeds `maxFacts`, the LLM is asked a second
 * time to condense them.
 */
export class FactExtractionMemoryBlock<
  TAdditionalMessageOptions extends object = object,
> extends BaseMemoryBlock<TAdditionalMessageOptions> {
  private readonly llm: LLM;
  private facts: string[] = [];
  private readonly maxFacts: number;
  private readonly extractionPrompt: string;
  private readonly summaryPrompt: string;

  constructor(options: FactExtractionMemoryBlockOptions) {
    super(options);
    this.llm = options.llm;
    this.maxFacts = options.maxFacts;
    this.extractionPrompt =
      options.extractionPrompt ?? DEFAULT_EXTRACTION_PROMPT;
    this.summaryPrompt = options.summaryPrompt ?? DEFAULT_SUMMARY_PROMPT;
  }

  /**
   * @returns A single `memory` message whose content is the stored facts,
   * one per line.
   */
  async get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]> {
    const fact = {
      id: this.id,
      content: this.facts.join("\n"),
      role: "memory" as MessageType,
    };
    return [fact];
  }

  /**
   * Extract facts from the given messages and add them to the stored facts.
   *
   * @param messages - The conversation segment to extract facts from.
   * @throws Error when the LLM response is not JSON of the form
   * `{ "facts": [...] }`, or when condensing yields no facts.
   */
  async put(
    messages: MemoryMessage<TAdditionalMessageOptions>[],
  ): Promise<void> {
    if (messages.length === 0) {
      return;
    }

    // Format conversation (structured content is reduced to its text parts)
    const conversation = `\n\t${messages
      .map((m) => this.contentToText(m.content))
      .join("\n\t")}`;

    const prompt = this.fillTemplate(
      this.extractionPrompt,
      new Map([
        ["conversation", conversation],
        ["existing_facts", this.formatFacts()],
      ]),
    );
    const response = await this.llm.complete({ prompt });

    const newFacts = this.parseFacts(
      response.text,
      `[FactExtraction] Invalid response from LLM: ${response.text}`,
    );
    // No new facts, so no need to update the facts
    if (newFacts.length === 0) {
      return;
    }
    this.facts.push(...newFacts);

    // Condense the facts
    if (this.facts.length > this.maxFacts) {
      const summaryPrompt = this.fillTemplate(
        this.summaryPrompt,
        new Map([
          ["existing_facts", this.formatFacts()],
          ["max_facts", this.maxFacts.toString()],
        ]),
      );
      const summaryResponse = await this.llm.complete({
        prompt: summaryPrompt,
      });
      const condensedFacts = this.parseFacts(
        summaryResponse.text,
        "Invalid response from LLM",
      );
      if (condensedFacts.length === 0) {
        throw new Error("Invalid response from LLM");
      }
      // Only get the first maxFacts facts (in case the LLM returned more)
      this.facts = condensedFacts.slice(0, this.maxFacts);
    }
  }

  /** Render the stored facts in the form used inside the prompts. */
  private formatFacts(): string {
    return `{ facts: [${this.facts.join(", ")}] }`;
  }

  /** Reduce message content to plain text, keeping only the text details. */
  private contentToText(
    content: MemoryMessage<TAdditionalMessageOptions>["content"],
  ): string {
    if (typeof content === "string") {
      return content;
    }
    return content
      .flatMap((detail) => (detail.type === "text" ? [detail.text] : []))
      .join("\n");
  }

  /**
   * Substitute `{{name}}` placeholders (inner whitespace is tolerated).
   *
   * A replacer function is used so that `$`-sequences in the substituted text
   * are inserted literally. The substitution is a single pass, so placeholders
   * that appear inside substituted text are not expanded again. Placeholders
   * with unknown names are left untouched.
   */
  private fillTemplate(template: string, values: Map<string, string>): string {
    return template.replace(
      /\{\{\s*(\w+)\s*\}\}/g,
      (placeholder: string, name: string) => values.get(name) ?? placeholder,
    );
  }

  /**
   * Parse an LLM response of the form `{ "facts": [...] }`.
   *
   * A surrounding Markdown code fence is stripped first, since models often
   * wrap JSON output in one. Entries that are not non-empty strings are dropped.
   *
   * @throws Error with `errorMessage` when the response cannot be parsed or
   * does not contain a `facts` array.
   */
  private parseFacts(text: string, errorMessage: string): string[] {
    const unfenced = text
      .trim()
      .replace(/^```(?:json)?\s*/i, "")
      .replace(/\s*```$/, "");

    let parsed: unknown;
    try {
      parsed = JSON.parse(unfenced);
    } catch {
      throw new Error(errorMessage);
    }

    if (
      typeof parsed !== "object" ||
      parsed === null ||
      !("facts" in parsed) ||
      !Array.isArray(parsed.facts)
    ) {
      throw new Error(errorMessage);
    }

    return parsed.facts.filter(
      (fact: unknown): fact is string =>
        typeof fact === "string" && fact.trim() !== "",
    );
  }
}
+3
View File
@@ -0,0 +1,3 @@
export { BaseMemoryBlock } from "./base";
export { FactExtractionMemoryBlock } from "./fact";
export { StaticMemoryBlock } from "./static";
+51
View File
@@ -0,0 +1,51 @@
import type { MessageContent, MessageType } from "../../llms";
import type { MemoryMessage } from "../types";
import { BaseMemoryBlock, type MemoryBlockOptions } from "./base";
/**
* The options for the static memory block.
* `priority` and `isLongTerm` cannot be set; StaticMemoryBlock fixes them to 0 and false.
*/
export type StaticMemoryBlockOptions = {
/**
* The static content to store.
*/
content: MessageContent;
/**
* The role of the message.
* Defaults to "user".
*/
messageRole?: MessageType;
} & Omit<MemoryBlockOptions, "priority" | "isLongTerm">;
/**
 * A memory block holding fixed content.
 * It is always created with priority 0 and as a non-long-term block.
 */
export class StaticMemoryBlock<
  TAdditionalMessageOptions extends object = object,
> extends BaseMemoryBlock<TAdditionalMessageOptions> {
  private readonly content: MessageContent;
  private readonly messageRole: MessageType;

  constructor(options: StaticMemoryBlockOptions) {
    super({ ...options, priority: 0, isLongTerm: false });
    const { content, messageRole } = options;
    this.content = content;
    this.messageRole = messageRole ?? "user";
  }

  /**
   * @returns A single message carrying the static content, using the block id
   * as the message id and the configured role.
   */
  async get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]> {
    const staticMessage = {
      id: this.id,
      role: this.messageRole,
      content: this.content,
    };
    return [staticMessage];
  }

  /**
   * Accepts messages to satisfy the block contract but stores nothing,
   * because static content doesn't change.
   */
  async put(
    _messages: MemoryMessage<TAdditionalMessageOptions>[],
  ): Promise<void> {
    return;
  }
}
@@ -1,13 +1,14 @@
import { Settings } from "../global";
import type { ChatMessage } from "../llms";
import { type BaseChatStore, SimpleChatStore } from "../storage/chat-store";
import { extractText } from "../utils";
import { Settings } from "../../global";
import type { ChatMessage } from "../../llms";
import { type BaseChatStore, SimpleChatStore } from "../../storage/chat-store";
import { extractText } from "../../utils";
export const DEFAULT_TOKEN_LIMIT_RATIO = 0.75;
export const DEFAULT_CHAT_STORE_KEY = "chat_history";
/**
* A ChatMemory is used to keep the state of back and forth chat messages
* @deprecated Use Memory instead.
*/
export abstract class BaseMemory<
AdditionalMessageOptions extends object = object,
@@ -55,6 +56,9 @@ export abstract class BaseMemory<
}
}
/**
* @deprecated Use Memory with snapshot feature with your own storage instead.
*/
export abstract class BaseChatStoreMemory<
AdditionalMessageOptions extends object = object,
> extends BaseMemory<AdditionalMessageOptions> {
@@ -1,6 +1,6 @@
import { Settings } from "../global";
import type { ChatMessage, LLM } from "../llms";
import { type BaseChatStore } from "../storage/chat-store";
import { Settings } from "../../global";
import type { ChatMessage, LLM } from "../../llms";
import { type BaseChatStore } from "../../storage/chat-store";
import { BaseChatStoreMemory, DEFAULT_TOKEN_LIMIT_RATIO } from "./base";
type ChatMemoryBufferOptions<AdditionalMessageOptions extends object = object> =
@@ -12,6 +12,9 @@ type ChatMemoryBufferOptions<AdditionalMessageOptions extends object = object> =
llm?: LLM<object, AdditionalMessageOptions> | undefined;
};
/**
* @deprecated Use Memory instead.
*/
export class ChatMemoryBuffer<
AdditionalMessageOptions extends object = object,
> extends BaseChatStoreMemory<AdditionalMessageOptions> {
@@ -1,10 +1,13 @@
import { type Tokenizer, tokenizers } from "@llamaindex/env/tokenizers";
import { Settings } from "../global";
import type { ChatMessage, LLM, MessageType } from "../llms";
import { defaultSummaryPrompt, type SummaryPrompt } from "../prompts";
import { extractText, messagesToHistory } from "../utils";
import { Settings } from "../../global";
import type { ChatMessage, LLM, MessageType } from "../../llms";
import { defaultSummaryPrompt, type SummaryPrompt } from "../../prompts";
import { extractText, messagesToHistory } from "../../utils";
import { BaseMemory } from "./base";
/**
* @deprecated Use Memory instead.
*/
export class ChatSummaryMemoryBuffer extends BaseMemory {
/**
* Tokenizer function that converts text to tokens,
+136
View File
@@ -0,0 +1,136 @@
import type { ChatMessage } from "../llms";
import { ChatMessageAdapter } from "./adapter/chat";
import {
FactExtractionMemoryBlock,
type FactExtractionMemoryBlockOptions,
} from "./block/fact";
import {
StaticMemoryBlock,
type StaticMemoryBlockOptions,
} from "./block/static";
import { DEFAULT_TOKEN_LIMIT, Memory, type MemoryOptions } from "./memory";
import type { MemoryMessage } from "./types";
/**
 * Create a Memory instance with default options
 * @returns A new Memory instance with default configuration
 */
export function createMemory<TMessageOptions extends object = object>(): Memory<
  Record<string, never>,
  TMessageOptions
>;
/**
 * Create a Memory instance with options only
 * @param options - Memory configuration options
 * @returns A new Memory instance
 */
export function createMemory<TMessageOptions extends object = object>(
  options: MemoryOptions<TMessageOptions>,
): Memory<Record<string, never>, TMessageOptions>;
/**
 * Create a Memory instance with ChatMessage array (IDs will be generated)
 * @param messages - Initial ChatMessage array for the memory
 * @param options - Memory configuration options
 * @returns A new Memory instance
 */
export function createMemory<TMessageOptions extends object = object>(
  messages: ChatMessage<TMessageOptions>[],
  options?: MemoryOptions<TMessageOptions>,
): Memory<Record<string, never>, TMessageOptions>;
/**
 * Create a Memory instance with MemoryMessage array and options
 * @param messages - Initial MemoryMessage array for the memory
 * @param options - Memory configuration options
 * @returns A new Memory instance
 */
export function createMemory<TMessageOptions extends object = object>(
  messages: MemoryMessage<TMessageOptions>[],
  options: MemoryOptions<TMessageOptions>,
): Memory<Record<string, never>, TMessageOptions>;
/**
 * Create a Memory instance
 *
 * When the first argument is not an array it is treated as the options
 * object (the options-only overload). When it is an array, every message that
 * lacks an `id` is converted to a memory message with a generated id;
 * messages that already have an `id` are kept as they are.
 *
 * @param messagesOrOptions - Either initial messages or options
 * @param options - Memory configuration options (when first param is messages)
 * @returns A new Memory instance
 */
export function createMemory<TMessageOptions extends object = object>(
  messagesOrOptions:
    | ChatMessage<TMessageOptions>[]
    | MemoryMessage<TMessageOptions>[]
    | MemoryOptions<TMessageOptions> = [],
  options: MemoryOptions<TMessageOptions> = {},
): Memory<Record<string, never>, TMessageOptions> {
  // Options-only overload: the first argument carries the configuration.
  if (!Array.isArray(messagesOrOptions)) {
    return new Memory<Record<string, never>, TMessageOptions>(
      [],
      messagesOrOptions,
    );
  }

  const adapter = new ChatMessageAdapter<TMessageOptions>();
  const messages = messagesOrOptions.map(
    (message): MemoryMessage<TMessageOptions> =>
      "id" in message
        ? (message as MemoryMessage<TMessageOptions>)
        : adapter.toMemory(message),
  );

  return new Memory<Record<string, never>, TMessageOptions>(messages, options);
}
/**
 * Factory for StaticMemoryBlock.
 * @param options - Configuration options for the static memory block
 * @returns A new StaticMemoryBlock instance built from the given options
 */
export function staticBlock<TMessageOptions extends object = object>(
  options: StaticMemoryBlockOptions,
): StaticMemoryBlock<TMessageOptions> {
  const block = new StaticMemoryBlock<TMessageOptions>(options);
  return block;
}
/**
 * Factory for FactExtractionMemoryBlock.
 * @param options - Configuration options for the fact extraction memory block
 * @returns A new FactExtractionMemoryBlock instance built from the given options
 */
export function factExtractionBlock<TMessageOptions extends object = object>(
  options: FactExtractionMemoryBlockOptions,
): FactExtractionMemoryBlock<TMessageOptions> {
  const block = new FactExtractionMemoryBlock<TMessageOptions>(options);
  return block;
}
/**
 * Creates a new Memory instance from a snapshot
 *
 * The snapshot is a JSON string. `createdAt` values, which JSON stores as
 * strings, are turned back into `Date` objects. The token limit is taken from
 * `options`, then from the snapshot, then from the default. The memory cursor
 * always comes from the snapshot.
 *
 * @param snapshot The snapshot to load from
 * @param options Optional MemoryOptions to apply when loading (including memory blocks)
 * @returns A new Memory instance with the snapshot data and provided options
 * @throws SyntaxError when the snapshot is not valid JSON
 * @throws Error when the snapshot is not an object or its messages are not an array
 */
export function loadMemory<TMessageOptions extends object = object>(
  snapshot: string,
  options?: MemoryOptions<TMessageOptions>,
): Memory<Record<string, never>, TMessageOptions> {
  const parsed: unknown = JSON.parse(snapshot);
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("Invalid memory snapshot: expected a JSON object");
  }
  const {
    messages: rawMessages,
    tokenLimit,
    memoryCursor,
  } = parsed as {
    messages?: unknown;
    tokenLimit?: unknown;
    memoryCursor?: unknown;
  };

  const messageList = rawMessages ?? [];
  if (!Array.isArray(messageList)) {
    throw new Error("Invalid memory snapshot: messages must be an array");
  }

  // JSON has no date type: revive ISO strings written for createdAt.
  const messages = (messageList as MemoryMessage<TMessageOptions>[]).map(
    (message) => {
      const createdAt: unknown = message?.createdAt;
      if (typeof createdAt !== "string") {
        return message;
      }
      const revived = new Date(createdAt);
      return Number.isNaN(revived.getTime())
        ? message
        : { ...message, createdAt: revived };
    },
  );

  // Keep the cursor inside the message list so that no message is skipped
  // or addressed with a negative offset.
  const cursor =
    typeof memoryCursor === "number" && Number.isInteger(memoryCursor)
      ? Math.min(Math.max(memoryCursor, 0), messages.length)
      : 0;

  // Merge snapshot data with provided options
  const mergedOptions: MemoryOptions<TMessageOptions> = {
    ...options,
    tokenLimit:
      options?.tokenLimit ??
      (typeof tokenLimit === "number" ? tokenLimit : DEFAULT_TOKEN_LIMIT),
    memoryBlocks: options?.memoryBlocks ?? [],
    memoryCursor: cursor,
  };

  return new Memory<Record<string, never>, TMessageOptions>(
    messages,
    mergedOptions,
  );
}
+9 -3
View File
@@ -1,3 +1,9 @@
export { BaseMemory } from "./base";
export { ChatMemoryBuffer } from "./chat-memory-buffer";
export { ChatSummaryMemoryBuffer } from "./summary-memory";
export { BaseMemory } from "./deprecated/base";
export { ChatMemoryBuffer } from "./deprecated/chat-memory-buffer";
export { ChatSummaryMemoryBuffer } from "./deprecated/summary-memory";
export * from "./adapter";
export * from "./block";
export * from "./factories";
export { Memory } from "./memory";
export * from "./types";
+401
View File
@@ -0,0 +1,401 @@
import { Settings } from "../global";
import type { ChatMessage, LLM } from "../llms";
import { extractText } from "../utils";
import { type MessageAdapter } from "./adapter/base";
import { ChatMessageAdapter } from "./adapter/chat";
import { VercelMessageAdapter } from "./adapter/vercel";
import type { BaseMemoryBlock } from "./block/base.js";
import { DEFAULT_TOKEN_LIMIT_RATIO } from "./deprecated/base";
import type { MemoryMessage } from "./types";
export const DEFAULT_TOKEN_LIMIT = 30000;
const DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO = 0.7;
type BuiltinAdapters<TMessageOptions extends object = object> = {
vercel: VercelMessageAdapter;
llamaindex: ChatMessageAdapter<TMessageOptions>;
};
/**
* Configuration options for Memory.
*/
export type MemoryOptions<TMessageOptions extends object = object> = {
/**
* The token limit for memory retrieval results.
* Default is DEFAULT_TOKEN_LIMIT.
*/
tokenLimit?: number;
/**
* How much of the token limit is used for short term memory.
* The remaining token limit is used for long term memory.
* Default is 0.7.
*/
shortTermTokenLimitRatio?: number;
/**
* Additional message adapters, keyed by the name used to select them.
*/
customAdapters?: Record<string, MessageAdapter<unknown, object>>;
/**
* The memory blocks attached to the memory.
*/
memoryBlocks?: BaseMemoryBlock<TMessageOptions>[];
/**
* The cursor position for tracking processed messages into long-term memory.
* Used internally for memory restoration from snapshots.
*/
memoryCursor?: number;
};
export class Memory<
TAdapters extends Record<
string,
MessageAdapter<unknown, TMessageOptions>
> = Record<string, never>,
TMessageOptions extends object = object,
> {
/**
* Hold all messages put into the memory.
*/
private messages: MemoryMessage<TMessageOptions>[] = [];
/**
* The token limit for memory retrieval results.
*/
private tokenLimit: number = DEFAULT_TOKEN_LIMIT;
/**
* The ratio of the token limit for short term memory.
*/
private shortTermTokenLimitRatio: number =
DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO;
/**
* The adapters for the memory.
*/
private adapters: TAdapters & BuiltinAdapters<TMessageOptions>;
/**
* The memory blocks for the memory.
*/
private memoryBlocks: BaseMemoryBlock<TMessageOptions>[] = [];
/**
* The cursor for the messages that have been processed into long-term memory.
*/
private memoryCursor: number = 0;
constructor(
messages: MemoryMessage<TMessageOptions>[] = [],
options: MemoryOptions<TMessageOptions> = {},
) {
this.messages = messages;
this.tokenLimit = options.tokenLimit ?? DEFAULT_TOKEN_LIMIT;
this.shortTermTokenLimitRatio =
options.shortTermTokenLimitRatio ?? DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO;
this.memoryBlocks = options.memoryBlocks ?? [];
this.memoryCursor = options.memoryCursor ?? 0;
this.adapters = {
...options.customAdapters,
vercel: new VercelMessageAdapter(),
llamaindex: new ChatMessageAdapter(),
} as TAdapters & BuiltinAdapters<TMessageOptions>;
}
/**
* Add a message to the memory
* @param message - The message to add to the memory
*/
async add(message: unknown): Promise<void> {
let memoryMessage: MemoryMessage<TMessageOptions> | null = null;
// Try to find a compatible adapter among the other adapters
for (const key in this.adapters) {
const adapter = this.adapters[key as keyof typeof this.adapters];
if (adapter?.isCompatible(message)) {
memoryMessage = adapter.toMemory(message);
break;
}
}
if (memoryMessage) {
this.messages.push(memoryMessage);
// Automatically manage memory blocks when new messages are added
await this.manageMemoryBlocks();
} else {
throw new Error(
`None of the adapters ${Object.keys(this.adapters).join(", ")} are compatible with the message. ${JSON.stringify(message)}`,
);
}
}
/**
* Get the messages of specific type from the memory
* @param options - The options for the get method
* @returns The messages of specific type
*/
async get<
K extends keyof (TAdapters &
BuiltinAdapters<TMessageOptions>) = "llamaindex",
>(
options: {
type?: K;
transientMessages?: ChatMessage<TMessageOptions>[];
} = {},
): Promise<
K extends keyof (TAdapters & BuiltinAdapters<TMessageOptions>)
? ReturnType<
(TAdapters & BuiltinAdapters<TMessageOptions>)[K]["fromMemory"]
>[]
: never
> {
const { type = "llamaindex", transientMessages } = options;
const adapter = this.adapters[type as keyof typeof this.adapters];
if (!adapter) {
throw new Error(`No adapter registered for type "${String(type)}"`);
}
let messages = this.messages;
if (transientMessages && transientMessages.length > 0) {
messages = [
...this.messages,
...transientMessages.map((m) => this.adapters.llamaindex.toMemory(m)),
];
}
// Convert memory messages to chat messages for memory block processing
const chatMessages = messages.map((m) => adapter.fromMemory(m));
return chatMessages as unknown as Promise<
K extends keyof (TAdapters & BuiltinAdapters<TMessageOptions>)
? ReturnType<
(TAdapters & BuiltinAdapters<TMessageOptions>)[K]["fromMemory"]
>[]
: never
>;
}
/**
* Get the messages from the memory, optionally including transient messages.
* only return messages that are within context window of the LLM
* @param llm - To fit the result messages to the context window of the LLM. If not provided, the default token limit will be used.
* @param transientMessages - Optional transient messages to include.
* @returns The messages from the memory, optionally including transient messages.
*/
async getLLM(
llm?: LLM,
transientMessages?: ChatMessage<TMessageOptions>[],
): Promise<ChatMessage[]> {
// Priority of result messages:
// [Fixed blocks (priority=0), Long term blocks, Short term messages(oldest to newest), Transient messages]
const contextWindow = llm?.metadata.contextWindow;
const tokenLimit = contextWindow
? Math.ceil(contextWindow * DEFAULT_TOKEN_LIMIT_RATIO)
: this.tokenLimit;
// Start with fixed block messages (priority=0)
// as it must always be included in the retrieval result
const messages = await this.getMemoryBlockMessages(
this.memoryBlocks.filter((block) => block.priority === 0),
tokenLimit,
);
// remaining token limit for short-term and memory blocks content
const remainingTokenLimit =
tokenLimit -
this.countMessagesToken([...messages, ...(transientMessages || [])]);
// if transient messages are provided, we need to check if they fit within the token limit
if (remainingTokenLimit < 0) {
throw new Error(
`Could not fit fixed blocks and transient messages within memory context`,
);
}
// Get messages for short-term and memory blocks
const shortTermTokenLimit = Math.ceil(
remainingTokenLimit * this.shortTermTokenLimitRatio,
);
const memoryBlocksTokenLimit = remainingTokenLimit - shortTermTokenLimit;
// Add long-term memory blocks (priority > 0)
const longTermBlocks = [...this.memoryBlocks]
.filter((block) => block.priority !== 0)
.sort((a, b) => b.priority - a.priority);
const longTermBlockMessages = await this.getMemoryBlockMessages(
longTermBlocks,
memoryBlocksTokenLimit,
);
messages.push(...longTermBlockMessages);
// Process short-term messages (newest first for token efficiency, but maintain chronological order in result)
const shortTermMessagesResult: ChatMessage<TMessageOptions>[] = [];
const unprocessedMessages = this.messages.slice(this.memoryCursor);
// Process from newest to oldest for token efficiency
for (let i = unprocessedMessages.length - 1; i >= 0; i--) {
const memoryMessage = unprocessedMessages[i];
if (!memoryMessage) continue;
const chatMessage = this.adapters.llamaindex.fromMemory(memoryMessage);
// Check if adding this message would exceed token limit
const newTokenCount =
this.countMessagesToken(shortTermMessagesResult) +
this.countMessagesToken([chatMessage]) +
this.countMessagesToken(transientMessages || []);
if (newTokenCount > shortTermTokenLimit) {
// Token limit reached, stop processing older messages
break;
}
shortTermMessagesResult.push(chatMessage);
}
// reverse the short-term messages to maintain chronological order (oldest to newest)
messages.push(...shortTermMessagesResult.reverse());
// Add transient messages at the end
if (transientMessages && transientMessages.length > 0) {
messages.push(...transientMessages);
}
return messages;
}
/**
* Get the content from the memory blocks
* also convert the content to chat messages
* @param blocks - The blocks to get the content from
* @param tokenLimit - The token limit for the memory blocks, if not provided, all the memory blocks will be included
*/
private async getMemoryBlockMessages(
blocks: BaseMemoryBlock<TMessageOptions>[],
tokenLimit?: number,
): Promise<ChatMessage<TMessageOptions>[]> {
if (blocks.length === 0) {
return [];
}
// Sort memory blocks by priority (highest first)
const sortedBlocks = [...blocks].sort((a, b) => b.priority - a.priority);
const memoryContent: ChatMessage<TMessageOptions>[] = [];
// Get up to the token limit of the memory blocks
let addedTokenCount = 0;
for (const block of sortedBlocks) {
try {
const content = await block.get();
for (const message of content) {
const chatMessage = this.adapters.llamaindex.fromMemory(message);
const messageTokenCount = this.countMessagesToken([chatMessage]);
if (tokenLimit && addedTokenCount + messageTokenCount > tokenLimit) {
return memoryContent;
}
memoryContent.push(chatMessage);
addedTokenCount += messageTokenCount;
}
} catch (error) {
console.warn(
`Failed to get content from memory block ${block.id}:`,
error,
);
}
}
return memoryContent;
}
/**
* Manage the memory blocks
* This method processes new messages into memory blocks when short-term memory exceeds its token limit.
* It uses a cursor system to track which messages have already been processed into long-term memory.
*/
async manageMemoryBlocks(): Promise<void> {
// Early return if no memory blocks configured
if (this.memoryBlocks.length === 0) {
return;
}
// Should always calculate the number
const shortTermTokenLimit = Math.ceil(
this.tokenLimit * this.shortTermTokenLimitRatio,
);
// Check if unprocessed messages exceed the short term token limit
const unprocessedMessages = this.getUnprocessedMessages();
const unprocessedMessagesTokenCount =
this.countMemoryMessagesToken(unprocessedMessages);
if (unprocessedMessagesTokenCount <= shortTermTokenLimit) {
// No need to manage memory blocks yet
return;
}
await this.processMessagesIntoMemoryBlocks(unprocessedMessages);
this.updateMemoryCursor(unprocessedMessages.length);
}
/**
* Get messages that haven't been processed into long-term memory yet
*/
private getUnprocessedMessages(): MemoryMessage<TMessageOptions>[] {
if (this.memoryCursor >= this.messages.length) {
return [];
}
return this.messages.slice(this.memoryCursor);
}
/**
* Process new messages into all memory blocks
*/
private async processMessagesIntoMemoryBlocks(
newMessages: MemoryMessage<TMessageOptions>[],
): Promise<void> {
const longTermMemoryBlocks = this.memoryBlocks.filter(
(block) => block.isLongTerm,
);
const promises = longTermMemoryBlocks.map(async (block) => {
try {
await block.put(newMessages);
} catch (error) {
console.warn(
`Failed to process messages into memory block ${block.id}:`,
error,
);
// Continue processing other blocks even if one fails
}
});
// Wait for all memory blocks to process the messages
await Promise.all(promises);
}
/**
* Update the memory cursor after successful processing
*/
private updateMemoryCursor(processedCount: number): void {
this.memoryCursor += processedCount;
// Ensure cursor doesn't exceed message count
this.memoryCursor = Math.min(this.memoryCursor, this.messages.length);
}
/**
 * Drops every stored message and rewinds the memory cursor to the start.
 * Only the message list and the cursor are reset here; memory blocks are not touched.
 */
async clear(): Promise<void> {
  // With no messages left there is nothing the cursor could point past.
  this.memoryCursor = 0;
  this.messages = [];
}
/**
 * Creates a snapshot of the current memory state.
 * Only `messages` and `memoryCursor` are serialized.
 * Note: Memory blocks are not included in snapshots as they may contain non-serializable content.
 * Memory blocks should be recreated when loading from snapshot.
 * @returns A JSON string (the output of `JSON.stringify`) containing the memory state
 */
snapshot(): string {
return JSON.stringify({
messages: this.messages,
memoryCursor: this.memoryCursor,
});
}
/**
 * Counts the tokens of memory messages by first converting each one with the
 * `llamaindex` adapter and then delegating to `countMessagesToken`.
 *
 * @param messages - The memory messages to measure
 * @returns The token count reported by `countMessagesToken`
 */
private countMemoryMessagesToken(
  messages: MemoryMessage<TMessageOptions>[],
): number {
  const chatMessages = messages.map((memoryMessage) =>
    this.adapters.llamaindex.fromMemory(memoryMessage),
  ) as ChatMessage[];
  return this.countMessagesToken(chatMessages);
}
/**
 * Counts the tokens of the given chat messages using `Settings.tokenizer`.
 * The text of all messages is joined with single spaces and encoded in one go.
 *
 * @param messages - The chat messages to measure
 * @returns The number of tokens, or 0 for an empty list
 */
private countMessagesToken(messages: ChatMessage[]): number {
  if (messages.length === 0) return 0;

  const { tokenizer } = Settings;
  const joinedText = messages
    .map(({ content }) => extractText(content))
    .join(" ");
  return tokenizer.encode(joinedText).length;
}
}
+19
View File
@@ -0,0 +1,19 @@
import type { ChatMessage } from "../llms";
/**
 * Extra bookkeeping fields attached to messages stored in memory.
 * The property names follow the ones used by vercel/ai for simplicity.
 */
export type MemoryMessageExtension = {
  /** Identifier of the message. */
  id: string;
  /** Optional creation timestamp. */
  createdAt?: Date | undefined;
  /** Optional annotation values; the element shape is not constrained. */
  annotations?: unknown[] | undefined;
};
/**
 * A chat message as held in memory: a `ChatMessage` parameterized with the given
 * additional message options, intersected with the `MemoryMessageExtension` fields.
 */
export type MemoryMessage<AdditionalMessageOptions extends object = object> =
ChatMessage<AdditionalMessageOptions> & MemoryMessageExtension;
/**
 * Declared shape of a memory snapshot: the stored messages plus a token limit.
 * NOTE(review): the memory class's `snapshot()` method serializes `messages` and
 * `memoryCursor` and does not write `tokenLimit` — confirm whether this type is
 * still in sync with what is actually persisted.
 */
export type MemorySnapshot = {
messages: MemoryMessage[];
tokenLimit: number;
};
@@ -17,7 +17,7 @@ export class SentenceWindowNodeParser extends NodeParser<TextNode[]> {
windowSize: number;
windowMetadataKey: string;
originalTextMetadataKey: string;
sentenceSplitter: TextSplitterFn = splitBySentenceTokenizer();
sentenceSplitter: TextSplitterFn = splitBySentenceTokenizer([], true);
idGenerator: () => string = () => randomUUID();
constructor(params?: z.input<typeof sentenceWindowNodeParserSchema>) {
+1 -1
View File
@@ -1,5 +1,5 @@
declare class SentenceTokenizer {
constructor(abbreviations?: string[]);
constructor(abbreviations?: string[], trimSentences?: boolean);
tokenize(text: string): string[];
}
@@ -1,3 +1,24 @@
/*
Copyright (c) 2024, Hugo W.L. ter Doest
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
var __getOwnPropNames = Object.getOwnPropertyNames;
var __commonJS = (cb, mod) =>
function __require() {
@@ -30,32 +51,47 @@ var require_tokenizer = __commonJS({
// lib/natural/tokenizers/sentence_tokenizer.js
var require_sentence_tokenizer = __commonJS({
"lib/natural/tokenizers/sentence_tokenizer.js"(exports, module) {
var Tokenizer = require_tokenizer();
var NUM = "NUMBER";
var DELIM = "DELIM";
var URI = "URI";
var ABBREV = "ABBREV";
var DEBUG = false;
const Tokenizer = require_tokenizer();
// Strings that will be used to create placeholders
const NUM = "NUMBER";
const DELIM = "DELIM";
const URI = "URI";
const ABBREV = "ABBREV";
const DEBUG = false;
function generateUniqueCode(base, index) {
// Surround the placeholder with {{}} to prevent shorter numbers to be recognized
// in larger numbers
return `{{${base}_${index}}}`;
}
function escapeRegExp(string) {
return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}
var SentenceTokenizer = class extends Tokenizer {
constructor(abbreviations) {
class SentenceTokenizer extends Tokenizer {
constructor(abbreviations, trimSentences) {
super();
if (abbreviations) {
this.abbreviations = abbreviations;
} else {
this.abbreviations = [];
}
if (trimSentences === undefined) {
this.trimSentences = true;
} else {
this.trimSentences = trimSentences;
}
this.replacementMap = null;
this.replacementCounter = 0;
}
replaceUrisWithPlaceholders(text) {
const urlPattern =
/(https?:\/\/\S+|www\.\S+|ftp:\/\/\S+|(mailto:)?[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}|file:\/\/\S+)/gi;
const modifiedText = text.replace(urlPattern, (match) => {
const placeholder = generateUniqueCode(
URI,
@@ -64,8 +100,10 @@ var require_sentence_tokenizer = __commonJS({
this.replacementMap.set(placeholder, match);
return placeholder;
});
return modifiedText;
}
replaceAbbreviations(text) {
if (this.abbreviations.length === 0) {
return text;
@@ -79,9 +117,14 @@ var require_sentence_tokenizer = __commonJS({
this.replacementMap.set(code, match);
return code;
});
return replacedText;
}
replaceDelimitersWithPlaceholders(text) {
// Regular expression for sentence delimiters optionally followed by a bracket or quote
// Multiple delimiters with spaces in between are allowed
// The expression makes sure that the sentence delimiter group ends with a sentence delimiter
const delimiterPattern = /([.?!… ]*)([.?!…])(["'”’)}\]]?)/g;
const modifiedText = text.replace(
delimiterPattern,
@@ -94,32 +137,42 @@ var require_sentence_tokenizer = __commonJS({
return placeholder;
},
);
return modifiedText;
}
splitOnPlaceholders(text, placeholders) {
if (this.delimiterMap.size === 0) {
return [text];
}
const keys = Array.from(this.delimiterMap.keys());
const pattern = new RegExp(`(${keys.map(escapeRegExp).join("|")})`);
const parts = text.split(pattern);
const sentences = [];
for (let i = 0; i < parts.length; i += 2) {
const sentence = parts[i];
const placeholder = parts[i + 1] || "";
sentences.push(sentence + placeholder);
}
return sentences;
}
replaceNumbersWithCode(text) {
// Regular expression to match numbers, including decimal points and commas
const numberPattern = /\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b/g;
const replacedText = text.replace(numberPattern, (match) => {
const code = generateUniqueCode(NUM, this.replacementCounter++);
this.replacementMap.set(code, match);
return code;
});
return replacedText;
}
revertReplacements(text) {
let originalText = text;
for (const [
@@ -129,16 +182,20 @@ var require_sentence_tokenizer = __commonJS({
const pattern = new RegExp(escapeRegExp(placeholder), "g");
originalText = originalText.replace(pattern, replacement);
}
return originalText;
}
revertDelimiters(text) {
let originalText = text;
for (const [placeholder, replacement] of this.delimiterMap.entries()) {
const pattern = new RegExp(escapeRegExp(placeholder), "g");
originalText = originalText.replace(pattern, replacement);
}
return originalText;
}
tokenize(text) {
this.replacementCounter = 0;
this.replacementMap = /* @__PURE__ */ new Map();
@@ -148,32 +205,43 @@ var require_sentence_tokenizer = __commonJS({
"---Start of sentence tokenization-----------------------",
);
DEBUG && console.log("Original input: >>>" + text + "<<<");
// Replace abbreviations
const result1 = this.replaceAbbreviations(text);
DEBUG &&
console.log(
"Phase 1: replacing abbreviations: " + JSON.stringify(result1),
);
// Replace URIs
const result2 = this.replaceUrisWithPlaceholders(result1);
DEBUG &&
console.log("Phase 2: replacing URIs: " + JSON.stringify(result2));
// Replace numbers (including decimal points and commas) with placeholders
const result3 = this.replaceNumbersWithCode(result2);
DEBUG &&
console.log(
"Phase 3: replacing numbers with placeholders: " +
JSON.stringify(result3),
);
// Replace delimiters followed by optional quotes, brackets, and braces
const result4 = this.replaceDelimitersWithPlaceholders(result3);
DEBUG &&
console.log(
"Phase 4: replacing delimiters with placeholders: " +
JSON.stringify(result4),
);
// Split on placeholders for sentence delimiters
const sentences = this.splitOnPlaceholders(result4);
DEBUG &&
console.log(
"Phase 5: splitting into sentences on placeholders: " +
JSON.stringify(sentences),
);
// Replace back all abbreviations, URIs, and delimiters
const newSentences = sentences.map((s) => {
const s1 = this.revertReplacements(s);
return this.revertDelimiters(s1);
@@ -183,13 +251,17 @@ var require_sentence_tokenizer = __commonJS({
"Phase 6: replacing back abbreviations, URIs, numbers and delimiters: " +
JSON.stringify(newSentences),
);
const trimmedSentences = this.trim(newSentences);
DEBUG &&
console.log(
"Phase 7: trimming array of empty sentences: " +
JSON.stringify(trimmedSentences),
);
const trimmedSentences2 = trimmedSentences.map((sent) => sent.trim());
const trimmedSentences2 = trimmedSentences.map((sent) =>
this.trimSentences ? sent.trim() : sent,
);
DEBUG &&
console.log(
"Phase 8: trimming sentences from surrounding whitespace: " +
@@ -213,9 +285,10 @@ var require_sentence_tokenizer = __commonJS({
console.log(
"---------------------------------------------------------",
);
return trimmedSentences2;
}
};
}
module.exports = SentenceTokenizer;
},
});
+10 -6
View File
@@ -37,13 +37,17 @@ export const splitByChar = (): TextSplitterFn => {
export const splitBySentenceTokenizer = (
extraAbbreviations: string[] | undefined = [],
trimSentences: boolean = false,
): TextSplitterFn => {
const tokenizer = new SentenceTokenizer([
...abbreviations.english,
...abbreviations.spanish,
// Add the extra abbreviations provided by the user, e.g. for business-specific context
...extraAbbreviations,
]);
const tokenizer = new SentenceTokenizer(
[
...abbreviations.english,
...abbreviations.spanish,
// Add the extra abbreviations provided by the user, e.g. for business-specific context
...extraAbbreviations,
],
trimSentences,
);
return (text: string) => {
try {
return tokenizer.tokenize(text);
+12 -8
View File
@@ -101,17 +101,21 @@ export class SimpleKVStore extends BaseKVStore {
static async fromPersistPath(persistPath: string): Promise<SimpleKVStore> {
const dirPath = path.dirname(persistPath);
if (!(await exists(dirPath))) {
await fs.mkdir(dirPath);
await fs.mkdir(dirPath, { recursive: true });
}
let data: DataType = {};
try {
const fileData = await fs.readFile(persistPath);
data = JSON.parse(fileData.toString());
} catch (e) {
console.error(
`No valid data found at path: ${persistPath} starting new store.`,
);
if (!(await exists(persistPath))) {
console.info(`Starting new store from path: ${persistPath}`);
} else {
try {
const fileData = await fs.readFile(persistPath);
data = JSON.parse(fileData.toString());
} catch (e) {
throw new Error(`Failed to load data from path: ${persistPath}`, {
cause: e,
});
}
}
const store = new SimpleKVStore(data);
+103
View File
@@ -0,0 +1,103 @@
import { filetypemime } from "magic-bytes.js";
/**
 * Decodes a bare base64 string (no `data:` prefix) into its raw bytes.
 * @param base64 - The base64 payload to decode
 * @returns A Uint8Array holding one entry per decoded byte
 */
export function base64ToUint8Array(base64: string): Uint8Array {
  // atob yields a "binary string" in which every character code is one byte value.
  const decoded = atob(base64);
  return Uint8Array.from(decoded, (char) => char.charCodeAt(0));
}
/**
 * Encodes raw bytes as a base64 string.
 * @param uint8Array The bytes to encode.
 * @returns The base64 representation of the bytes.
 */
export function uint8ArrayToBase64(uint8Array: Uint8Array): string {
  // btoa expects a "binary string": one character per byte value.
  const chars: string[] = [];
  for (const byte of uint8Array) {
    chars.push(String.fromCharCode(byte));
  }
  return btoa(chars.join(""));
}
/**
 * Reads the MIME type out of a base64 data URL.
 * The MIME type is everything between the `data:` prefix and the `;base64` marker
 * of the header (the part before the first comma).
 * @param dataUrl The data URL string.
 * @returns The MIME type found in the header.
 * @throws An error if the string is not a data URL, has no comma separator,
 * or its header lacks the `;base64` marker.
 */
export function getMimeTypeFromDataUrl(dataUrl: string): string {
  const scheme = "data:";
  if (!dataUrl.startsWith(scheme)) {
    throw new Error("Not a data URL");
  }

  // The header ends at the first comma; the payload follows it.
  const payloadSeparator = dataUrl.indexOf(",");
  if (payloadSeparator < 0) {
    throw new Error("Invalid data URL format");
  }
  const header = dataUrl.slice(0, payloadSeparator);

  const encodingMarker = header.indexOf(";base64");
  if (encodingMarker < 0) {
    throw new Error("Invalid data URL format: missing base64 encoding");
  }

  return header.slice(scheme.length, encodingMarker);
}
/**
 * Builds a Blob from base64 data.
 *
 * The input may be a bare base64 string or a full data URL. An explicitly passed
 * MIME type always wins; without one, the MIME type is read from the data URL header.
 *
 * @param base64 - The base64 string, optionally in data URL form
 * @param mimeType - The MIME type of the file; optional when `base64` is a data URL
 * @returns The Blob holding the decoded bytes, typed with the resolved MIME type
 * @throws An error if no MIME type is given and `base64` is not a data URL,
 * or if the resolved MIME type is empty
 */
export function base64ToBlob(base64: string, mimeType?: string): Blob {
  let resolvedType: string | undefined;
  let payload: string;

  if (mimeType) {
    // Explicit MIME type: only strip a data URL header if one is present.
    resolvedType = mimeType;
    const separator = base64.indexOf(",");
    payload = separator === -1 ? base64 : base64.slice(separator + 1);
  } else if (base64.startsWith("data:")) {
    // No MIME type given: take it from the data URL header.
    resolvedType = getMimeTypeFromDataUrl(base64);
    payload = base64.slice(base64.indexOf(",") + 1);
  } else {
    throw new Error(
      "No MIME type provided and base64 is not in data URL format",
    );
  }

  // A data URL header may carry an empty MIME type.
  if (!resolvedType) {
    throw new Error("No MIME type found in base64 data");
  }

  return new Blob([base64ToUint8Array(payload)], { type: resolvedType });
}
/**
 * Converts a Blob into a base64 data URL.
 * The MIME type is taken from the first entry `filetypemime` reports for the
 * blob's bytes.
 * @param input - The Blob to convert
 * @returns A `data:<mime>;base64,<payload>` string
 * @throws An error if `filetypemime` reports no MIME type for the content
 */
export async function blobToDataUrl(input: Blob) {
  const bytes = new Uint8Array(await input.arrayBuffer());

  const candidates = filetypemime(bytes);
  if (candidates.length < 1) {
    throw new Error("Unsupported image type");
  }

  const payload = uint8ArrayToBase64(bytes);
  return "data:" + candidates[0] + ";base64," + payload;
}
+1
View File
@@ -72,5 +72,6 @@ export {
export { MockLLM } from "./mock";
export * from "./encoding";
export { objectEntries } from "./object-entries";
export * from "./stream";
+1 -10
View File
@@ -1,5 +1,4 @@
import { fs } from "@llamaindex/env";
import { filetypemime } from "magic-bytes.js";
import type {
ChatMessage,
MessageContent,
@@ -9,6 +8,7 @@ import type {
} from "../llms";
import type { QueryType } from "../query-engine";
import type { ImageType } from "../schema";
import { blobToDataUrl } from "./encoding";
/**
* Extracts just the text whether from
@@ -110,15 +110,6 @@ export function toToolDescriptions(tools: ToolMetadata[]): string {
return JSON.stringify(toolsObj, null, 4);
}
async function blobToDataUrl(input: Blob) {
const buffer = Buffer.from(await input.arrayBuffer());
const mimes = filetypemime(buffer);
if (mimes.length < 1) {
throw new Error("Unsupported image type");
}
return "data:" + mimes[0] + ";base64," + buffer.toString("base64");
}
export async function imageToDataUrl(
input: ImageType | Uint8Array,
): Promise<string> {
+99
View File
@@ -0,0 +1,99 @@
import {
base64ToBlob,
base64ToUint8Array,
blobToDataUrl,
getMimeTypeFromDataUrl,
uint8ArrayToBase64,
} from "@llamaindex/core/utils";
import { describe, expect, it } from "vitest";
// Plain-text fixture together with its base64 and byte representations.
const testString = "LlamaIndex";
const testBase64 = "TGxhbWFJbmRleA=="; // btoa('LlamaIndex')
const testUint8Array = new TextEncoder().encode(testString);

// PNG fixture: base64 payload, MIME type and the matching data URL.
const pngB64 =
  "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
const pngMime = "image/png";
const pngDataUrl = `data:${pngMime};base64,${pngB64}`;

// Raw PNG bytes, decoded independently of the helpers under test.
const pngBinaryString = atob(pngB64);
const pngBytes = Uint8Array.from(pngBinaryString, (char) =>
  char.charCodeAt(0),
);
describe("Encoding utils", () => {
describe("base64ToUint8Array", () => {
it("should correctly convert a base64 string to a Uint8Array", () => {
const result = base64ToUint8Array(testBase64);
expect(result).toBeInstanceOf(Uint8Array);
expect(result).toEqual(testUint8Array);
});
});
describe("uint8ArrayToBase64", () => {
it("should correctly convert a Uint8Array to a base64 string", () => {
const result = uint8ArrayToBase64(testUint8Array);
expect(result).toBe(testBase64);
});
});
describe("getMimeTypeFromDataUrl", () => {
it("should extract the correct MIME type from a data URL", () => {
const result = getMimeTypeFromDataUrl(pngDataUrl);
expect(result).toBe(pngMime);
});
it("should throw an error for non-data URLs", () => {
expect(() => getMimeTypeFromDataUrl("not a data url")).toThrow(
"Not a data URL",
);
});
it("should throw an error for malformed data URLs", () => {
expect(() => getMimeTypeFromDataUrl("data:image/pngbase64,abc")).toThrow(
"Invalid data URL format: missing base64 encoding",
);
expect(() => getMimeTypeFromDataUrl("data:image/png;base64")).toThrow(
"Invalid data URL format",
);
});
});
describe("base64ToBlob", () => {
it("should convert from a data URL string", async () => {
const blob = base64ToBlob(pngDataUrl);
expect(blob).toBeInstanceOf(Blob);
expect(blob.type).toBe(pngMime);
const arrayBuffer = await blob.arrayBuffer();
expect(new Uint8Array(arrayBuffer)).toEqual(pngBytes);
});
it("should convert from a base64 string with an explicit MIME type", async () => {
const blob = base64ToBlob(pngB64, pngMime);
expect(blob).toBeInstanceOf(Blob);
expect(blob.type).toBe(pngMime);
const arrayBuffer = await blob.arrayBuffer();
expect(new Uint8Array(arrayBuffer)).toEqual(pngBytes);
});
it("should prioritize the explicit MIME type if a data URL is provided", async () => {
const differentMime = "image/jpeg";
const blob = base64ToBlob(pngDataUrl, differentMime);
expect(blob.type).toBe(differentMime);
});
it("should throw an error if no MIME type can be determined", () => {
expect(() => base64ToBlob(pngB64)).toThrow(
"No MIME type provided and base64 is not in data URL format",
);
});
});
describe("blobToDataUrl", () => {
it("should correctly convert a blob to a data URL", async () => {
const blob = new Blob([pngBytes], { type: "image/png" });
const result = await blobToDataUrl(blob);
expect(result).toBe(pngDataUrl);
});
});
});
+395
View File
@@ -0,0 +1,395 @@
import { Settings } from "@llamaindex/core/global";
import type { ChatMessage, LLM } from "@llamaindex/core/llms";
import { createMemory, Memory } from "@llamaindex/core/memory";
import { MockLLM } from "@llamaindex/core/utils";
import type { Tokenizer } from "@llamaindex/env/tokenizers";
import {
afterAll,
beforeAll,
beforeEach,
describe,
expect,
test,
} from "vitest";
// Deterministic tokenizer stub: token counts depend only on the text length.
const createMockTokenizer = (): Tokenizer => ({
  // One token per started group of 4 characters; the token values are 0..n-1.
  encode(text: string): Uint32Array {
    const tokenCount = Math.ceil(text.length / 4);
    return new Uint32Array(tokenCount).map((_, index) => index);
  },
  // The original text cannot be recovered; only the token count is reported.
  decode(tokens: Uint32Array): string {
    return `decoded_${tokens.length}_tokens`;
  },
});
// Builds a MockLLM whose metadata differs between tests only in the context window size.
const createMockLLM = (contextWindow: number): LLM => {
  return new MockLLM({
    metadata: {
      contextWindow,
      model: "test-model",
      temperature: 0.7,
      topP: 1.0,
      tokenizer: undefined,
      structuredOutput: false,
    },
  });
};
describe("Memory", () => {
let memory: Memory;
let originalTokenizer: Tokenizer;
beforeAll(() => {
// Save original tokenizer and set mock
originalTokenizer = Settings.tokenizer;
Settings.tokenizer = createMockTokenizer();
});
afterAll(() => {
// Restore original tokenizer
Settings.tokenizer = originalTokenizer;
});
beforeEach(() => {
memory = createMemory();
});
describe("add", () => {
test("should add LlamaIndex ChatMessage", async () => {
const message: ChatMessage = {
role: "user",
content: "Hello, world!",
};
await memory.add(message);
const messages = await memory.get();
expect(messages).toHaveLength(1);
expect(messages[0]).toEqual(message);
});
test("should add Vercel UI Message and convert to ChatMessage", async () => {
const vercelMessage = {
id: "test-id",
role: "user",
content: "Hello from Vercel!",
parts: [{ type: "text", text: "Hello from Vercel!" }],
createdAt: new Date(),
annotations: [],
};
await memory.add(vercelMessage);
const messages = await memory.get();
expect(messages).toHaveLength(1);
expect(messages[0]).toEqual({
role: "user",
content: "Hello from Vercel!",
});
});
test("should add multiple messages in sequence", async () => {
const message1: ChatMessage = { role: "user", content: "First message" };
const message2: ChatMessage = {
role: "assistant",
content: "Second message",
};
await memory.add(message1);
await memory.add(message2);
const messages = await memory.get();
expect(messages).toHaveLength(2);
expect(messages[0]).toEqual(message1);
expect(messages[1]).toEqual(message2);
});
});
describe("get", () => {
beforeEach(async () => {
// Add some test messages
await memory.add({ role: "user", content: "User message" });
await memory.add({ role: "assistant", content: "Assistant response" });
});
test("should return messages in LlamaIndex format by default", async () => {
const messages = await memory.get();
expect(messages).toHaveLength(2);
expect(messages[0]).toEqual({ role: "user", content: "User message" });
expect(messages[1]).toEqual({
role: "assistant",
content: "Assistant response",
});
});
test("should return messages in LlamaIndex format when explicitly requested", async () => {
const messages = await memory.get({ type: "llamaindex" });
expect(messages).toHaveLength(2);
expect(messages[0]).toEqual({ role: "user", content: "User message" });
expect(messages[1]).toEqual({
role: "assistant",
content: "Assistant response",
});
});
test("should add and get messages in LlamaIndex format when explicitly requested with options", async () => {
const message = {
role: "user",
content: "Hello, world!",
options: {
temperature: 0.7,
topP: 1.0,
},
};
await memory.add(message);
const messages = await memory.get({ type: "llamaindex" });
expect(messages[messages.length - 1]).toEqual({
role: "user",
content: "Hello, world!",
options: {
temperature: 0.7,
topP: 1.0,
},
});
});
test("should return messages in Vercel format when requested", async () => {
const messages = await memory.get({ type: "vercel" });
expect(messages).toHaveLength(2);
expect(messages[0]).toMatchObject({
role: "user",
content: "User message",
parts: [{ type: "text", text: "User message" }],
});
expect(messages[1]).toMatchObject({
role: "assistant",
content: "Assistant response",
parts: [{ type: "text", text: "Assistant response" }],
});
// Check that IDs and timestamps are generated
expect(typeof messages[0]).toBe("object");
expect(messages[0]).toHaveProperty("id");
expect(messages[0]).toHaveProperty("parts");
expect(messages[0]?.parts).toHaveLength(1);
expect(messages[1]).toHaveProperty("parts");
expect(messages[1]?.parts).toHaveLength(1);
});
test("should include transient messages without storing them", async () => {
const transientMessages: ChatMessage[] = [
{ role: "system", content: "Transient system message" },
{ role: "user", content: "Transient user message" },
];
const messages = await memory.get({ transientMessages });
// Should return stored messages + transient messages
expect(messages).toHaveLength(4);
expect(messages[0]).toEqual({ role: "user", content: "User message" });
expect(messages[1]).toEqual({
role: "assistant",
content: "Assistant response",
});
expect(messages[2]).toEqual({
role: "system",
content: "Transient system message",
});
expect(messages[3]).toEqual({
role: "user",
content: "Transient user message",
});
// Verify transient messages are not stored permanently
const storedMessages = await memory.get();
expect(storedMessages).toHaveLength(2);
expect(storedMessages[0]).toEqual({
role: "user",
content: "User message",
});
expect(storedMessages[1]).toEqual({
role: "assistant",
content: "Assistant response",
});
});
});
describe("getLLM", () => {
beforeEach(async () => {
// Add test messages with varying lengths
await memory.add({ role: "user", content: "Short message 1" });
await memory.add({
role: "assistant",
content:
"This is a longer assistant response with more content to test token limits",
});
await memory.add({ role: "user", content: "Another user message" });
await memory.add({
role: "assistant",
content: "Final assistant response",
});
});
test("should return all messages when no LLM is provided", async () => {
const messages = await memory.getLLM();
expect(messages).toHaveLength(4);
expect(messages[0]?.content).toBe("Short message 1");
expect(messages[1]?.content).toBe(
"This is a longer assistant response with more content to test token limits",
);
expect(messages[2]?.content).toBe("Another user message");
expect(messages[3]?.content).toBe("Final assistant response");
});
test("should include transient messages in token calculation", async () => {
const transientMessages: ChatMessage[] = [
{ role: "system", content: "System instruction" },
{ role: "user", content: "Transient user question" },
];
const messages = await memory.getLLM(
createMockLLM(500),
transientMessages,
);
// Should include some combination of stored and transient messages
expect(messages.length).toBeGreaterThan(0);
// Check if transient messages are included (they should be recent)
const messageContents = messages.map((m) => m.content);
const hasTransientMessage = messageContents.some(
(content) =>
content === "System instruction" ||
content === "Transient user question",
);
expect(hasTransientMessage).toBe(true);
});
test("should handle empty memory with transient messages", async () => {
const emptyMemory = createMemory();
const transientMessages: ChatMessage[] = [
{ role: "system", content: "System message" },
{ role: "user", content: "User question" },
];
const messages = await emptyMemory.getLLM(
createMockLLM(1000),
transientMessages,
);
expect(messages).toHaveLength(2);
expect(messages[0]?.content).toBe("System message");
expect(messages[1]?.content).toBe("User question");
});
});
describe("token limit handling", () => {
beforeEach(async () => {
// Add messages with different lengths for testing
await memory.add({
role: "assistant",
content:
"This is a medium length response that should take up more tokens than the previous message",
});
await memory.add({ role: "user", content: "Short" }); // has 2 tokens
await memory.add({ role: "assistant", content: "Last message" }); // has 4 tokens
});
test("should return messages in token limit", async () => {
const messages = await memory.getLLM(createMockLLM(1000));
expect(messages).toHaveLength(3);
expect(messages[0]?.content).toBe(
"This is a medium length response that should take up more tokens than the previous message",
);
expect(messages[1]?.content).toBe("Short");
expect(messages[2]?.content).toBe("Last message");
});
test("should only return messages that fit in the token limit", async () => {
const messages = await memory.getLLM(createMockLLM(6));
expect(messages).toHaveLength(1);
expect(messages[0]?.content).toBe("Last message");
});
});
describe("clear", () => {
test("should clear all messages", async () => {
await memory.add({ role: "user", content: "Test message" });
await memory.add({ role: "assistant", content: "Test response" });
expect(await memory.get()).toHaveLength(2);
await memory.clear();
expect(await memory.get()).toHaveLength(0);
});
test("should allow adding messages after clearing", async () => {
await memory.add({ role: "user", content: "First message" });
await memory.clear();
await memory.add({ role: "user", content: "After clear" });
const messages = await memory.get();
expect(messages).toHaveLength(1);
expect(messages[0]?.content).toBe("After clear");
});
});
describe("edge cases", () => {
test("should handle message with empty content", async () => {
await memory.add({ role: "user", content: "" });
const messages = await memory.get();
expect(messages).toHaveLength(1);
expect(messages[0]?.content).toBe("");
});
test("should handle different role types", async () => {
const roles: ChatMessage["role"][] = [
"user",
"assistant",
"system",
"memory",
"developer",
];
for (const role of roles) {
await memory.add({ role, content: `Message from ${role}` });
}
const messages = await memory.get();
expect(messages).toHaveLength(roles.length);
roles.forEach((role, index) => {
expect(messages[index]?.role).toBe(role);
expect(messages[index]?.content).toBe(`Message from ${role}`);
});
});
test("should handle Vercel message with data role", async () => {
const vercelMessage = {
id: "test-id",
role: "data",
content: "Data message",
parts: [{ type: "text", text: "Data message" }],
createdAt: new Date(),
annotations: [],
};
await memory.add(vercelMessage);
const messages = await memory.get();
expect(messages[0]?.role).toBe("user"); // data role should be mapped to user
});
});
});
@@ -0,0 +1,397 @@
import type { ChatMessage, MessageContentDetail } from "@llamaindex/core/llms";
import type { MemoryMessage, VercelMessage } from "@llamaindex/core/memory";
import { VercelMessageAdapter } from "@llamaindex/core/memory";
import { describe, expect, test } from "vitest";
describe("VercelMessageAdapter", () => {
const adapter = new VercelMessageAdapter();
describe("toLlamaIndexMessage", () => {
test("should convert basic Vercel message to LlamaIndex message", () => {
const vercelMessage: VercelMessage = {
id: "test-id",
role: "user",
content: "Hello, world!",
parts: [{ type: "text", text: "Hello, world!" }],
createdAt: new Date(),
annotations: [],
};
const result = adapter.toMemory(vercelMessage);
expect(result).toEqual({
id: "test-id",
role: "user",
content: "Hello, world!",
annotations: [],
createdAt: vercelMessage.createdAt,
});
});
test("should handle all supported Vercel message roles", () => {
const roles: Array<VercelMessage["role"]> = [
"system",
"user",
"assistant",
"data",
];
roles.forEach((role) => {
const vercelMessage: VercelMessage = {
id: "test-id",
role,
content: `Message from ${role}`,
parts: [{ type: "text", text: `Message from ${role}` }],
createdAt: new Date(),
annotations: [],
};
const result = adapter.toMemory(vercelMessage);
// Data role should be mapped to user
const expectedRole = role === "data" ? "user" : role;
expect(result.role).toBe(expectedRole);
expect(result.content).toBe(`Message from ${role}`);
});
});
test("should convert file parts to MessageContent", () => {
const vercelMessage: VercelMessage = {
id: "test-id",
role: "user",
content: "File message",
parts: [
{ type: "file", data: "base64data", mimeType: "image/png" },
{ type: "text", text: "Description" },
],
createdAt: new Date(),
annotations: [],
};
const result = adapter.toMemory(vercelMessage);
expect(result.content).toEqual([
{ type: "file", data: "base64data", mimeType: "image/png" },
{ type: "text", text: "Description" },
]);
});
test("should handle empty parts array", () => {
const vercelMessage: VercelMessage = {
id: "test-id",
role: "user",
content: "Fallback content",
parts: [],
createdAt: new Date(),
annotations: [],
};
const result = adapter.toMemory(vercelMessage);
expect(result.content).toBe("Fallback content");
});
test("should handle single text part", () => {
const vercelMessage: VercelMessage = {
id: "test-id",
role: "user",
content: "Original content",
parts: [{ type: "text", text: "Single text part" }],
createdAt: new Date(),
annotations: [],
};
const result = adapter.toMemory(vercelMessage);
expect(result.content).toBe("Single text part");
});
});
// Exercises adapter.fromMemory: converting a MemoryMessage into a Vercel
// message (id, role, string content and the `parts` array).
describe("toUIMessage", () => {
  test("should convert basic MemoryMessage to Vercel message", () => {
    const source: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: "Hello, LlamaIndex!",
      createdAt: new Date(),
      annotations: [],
    };

    const converted = adapter.fromMemory(source);

    // String content should appear verbatim and as a single text part.
    expect(converted).toMatchObject({
      id: "test-id",
      role: "user",
      content: "Hello, LlamaIndex!",
      parts: [{ type: "text", text: "Hello, LlamaIndex!" }],
      annotations: [],
    });
  });

  test("should convert MemoryMessage with options to Vercel message", () => {
    const createdAt = new Date();
    const annotations = ["test"];
    const source: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: "Hello, LlamaIndex!",
      createdAt,
      annotations,
    };

    const converted = adapter.fromMemory(source);

    // createdAt and annotations must be carried over unchanged.
    expect(converted).toMatchObject({
      role: "user",
      content: "Hello, LlamaIndex!",
      parts: [{ type: "text", text: "Hello, LlamaIndex!" }],
      id: "test-id",
      createdAt,
      annotations,
    });
  });

  test("should handle all MemoryMessage roles", () => {
    const allRoles: Array<MemoryMessage["role"]> = [
      "user",
      "assistant",
      "system",
      "memory",
      "developer",
    ];

    for (const role of allRoles) {
      const source: MemoryMessage = {
        id: "test-id",
        role,
        content: `Message from ${role}`,
        createdAt: new Date(),
        annotations: [],
      };

      const converted = adapter.fromMemory(source);

      // "memory" is expected to map to "system" and "developer" to "user";
      // every other role passes through untouched.
      const expectedRole: VercelMessage["role"] =
        role === "memory"
          ? "system"
          : role === "developer"
            ? "user"
            : (role as VercelMessage["role"]);

      expect(converted.role).toBe(expectedRole);
      expect(converted.content).toBe(`Message from ${role}`);
    }
  });

  test("should convert multi-modal content to parts", () => {
    const source: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: [
        { type: "text", text: "Text content" },
        {
          type: "image_url",
          image_url: { url: "https://example.com/image.jpg" },
        },
        { type: "file", data: "base64data", mimeType: "application/pdf" },
      ] as MessageContentDetail[],
    };

    const converted = adapter.fromMemory(source);

    // Image URLs are expected as a bracketed text placeholder, and the file
    // detail as a file part whose mimeType is the literal "file".
    expect(converted.parts).toEqual([
      { type: "text", text: "Text content" },
      { type: "text", text: "[Image URL: https://example.com/image.jpg]" },
      { type: "file", data: "base64data", mimeType: "file" },
    ]);
    // Only the text detail contributes to the flattened string content.
    expect(converted.content).toBe("Text content");
  });

  test("should handle different media types", () => {
    const source: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: [
        { type: "audio", data: "audio-data", mimeType: "audio/mp3" },
        { type: "video", data: "video-data", mimeType: "video/mp4" },
        { type: "image", data: "image-data", mimeType: "image/png" },
      ] as MessageContentDetail[],
    };

    const { parts } = adapter.fromMemory(source);

    // Each media detail is expected as a file part whose mimeType equals the
    // detail's `type`, not its original mimeType.
    expect(parts).toEqual([
      { type: "file", data: "audio-data", mimeType: "audio" },
      { type: "file", data: "video-data", mimeType: "video" },
      { type: "file", data: "image-data", mimeType: "image" },
    ]);
  });

  test("should handle unknown content types", () => {
    const source: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: [
        {
          type: "unknown",
          data: "unknown-data",
        } as unknown as MessageContentDetail,
      ],
    };

    const { parts } = adapter.fromMemory(source);

    // Unrecognised details are expected as a text part holding their JSON.
    expect(parts).toEqual([
      {
        type: "text",
        text: JSON.stringify({ type: "unknown", data: "unknown-data" }),
      },
    ]);
  });
});
// adapter.isCompatible must accept objects shaped like Vercel messages.
describe("isVercelMessage", () => {
  test("should return true for valid Vercel message", () => {
    const candidate: VercelMessage = {
      id: "test-id",
      role: "user",
      content: "Test content",
      parts: [],
      createdAt: new Date(),
      annotations: [],
    };

    const accepted = adapter.isCompatible(candidate);

    expect(accepted).toBe(true);
  });

  test("should return true for all valid roles", () => {
    const vercelRoles: Array<VercelMessage["role"]> = [
      "system",
      "user",
      "assistant",
      "data",
    ];

    for (const role of vercelRoles) {
      // createdAt and annotations are left out on purpose here.
      const candidate = {
        id: "test-id",
        role,
        content: "Test content",
        parts: [],
      };
      expect(adapter.isCompatible(candidate)).toBe(true);
    }
  });
});
// adapter.isCompatible must reject anything that is not a Vercel message:
// plain LlamaIndex chat messages (role + content only) as well as malformed
// values.
describe("isLlamaIndexMessage", () => {
  // Title fixed: the assertion expects `false`, the old title said "true".
  test("should return false for valid LlamaIndex message", () => {
    const validMessage: ChatMessage = {
      role: "user",
      content: "Test content",
    };
    expect(adapter.isCompatible(validMessage)).toBe(false);
  });
  // Title fixed: every LlamaIndex role is expected to be rejected.
  test("should return false for all valid LlamaIndex roles", () => {
    const roles: Array<ChatMessage["role"]> = [
      "user",
      "assistant",
      "system",
      "memory",
      "developer",
    ];
    roles.forEach((role) => {
      const message = {
        role,
        content: "Test content",
      };
      expect(adapter.isCompatible(message)).toBe(false);
    });
  });
  test("should return false for invalid message structures", () => {
    const invalidMessages = [
      null,
      undefined,
      "string",
      123,
      {},
      { role: "user" }, // missing content
      { content: "test" }, // missing role
      { role: "invalid", content: "test" }, // invalid role
      { role: "user", content: 123 }, // invalid content type (not string or array)
    ];
    invalidMessages.forEach((message) => {
      expect(adapter.isCompatible(message)).toBe(false);
    });
  });
});
// Boundary inputs for both conversion directions: missing optional fields,
// empty string / empty array content, and id handling across a round trip.
describe("edge cases and error handling", () => {
  test("should handle conversion with undefined optional fields", () => {
    const vercelMessage = {
      id: "test-id",
      role: "user" as const,
      content: "Test content",
      parts: [{ type: "text" as const, text: "Test content" }],
      // missing optional fields
    };
    const result = adapter.toMemory(vercelMessage);
    expect(result.role).toBe("user");
    expect(result.content).toBe("Test content");
  });
  test("should handle empty string content", () => {
    const memoryMessage: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: "",
    };
    const result = adapter.fromMemory(memoryMessage);
    expect(result.content).toBe("");
    // An empty string is still expected to produce one (empty) text part.
    expect(result.parts).toEqual([{ type: "text", text: "" }]);
  });
  test("should handle empty array content", () => {
    const memoryMessage: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: [],
    };
    const result = adapter.fromMemory(memoryMessage);
    expect(result.content).toBe("");
    expect(result.parts).toEqual([]);
  });
  test("should generate unique IDs", () => {
    const memoryMessage: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: "Test",
    };
    const result1 = adapter.fromMemory(memoryMessage);
    const result2 = adapter.toMemory(result1);
    // Both directions must yield a usable (non-empty string) id. The
    // round-tripped message was previously computed but never checked.
    // NOTE(review): the title says "unique", but a fixed id is supplied here;
    // whether ids are preserved or regenerated is not asserted — confirm
    // the intended contract.
    expect(typeof result1.id).toBe("string");
    expect(result1.id.length).toBeGreaterThan(0);
    expect(typeof result2.id).toBe("string");
    expect(result2.id.length).toBeGreaterThan(0);
  });
});
});
+118
View File
@@ -0,0 +1,118 @@
import {
createMemory,
loadMemory,
type MemoryMessage,
} from "@llamaindex/core/memory";
import { describe, expect, it } from "vitest";
// Round-trip tests for memory.snapshot() (serialises to a JSON string) and
// loadMemory() (rebuilds a memory instance from such a string).
describe("Memory Snapshot", () => {
  it("should create a snapshot of empty memory", () => {
    const memory = createMemory();
    const snapshot = memory.snapshot();
    const parsedSnapshot = JSON.parse(snapshot);
    expect(typeof snapshot).toBe("string");
    expect(parsedSnapshot).toEqual({
      messages: [],
      memoryCursor: 0,
    });
  });
  it("should create a snapshot with messages", async () => {
    const memory = createMemory();
    // Distinct ids so the per-index assertions below actually verify that
    // insertion order is kept; with identical ids they could not tell the
    // two messages apart.
    const message1: MemoryMessage = {
      id: "test-id-1",
      role: "user",
      content: "Hello",
    };
    const message2: MemoryMessage = {
      id: "test-id-2",
      role: "assistant",
      content: "Hi there!",
    };
    await memory.add(message1);
    await memory.add(message2);
    const snapshot = memory.snapshot();
    const parsedSnapshot = JSON.parse(snapshot);
    expect(typeof snapshot).toBe("string");
    expect(parsedSnapshot.messages).toHaveLength(2);
    expect(parsedSnapshot.messages[0].id).toBe(message1.id);
    expect(parsedSnapshot.messages[1].id).toBe(message2.id);
  });
  it("should load memory from snapshot", async () => {
    const originalMemory = createMemory();
    const message: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: "Test message",
    };
    await originalMemory.add(message);
    const snapshot = originalMemory.snapshot();
    const loadedMemory = loadMemory(snapshot);
    // Re-snapshotting the loaded memory must reproduce the original data.
    const loadedSnapshot = JSON.parse(loadedMemory.snapshot());
    expect(loadedSnapshot).toEqual(JSON.parse(snapshot));
  });
  it("should load memory with correct messages", async () => {
    const message1: MemoryMessage = {
      id: "test-id-1",
      role: "user",
      content: "First message",
    };
    const message2: MemoryMessage = {
      id: "test-id-2",
      role: "assistant",
      content: "Second message",
    };
    // Hand-built snapshot that carries only `messages` (no memoryCursor).
    const snapshot = JSON.stringify({
      messages: [message1, message2],
    });
    const memory = loadMemory(snapshot);
    const messages = await memory.get();
    expect(messages).toHaveLength(2);
    expect(messages[0]?.content).toBe(message1.content);
    expect(messages[1]?.content).toBe(message2.content);
    // The same messages requested in Vercel format must keep their ids.
    const vercelMessages = await memory.get({ type: "vercel" });
    expect(vercelMessages).toHaveLength(2);
    expect(vercelMessages[0]?.id).toBe(message1.id);
    expect(vercelMessages[1]?.id).toBe(message2.id);
  });
  it("should create independent memory instances", async () => {
    const originalMemory = createMemory();
    const message: MemoryMessage = {
      id: "test-id",
      role: "user",
      content: "Original message",
    };
    await originalMemory.add(message);
    const snapshot = originalMemory.snapshot();
    const loadedMemory = loadMemory(snapshot);
    const newMessage: MemoryMessage = {
      id: "test-id-2",
      role: "user",
      content: "New message",
    };
    // Adding to the loaded copy must not leak back into the original.
    await loadedMemory.add(newMessage);
    const originalMessages = await originalMemory.get();
    const loadedMessages = await loadedMemory.get();
    expect(originalMessages).toHaveLength(1);
    expect(loadedMessages).toHaveLength(2);
  });
});

Some files were not shown because too many files have changed in this diff Show More