Compare commits

...

16 Commits

Author SHA1 Message Date
github-actions[bot] e4c7113614 Release 0.11.21 (#2128)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-07-22 12:23:58 +08:00
Thuc Pham 38da40bc98 feat: VectoryMemoryBlock (#2110)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-22 12:18:09 +08:00
Marcus Schiesser 4d50ca4d84 chore: add streamchat test (#2122) 2025-07-22 11:30:01 +08:00
github-actions[bot] 8b5253a297 Release (#2127) 2025-07-21 15:40:31 -06:00
Logan ea15e75c89 deployment docs nits (#2126) 2025-07-21 15:30:37 -06:00
github-actions[bot] 3be87d4670 Release 0.11.20 (#2121)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: himself65 <14026360+himself65@users.noreply.github.com>
2025-07-21 09:37:44 -07:00
Terence Sim 94da13db0d fix: azure openai streamchat empty delta throw TypeError (#2118)
Co-authored-by: Terence Sim <40583743+InTheAxis@users.noreply.github.com>
2025-07-21 09:16:09 -07:00
Terence Sim acd50ea99f chore: replaced console.log with logger type from @llamaindex/env (#2123)
Co-authored-by: Terence Sim <40583743+InTheAxis@users.noreply.github.com>
2025-07-21 09:14:06 -07:00
Adrian Lyjak 2967d57ac0 feat: default to _public agent data (#2117) 2025-07-21 09:07:15 -07:00
Thuc Pham a8ec08c682 fix: ensure correct message content in agent workflow (#2114)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-21 15:13:27 +08:00
Terence Sim 678b327051 feat: added apac bedrock models (#2119)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-21 12:13:37 +08:00
Jeremy B. Merrill 650eeb1df3 fix: GeminiEmbedding should send batches of max 100 (#2099)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-07-21 12:12:42 +08:00
Laurie Voss 50f6747758 Instrumenting with Google Tag Manager (in addition to Google Analytics) (#2116) 2025-07-20 13:18:09 -07:00
github-actions[bot] 12414a6836 Release (#2113)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-07-18 13:54:38 +08:00
Marcus Schiesser 856dd8cca8 fix: assume new models are function call models (#2112) 2025-07-18 12:52:43 +08:00
Jerry Cheng d8f4f6a859 Update SupabaseVectorStore.ts to fix score calculating error (#2109)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-07-18 12:48:47 +08:00
154 changed files with 2317 additions and 233 deletions
+42
View File
@@ -1,5 +1,47 @@
# @llamaindex/doc
## 0.2.44
### Patch Changes
- 38da40b: feat: VectoryMemoryBlock
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/cloud@4.0.26
- llamaindex@0.11.21
- @llamaindex/node-parser@2.0.17
- @llamaindex/openai@0.4.12
- @llamaindex/readers@3.1.16
- @llamaindex/workflow@1.1.17
## 0.2.43
### Patch Changes
- ea15e75: Minor updates in deployment docs
## 0.2.42
### Patch Changes
- a8ec08c: fix: ensure correct message content in agent workflow
- Updated dependencies [a8ec08c]
- Updated dependencies [2967d57]
- @llamaindex/core@0.6.16
- @llamaindex/workflow@1.1.16
- @llamaindex/cloud@4.0.25
- llamaindex@0.11.20
- @llamaindex/node-parser@2.0.16
- @llamaindex/openai@0.4.11
- @llamaindex/readers@3.1.15
## 0.2.41
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.2.40
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/doc",
"version": "0.2.40",
"version": "0.2.44",
"private": true,
"scripts": {
"postinstall": "fumadocs-mdx",
+2 -1
View File
@@ -1,6 +1,6 @@
import { AIProvider } from "@/actions";
import { TooltipProvider } from "@/components/ui/tooltip";
import { GoogleAnalytics } from "@next/third-parties/google";
import { GoogleAnalytics, GoogleTagManager } from "@next/third-parties/google";
import { RootProvider } from "fumadocs-ui/provider";
import { Inter } from "next/font/google";
import type { ReactNode } from "react";
@@ -36,6 +36,7 @@ export default function Layout({ children }: { children: ReactNode }) {
LlamaIndex.TS - Build LLM-powered document agents and workflows
</title>
</head>
<GoogleTagManager gtmId="GTM-WWRFB36R" />
<body className="flex min-h-screen flex-col">
<TooltipProvider>
<AIProvider>
@@ -77,7 +77,7 @@ export async function POST(request: NextRequest) {
const agent = await initializeAgent();
const result = await agent.run(message);
return NextResponse.json({ response: result.result });
return NextResponse.json({ response: result.data });
} catch (error) {
console.error("Chat error:", error);
return NextResponse.json(
@@ -132,7 +132,7 @@ export default async function handler(
const agent = await initializeAgent();
const result = await agent.run(message);
res.json({ response: result.result });
res.json({ response: result.data });
} catch (error) {
console.error("Chat error:", error);
res.status(500).json({ error: "Internal server error" });
@@ -220,7 +220,7 @@ export async function POST(request: NextRequest) {
});
const result = await myAgent.run(message);
return NextResponse.json({ response: result.result });
return NextResponse.json({ response: result.data });
} catch (error) {
return NextResponse.json({ error: error.message }, { status: 500 });
}
@@ -233,11 +233,40 @@ Implement streaming for better user experience:
```typescript
// app/api/chat-stream/route.ts
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { agentStreamEvent } from "@llamaindex/workflow";
import { NextRequest } from "next/server";
import { z } from "zod";
// Assume myAgent is initialized elsewhere
declare const myAgent: any;
// Initialize agent once (consider using a singleton pattern)
let myAgent: any = null;
/**
 * Returns the shared agent, creating it on first use.
 *
 * The agent is cached in the module-level `myAgent` variable, so later calls
 * reuse the instance built by the first successful call. If construction
 * fails, the error is logged and rethrown to the caller.
 */
async function initializeAgent() {
  if (!myAgent) {
    try {
      // Single demo tool: greets the user by name.
      const greeter = tool({
        name: "greet",
        description: "Greets a user with their name",
        parameters: z.object({
          name: z.string(),
        }),
        execute: ({ name }) => `Hello, ${name}! How can I help you today?`,
      });

      myAgent = agent({
        tools: [greeter],
        llm: openai({ model: "gpt-4o-mini" }),
      });
    } catch (error) {
      console.error("Failed to initialize agent:", error);
      throw error;
    }
  }

  return myAgent;
}
export async function POST(request: NextRequest) {
const { message } = await request.json();
@@ -245,9 +274,10 @@ export async function POST(request: NextRequest) {
const stream = new ReadableStream({
async start(controller) {
try {
const context = myAgent.runStream(message);
const agent = await initializeAgent();
const events = agent.runStream(message);
for await (const event of context) {
for await (const event of events) {
if (agentStreamEvent.include(event)) {
controller.enqueue(new TextEncoder().encode(event.data.delta));
}
@@ -63,7 +63,7 @@ app.post('/api/chat', async (req, res) => {
try {
const { message } = req.body;
const result = await myAgent.run(message);
res.json({ response: result.result });
res.json({ response: result.data });
} catch (error) {
res.status(500).json({ error: 'Chat failed' });
}
@@ -110,7 +110,7 @@ fastify.post('/api/chat', async (request, reply) => {
try {
const { message } = request.body as { message: string };
const result = await myAgent.run(message);
return { response: result.result };
return { response: result.data };
} catch (error) {
reply.status(500).send({ error: 'Chat failed' });
}
@@ -162,7 +162,7 @@ app.post("/api/chat", async (c) => {
try {
const result = await myAgent.run(message);
return c.json({ response: result.result });
return c.json({ response: result.data });
} catch (error) {
return c.json({ error: error.message }, 500);
}
@@ -187,9 +187,9 @@ app.post('/api/chat-stream', async (req, res) => {
});
try {
const context = myAgent.runStream(message);
const events = myAgent.runStream(message);
for await (const event of context) {
for await (const event of events) {
if (agentStreamEvent.include(event)) {
res.write(event.data.delta);
}
@@ -34,7 +34,7 @@ export default {
const { message } = await request.json();
const result = await myAgent.run(message);
return new Response(JSON.stringify({ response: result.result }), {
return new Response(JSON.stringify({ response: result.data }), {
headers: { "Content-Type": "application/json" },
});
} catch (error) {
@@ -83,7 +83,7 @@ export default async function handler(req, res) {
try {
const result = await myAgent.run(message);
res.json({ response: result.result });
res.json({ response: result.data });
} catch (error) {
res.status(500).json({ error: error.message });
}
@@ -124,7 +124,7 @@ export async function POST(request: NextRequest) {
});
const result = await myAgent.run(message);
return NextResponse.json({ response: result.result });
return NextResponse.json({ response: result.data });
} catch (error) {
return NextResponse.json({ error: error.message }, { status: 500 });
}
@@ -173,7 +173,7 @@ export const handler: APIGatewayProxyHandler = async (event, context) => {
"Content-Type": "application/json",
"Access-Control-Allow-Origin": "*",
},
body: JSON.stringify({ response: result.result }),
body: JSON.stringify({ response: result.data }),
};
} catch (error) {
return {
@@ -222,7 +222,7 @@ export const handler: Handler = async (event, context) => {
return {
statusCode: 200,
body: JSON.stringify({ response: result.result }),
body: JSON.stringify({ response: result.data }),
};
} catch (error) {
return {
@@ -34,6 +34,7 @@ const jokeAgent = agent({
// Run the workflow
const result = await jokeAgent.run("Tell me something funny");
console.log(result.data.result); // Baby Llama is called cria
console.log(result.data.message); // { role: 'assistant', content: 'Baby Llama is called cria' }
```
### Event Streaming
@@ -106,34 +106,40 @@ const memory = createMemory({
Long-term memory is represented as `Memory Block` objects. These objects contain information that is from previous user sessions or from the beginning of the current conversation. When memory is retrieved (by calling `getLLM`), the short-term and long-term memories are merged together within the given `tokenLimit`.
Currently, there are two predefined memory blocks:
Currently, there are three predefined memory blocks:
- `staticBlock`: A memory block that stores a static piece of information.
- `factExtractionBlock`: A memory block that extracts facts from the chat history.
- `vectorBlock`: A memory block that stores and retrieves chat messages from a vector database using semantic similarity search. Messages are stored individually and retrieved based on their relevance to recent conversation context. Here we've passed in the `vectorStore` to use to store and retrieve the chat messages.
This sounds a bit complicated, but it's actually quite simple. Let's look at an example:
```ts
import { createMemory, factExtractionBlock, staticBlock } from "llamaindex";
import { createMemory, factExtractionBlock, staticBlock, vectorBlock } from "llamaindex";
import { QdrantVectorStore } from "@llamaindex/qdrant";
import { OpenAIEmbedding } from "@llamaindex/openai";
const memoryBlocks= [
staticBlock({
id: "core_info",
content: "My name is Logan, and I live in Saskatoon. I work at LlamaIndex.",
}),
factExtractionBlock({
id: "user-extracted_info",
priority: 1,
llm: llm,
maxFacts: 50,
}),
vectorBlock({
vectorStore: new QdrantVectorStore({ url: "http://localhost:6333" }),
priority: 2,
}),
];
```
Here, we've setup two memory blocks:
Here, we've set up three memory blocks:
- `core_info`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
- `extracted_info`: An extracted memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the `maxFacts` will be automatically summarized and reduced to leave room for new information.
- `staticBlock`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
- `factExtractionBlock`: An extracted memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
- `vectorBlock`: A vector memory block that will store chat messages in a vector database and retrieve them from there. Messages are stored individually and retrieved based on their relevance to recent conversation context. Here we've passed in the `vectorStore` to use to store and retrieve the chat messages.
You'll also notice that we've set the `priority` for the `factExtractionBlock` and `vectorBlock` blocks. This is used to determine the handling when the memory blocks' content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.
@@ -158,6 +164,46 @@ When memory is retrieved (using `getLLM`), the short-term and long-term memories
The amount of short-term memory included is specified by the `shortTermTokenLimitRatio`. If it's set to `0.7`, 70% of the `tokenLimit` is used for short-term memory (not including the static memory block).
#### VectorBlock Configuration Options
The `vectorBlock` offers several configuration options to customize its behavior:
```ts
vectorBlock({
vectorStore: new QdrantVectorStore({ url: "http://localhost:6333" }),
priority: 2,
retrievalContextWindow: 5, // Number of recent messages to use for context when retrieving
formatTemplate: new PromptTemplate({ template: "Context: {{ context }}" }), // Custom formatting template
nodePostprocessors: [/* custom postprocessors */], // Apply processing to retrieved nodes
queryOptions: {
similarityTopK: 3, // Number of top similar results to return (default: 2)
mode: VectorStoreQueryMode.DEFAULT, // Query mode for the vector store
sessionFilterKey: "session_id", // Metadata key for session filtering (default: "session_id")
// Custom filters can be added here - session filter is automatically included
filters: {
filters: [
{ key: "custom_field", value: "custom_value", operator: "==" }
],
condition: "and"
}
}
})
```
**Key Configuration Options:**
- **`retrievalContextWindow`**: Number of recent messages to consider when creating the retrieval query (default: 5). A larger window provides more context but may be less precise.
- **`formatTemplate`**: Template for formatting retrieved information before adding to memory. Defaults to a simple context template.
- **`nodePostprocessors`**: Array of postprocessors to apply to retrieved nodes, useful for filtering or transforming results.
- **`queryOptions.similarityTopK`**: Number of most similar messages to retrieve from the vector store (default: 2).
- **`queryOptions.sessionFilterKey`**: Metadata key used to isolate memory between different sessions (default: "session_id").
- **`queryOptions.filters`**: Additional metadata filters for retrieval. The session filter is automatically added to ensure memory isolation.
**Session Isolation:**
The vectorBlock automatically adds a session filter using the block's ID to ensure that memories from different sessions don't interfere with each other. This filter uses the `sessionFilterKey` (default: "session_id") and can be customized if needed.
## Persistence with Snapshots
Save and restore memory state:
@@ -1,5 +1,17 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.182
### Patch Changes
- llamaindex@0.11.21
## 0.0.181
### Patch Changes
- llamaindex@0.11.20
## 0.0.180
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.180",
"version": "0.0.182",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,18 @@
# @llamaindex/llama-parse-browser-test
## 0.0.81
### Patch Changes
- @llamaindex/cloud@4.0.26
## 0.0.80
### Patch Changes
- Updated dependencies [2967d57]
- @llamaindex/cloud@4.0.25
## 0.0.79
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llama-parse-browser-test",
"private": true,
"version": "0.0.79",
"version": "0.0.81",
"type": "module",
"scripts": {
"dev": "vite",
+12
View File
@@ -1,5 +1,17 @@
# @llamaindex/next-agent-test
## 0.1.182
### Patch Changes
- llamaindex@0.11.21
## 0.1.181
### Patch Changes
- llamaindex@0.11.20
## 0.1.180
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.180",
"version": "0.1.182",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,17 @@
# test-edge-runtime
## 0.1.181
### Patch Changes
- llamaindex@0.11.21
## 0.1.180
### Patch Changes
- llamaindex@0.11.20
## 0.1.179
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.179",
"version": "0.1.181",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,27 @@
# @llamaindex/next-node-runtime
## 0.1.51
### Patch Changes
- llamaindex@0.11.21
- @llamaindex/huggingface@0.1.22
- @llamaindex/readers@3.1.16
## 0.1.50
### Patch Changes
- llamaindex@0.11.20
- @llamaindex/huggingface@0.1.21
- @llamaindex/readers@3.1.15
## 0.1.49
### Patch Changes
- @llamaindex/huggingface@0.1.20
## 0.1.48
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.1.48",
"version": "0.1.51",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,17 @@
# vite-import-llamaindex
## 0.0.48
### Patch Changes
- llamaindex@0.11.21
## 0.0.47
### Patch Changes
- llamaindex@0.11.20
## 0.0.46
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "vite-import-llamaindex",
"private": true,
"version": "0.0.46",
"version": "0.0.48",
"type": "module",
"scripts": {
"build": "vite build",
@@ -1,5 +1,17 @@
# @llamaindex/waku-query-engine-test
## 0.0.182
### Patch Changes
- llamaindex@0.11.21
## 0.0.181
### Patch Changes
- llamaindex@0.11.20
## 0.0.180
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.180",
"version": "0.0.182",
"type": "module",
"private": true,
"scripts": {
+1 -1
View File
@@ -23,7 +23,7 @@ await test("pinecone", async (t) => {
});
const vectorStore = new PineconeVectorStore({
embeddingModel: openaiEmbedding,
embedModel: openaiEmbedding,
});
t.after(async () => {
+127
View File
@@ -1,5 +1,132 @@
# examples
## 0.3.33
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/cloud@4.0.26
- llamaindex@0.11.21
- @llamaindex/node-parser@2.0.17
- @llamaindex/anthropic@0.3.19
- @llamaindex/assemblyai@0.1.16
- @llamaindex/clip@0.0.68
- @llamaindex/cohere@0.0.31
- @llamaindex/deepinfra@0.0.68
- @llamaindex/discord@0.1.16
- @llamaindex/google@0.3.16
- @llamaindex/huggingface@0.1.22
- @llamaindex/jinaai@0.0.28
- @llamaindex/mistral@0.1.17
- @llamaindex/mixedbread@0.0.31
- @llamaindex/notion@0.1.16
- @llamaindex/ollama@0.1.17
- @llamaindex/openai@0.4.12
- @llamaindex/perplexity@0.0.25
- @llamaindex/portkey-ai@0.0.59
- @llamaindex/replicate@0.0.59
- @llamaindex/bm25-retriever@0.0.6
- @llamaindex/astra@0.0.31
- @llamaindex/azure@0.1.29
- @llamaindex/chroma@0.0.31
- @llamaindex/elastic-search@0.1.17
- @llamaindex/firestore@1.0.24
- @llamaindex/milvus@0.1.26
- @llamaindex/mongodb@0.0.32
- @llamaindex/pinecone@0.1.17
- @llamaindex/postgres@0.0.60
- @llamaindex/qdrant@0.1.27
- @llamaindex/supabase@0.1.18
- @llamaindex/upstash@0.0.31
- @llamaindex/weaviate@0.0.32
- @llamaindex/vercel@0.1.17
- @llamaindex/voyage-ai@1.0.23
- @llamaindex/readers@3.1.16
- @llamaindex/tools@0.1.7
- @llamaindex/workflow@1.1.17
- @llamaindex/deepseek@0.0.29
- @llamaindex/fireworks@0.0.28
- @llamaindex/groq@0.0.84
- @llamaindex/together@0.0.28
- @llamaindex/vllm@0.0.54
- @llamaindex/xai@0.0.15
## 0.3.32
### Patch Changes
- Updated dependencies [650eeb1]
- Updated dependencies [a8ec08c]
- Updated dependencies [2967d57]
- @llamaindex/google@0.3.15
- @llamaindex/core@0.6.16
- @llamaindex/workflow@1.1.16
- @llamaindex/cloud@4.0.25
- llamaindex@0.11.20
- @llamaindex/node-parser@2.0.16
- @llamaindex/anthropic@0.3.18
- @llamaindex/assemblyai@0.1.15
- @llamaindex/clip@0.0.67
- @llamaindex/cohere@0.0.30
- @llamaindex/deepinfra@0.0.67
- @llamaindex/discord@0.1.15
- @llamaindex/huggingface@0.1.21
- @llamaindex/jinaai@0.0.27
- @llamaindex/mistral@0.1.16
- @llamaindex/mixedbread@0.0.30
- @llamaindex/notion@0.1.15
- @llamaindex/ollama@0.1.16
- @llamaindex/openai@0.4.11
- @llamaindex/perplexity@0.0.24
- @llamaindex/portkey-ai@0.0.58
- @llamaindex/replicate@0.0.58
- @llamaindex/bm25-retriever@0.0.5
- @llamaindex/astra@0.0.30
- @llamaindex/azure@0.1.28
- @llamaindex/chroma@0.0.30
- @llamaindex/elastic-search@0.1.16
- @llamaindex/firestore@1.0.23
- @llamaindex/milvus@0.1.25
- @llamaindex/mongodb@0.0.31
- @llamaindex/pinecone@0.1.16
- @llamaindex/postgres@0.0.59
- @llamaindex/qdrant@0.1.26
- @llamaindex/supabase@0.1.17
- @llamaindex/upstash@0.0.30
- @llamaindex/weaviate@0.0.31
- @llamaindex/vercel@0.1.16
- @llamaindex/voyage-ai@1.0.22
- @llamaindex/readers@3.1.15
- @llamaindex/tools@0.1.6
- @llamaindex/deepseek@0.0.28
- @llamaindex/fireworks@0.0.27
- @llamaindex/groq@0.0.83
- @llamaindex/together@0.0.27
- @llamaindex/vllm@0.0.53
- @llamaindex/xai@0.0.14
## 0.3.31
### Patch Changes
- Updated dependencies [d8f4f6a]
- Updated dependencies [856dd8c]
- @llamaindex/supabase@0.1.16
- @llamaindex/openai@0.4.10
- @llamaindex/clip@0.0.66
- @llamaindex/deepinfra@0.0.66
- @llamaindex/deepseek@0.0.27
- @llamaindex/fireworks@0.0.26
- @llamaindex/groq@0.0.82
- @llamaindex/huggingface@0.1.20
- @llamaindex/jinaai@0.0.26
- @llamaindex/perplexity@0.0.23
- @llamaindex/azure@0.1.27
- @llamaindex/together@0.0.26
- @llamaindex/vllm@0.0.52
- @llamaindex/xai@0.0.13
## 0.3.30
### Patch Changes
+1
View File
@@ -24,6 +24,7 @@ async function main() {
state: result.data.state,
});
console.log(`${JSON.stringify(caResult, null, 2)}`);
console.log("assistant message:", result.data.message);
}
main().catch((error) => {
+150
View File
@@ -0,0 +1,150 @@
/**
* Example: Vector Memory Block
*
* This example demonstrates how to use the VectorMemoryBlock to store and retrieve
* conversation history using vector similarity search. The vector memory block
* stores messages in a vector store and can retrieve relevant context based on
* semantic similarity to recent messages.
*/
import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
import { QdrantVectorStore } from "@llamaindex/qdrant";
import { createMemory, vectorBlock } from "llamaindex";
// Set up the LLM and the embedding model shared by the rest of this example
const llm = new OpenAI({ model: "gpt-4.1-mini" });
const embedModel = new OpenAIEmbedding({ model: "text-embedding-3-small" });
// Simulated conversation used to fill the memory.
// It has 9 messages, which is more than the token limit of 100 tokens (set on the memory in main()).
// The last 4 messages are expected to stay in the short-term memory block (their tokens fit within the limit),
// whereas the first 5 messages are added to the long-term memory block (here, the vector memory block backed by Qdrant).
const CONVERSATION_TURNS = [
//// These are the first 5 messages, which are added to the long-term memory block (vector memory block)
{
role: "user",
content: "Hi, I'm Sarah and I work as a data scientist at Google.",
},
{
role: "assistant",
content:
"Hello Sarah! It's great to meet you. Data science at Google must be exciting!",
},
{
role: "user",
content:
"Yes, I specialize in machine learning and natural language processing.",
},
{
role: "assistant",
content: "That's impressive! ML and NLP are fascinating fields.",
},
{
role: "user",
content:
"I have a PhD in Computer Science from Stanford, and I love hiking on weekends.",
},
//// These are the last 4 messages, which are kept in the short-term memory block
{
role: "assistant",
content:
"Wow, Stanford PhD! And hiking is a great way to unwind from tech work.",
},
{
role: "user",
content: "I also have two cats named Whiskers and Mittens.",
},
{
role: "assistant",
content:
"Cats make wonderful companions! Whiskers and Mittens are cute names.",
},
{
role: "user",
content: "Summary information about Sarah and her cats",
},
];
/**
 * Runs the vector memory block demo end to end:
 * builds a Qdrant-backed vector memory block, loads the sample conversation
 * into a memory with a 100-token limit, asks the LLM to answer from the
 * resulting chat history, then adds a few more messages and prints the
 * updated history.
 */
async function main() {
  console.log("=== Vector Memory Block Example ===\n");

  // The vector store needs a reachable Qdrant instance. A local one can be
  // started with Docker:
  //
  //   docker pull qdrant/qdrant
  //   docker run -p 6333:6333 qdrant/qdrant
  //
  // The stored data can then be inspected at
  // http://localhost:6333/dashboard#/collections
  const qdrantStore = new QdrantVectorStore({
    url: "http://localhost:6333",
    embedModel,
  });

  // Long-term memory block built with the `vectorBlock` factory function
  const longTermBlock = vectorBlock({
    vectorStore: qdrantStore,
    priority: 5,
  });

  // Memory that starts empty and uses the vector block as its only memory block
  const memory = createMemory([], {
    llm,
    memoryBlocks: [longTermBlock],
    tokenLimit: 100,
    shortTermTokenLimitRatio: 0.7,
  });

  // Feed the sample conversation into the memory, one message at a time
  console.log(`Adding ${CONVERSATION_TURNS.length} messages to the memory...`);
  for (const turn of CONVERSATION_TURNS) {
    await memory.add(turn);
  }

  // Ask the memory for the chat history to send to the LLM.
  // Expected output: 1 generated context message from the vector memory block
  // plus 4 messages from the short-term memory block.
  console.log("Retrieving relevant context...");
  const chatHistory = await memory.getLLM();
  console.log("Chat memory:", chatHistory);

  // Let the assistant answer using that history
  console.log("\nAssistant response with context:");
  const { message: reply } = await llm.chat({
    messages: chatHistory,
  });
  console.log(reply.content);

  // A second, unrelated exchange to push more messages through the memory
  const followUps = [
    {
      role: "user",
      content: "Write a long paragraph about weather in Tokyo",
    },
    {
      role: "assistant",
      content:
        "The weather in Tokyo is sunny and warm. The temperature is around 20 degrees Celsius. The weather is very nice and the people are friendly.",
    },
    {
      role: "user",
      content: "What is the weather in Tokyo?",
    },
  ];
  for (const turn of followUps) {
    await memory.add(turn);
  }

  // Fetch the history again. The context message generated by the vector
  // memory block should now contain the retrieved nodes, separated by `\n`.
  // How many nodes are retrieved is controlled by `similarityTopK` in the
  // `queryOptions` of `vectorBlock` (default `similarityTopK` is 2).
  const updatedHistory = await memory.getLLM();
  console.log("New chat history:", updatedHistory);
}
main().catch(console.error);
+47 -47
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/examples",
"version": "0.3.30",
"version": "0.3.33",
"private": true,
"scripts": {
"lint": "eslint .",
@@ -11,52 +11,52 @@
"@azure/cosmos": "^4.1.1",
"@azure/identity": "^4.4.1",
"@azure/search-documents": "^12.1.0",
"@llamaindex/anthropic": "^0.3.17",
"@llamaindex/assemblyai": "^0.1.14",
"@llamaindex/astra": "^0.0.29",
"@llamaindex/azure": "^0.1.26",
"@llamaindex/bm25-retriever": "^0.0.4",
"@llamaindex/chroma": "^0.0.29",
"@llamaindex/clip": "^0.0.65",
"@llamaindex/cloud": "^4.0.24",
"@llamaindex/cohere": "^0.0.29",
"@llamaindex/core": "^0.6.15",
"@llamaindex/deepinfra": "^0.0.65",
"@llamaindex/deepseek": "^0.0.26",
"@llamaindex/discord": "^0.1.14",
"@llamaindex/elastic-search": "^0.1.15",
"@llamaindex/anthropic": "^0.3.19",
"@llamaindex/assemblyai": "^0.1.16",
"@llamaindex/astra": "^0.0.31",
"@llamaindex/azure": "^0.1.29",
"@llamaindex/bm25-retriever": "^0.0.6",
"@llamaindex/chroma": "^0.0.31",
"@llamaindex/clip": "^0.0.68",
"@llamaindex/cloud": "^4.0.26",
"@llamaindex/cohere": "^0.0.31",
"@llamaindex/core": "^0.6.17",
"@llamaindex/deepinfra": "^0.0.68",
"@llamaindex/deepseek": "^0.0.29",
"@llamaindex/discord": "^0.1.16",
"@llamaindex/elastic-search": "^0.1.17",
"@llamaindex/env": "^0.1.30",
"@llamaindex/firestore": "^1.0.22",
"@llamaindex/fireworks": "^0.0.25",
"@llamaindex/google": "^0.3.14",
"@llamaindex/groq": "^0.0.81",
"@llamaindex/huggingface": "^0.1.19",
"@llamaindex/jinaai": "^0.0.25",
"@llamaindex/milvus": "^0.1.24",
"@llamaindex/mistral": "^0.1.15",
"@llamaindex/mixedbread": "^0.0.29",
"@llamaindex/mongodb": "^0.0.30",
"@llamaindex/node-parser": "^2.0.15",
"@llamaindex/notion": "^0.1.14",
"@llamaindex/ollama": "^0.1.15",
"@llamaindex/openai": "^0.4.9",
"@llamaindex/perplexity": "^0.0.22",
"@llamaindex/pinecone": "^0.1.15",
"@llamaindex/portkey-ai": "^0.0.57",
"@llamaindex/postgres": "^0.0.58",
"@llamaindex/qdrant": "^0.1.25",
"@llamaindex/readers": "^3.1.14",
"@llamaindex/replicate": "^0.0.57",
"@llamaindex/supabase": "^0.1.15",
"@llamaindex/together": "^0.0.25",
"@llamaindex/tools": "^0.1.5",
"@llamaindex/upstash": "^0.0.29",
"@llamaindex/vercel": "^0.1.15",
"@llamaindex/vllm": "^0.0.51",
"@llamaindex/voyage-ai": "^1.0.21",
"@llamaindex/weaviate": "^0.0.30",
"@llamaindex/workflow": "^1.1.15",
"@llamaindex/xai": "^0.0.12",
"@llamaindex/firestore": "^1.0.24",
"@llamaindex/fireworks": "^0.0.28",
"@llamaindex/google": "^0.3.16",
"@llamaindex/groq": "^0.0.84",
"@llamaindex/huggingface": "^0.1.22",
"@llamaindex/jinaai": "^0.0.28",
"@llamaindex/milvus": "^0.1.26",
"@llamaindex/mistral": "^0.1.17",
"@llamaindex/mixedbread": "^0.0.31",
"@llamaindex/mongodb": "^0.0.32",
"@llamaindex/node-parser": "^2.0.17",
"@llamaindex/notion": "^0.1.16",
"@llamaindex/ollama": "^0.1.17",
"@llamaindex/openai": "^0.4.12",
"@llamaindex/perplexity": "^0.0.25",
"@llamaindex/pinecone": "^0.1.17",
"@llamaindex/portkey-ai": "^0.0.59",
"@llamaindex/postgres": "^0.0.60",
"@llamaindex/qdrant": "^0.1.27",
"@llamaindex/readers": "^3.1.16",
"@llamaindex/replicate": "^0.0.59",
"@llamaindex/supabase": "^0.1.18",
"@llamaindex/together": "^0.0.28",
"@llamaindex/tools": "^0.1.7",
"@llamaindex/upstash": "^0.0.31",
"@llamaindex/vercel": "^0.1.17",
"@llamaindex/vllm": "^0.0.54",
"@llamaindex/voyage-ai": "^1.0.23",
"@llamaindex/weaviate": "^0.0.32",
"@llamaindex/workflow": "^1.1.17",
"@llamaindex/xai": "^0.0.15",
"@notionhq/client": "^4.0.0",
"@pinecone-database/pinecone": "^4.0.0",
"@vercel/postgres": "^0.10.0",
@@ -65,7 +65,7 @@
"commander": "^12.1.0",
"dotenv": "^17.2.0",
"js-tiktoken": "^1.0.14",
"llamaindex": "^0.11.19",
"llamaindex": "^0.11.21",
"mongodb": "6.7.0",
"postgres": "^3.4.4",
"wikipedia": "^2.1.2",
+1 -1
View File
@@ -15,7 +15,7 @@ async function main() {
const vectorStore = new QdrantVectorStore({
url: process.env.QDRANT_URL,
apiKey: process.env.QDRANT_API_KEY,
embeddingModel: embedding,
embedModel: embedding,
collectionName: "gemini_test",
});
const storageContext = await storageContextFromDefaults({ vectorStore });
+1 -1
View File
@@ -16,7 +16,7 @@ async function main() {
const vectorStore = new QdrantVectorStore({
url: process.env.QDRANT_URL,
apiKey: process.env.QDRANT_API_KEY,
embeddingModel: embedding,
embedModel: embedding,
collectionName: "jina_test",
});
const storageContext = await storageContextFromDefaults({ vectorStore });
+12
View File
@@ -1,5 +1,17 @@
# @llamaindex/autotool
## 8.0.21
### Patch Changes
- llamaindex@0.11.21
## 8.0.20
### Patch Changes
- llamaindex@0.11.20
## 8.0.19
### Patch Changes
@@ -1,5 +1,19 @@
# @llamaindex/autotool-01-node-example
## 0.0.129
### Patch Changes
- llamaindex@0.11.21
- @llamaindex/autotool@8.0.21
## 0.0.128
### Patch Changes
- llamaindex@0.11.20
- @llamaindex/autotool@8.0.20
## 0.0.127
### Patch Changes
@@ -13,5 +13,5 @@
"scripts": {
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
},
"version": "0.0.127"
"version": "0.0.129"
}
+1 -1
View File
@@ -6,7 +6,7 @@
"url": "git+https://github.com/run-llama/LlamaIndexTS.git",
"directory": "packages/autotool"
},
"version": "8.0.19",
"version": "8.0.21",
"description": "auto transpile your JS function to LLM Agent compatible",
"files": [
"dist",
+15
View File
@@ -1,5 +1,20 @@
# @llamaindex/cloud
## 4.0.26
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 4.0.25
### Patch Changes
- 2967d57: Default to \_public agent url id
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 4.0.24
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloud",
"version": "4.0.24",
"version": "4.0.26",
"type": "module",
"license": "MIT",
"scripts": {
+21 -6
View File
@@ -33,7 +33,7 @@ export class AgentClient<T = unknown> {
apiKey = getEnv("LLAMA_CLOUD_API_KEY"),
baseUrl = "https://api.cloud.llamaindex.ai/",
collection = "default",
agentUrlId = "default",
agentUrlId = "_public",
}: {
apiKey?: string;
baseUrl?: string;
@@ -127,7 +127,7 @@ export class AgentClient<T = unknown> {
}
/**
* List agent data
* Search agent data
*/
async search(
options: SearchAgentDataOptions,
@@ -275,7 +275,8 @@ export interface AgentDataClientOptions<T = unknown> {
collection?: string;
}
/**
* Create a new AsyncAgentDataClient instance
* Create a new AsyncAgentDataClient instance. Does its best to infer an agent url id from the environment.
* Pass in the window url and/or env to infer the agent url id from them.
* @param options - The options for the client
* @returns A new AgentClient instance
*/
@@ -283,20 +284,34 @@ export function createAgentDataClient<T = unknown>({
apiKey,
baseUrl,
windowUrl,
env,
agentUrlId,
collection = "default",
}: {
apiKey?: string;
baseUrl?: string;
windowUrl?: string;
env?: Record<string, string>;
agentUrlId?: string;
collection?: string;
} = {}): AgentClient<T> {
if (env && !agentUrlId) {
agentUrlId =
env.LLAMA_DEPLOY_DEPLOYMENT_NAME ||
env.NEXT_PUBLIC_LLAMA_DEPLOY_DEPLOYMENT_NAME ||
env.VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME;
}
if (windowUrl && !agentUrlId) {
try {
const path = new URL(windowUrl).pathname;
// /deployments/<agent-url-id>/ui/ -> ["", "deployments", "<agent-url-id>", "ui"]
agentUrlId = path.split("/")[2];
const url = new URL(windowUrl);
const path = url.pathname;
const isLocalhost = // local agents should default to _public, otherwise a full deployment is required
url.hostname.includes("localhost") ||
url.hostname.includes("127.0.0.1");
if (path.startsWith("/deployments/") && !isLocalhost) {
// /deployments/<agent-url-id>/ui/ -> ["", "deployments", "<agent-url-id>", "ui"]
agentUrlId = path.split("/")[2];
}
} catch (error) {
console.warn(
"Failed to infer agent url id from window url, falling back to default",
+12
View File
@@ -1,5 +1,17 @@
# @llamaindex/core
## 0.6.17
### Patch Changes
- 38da40b: feat: VectorMemoryBlock
## 0.6.16
### Patch Changes
- a8ec08c: fix: ensure correct message content in agent workflow
## 0.6.15
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/core",
"type": "module",
"version": "0.6.15",
"version": "0.6.17",
"description": "LlamaIndex Core Module",
"exports": {
"./agent": {
+3 -1
View File
@@ -39,7 +39,9 @@ export abstract class BaseMemoryBlock<
*
* @returns The memory block content as an array of ChatMessage.
*/
abstract get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]>;
abstract get(
messages?: MemoryMessage<TAdditionalMessageOptions>[],
): Promise<MemoryMessage<TAdditionalMessageOptions>[]>;
/**
* Store the messages in the memory block.
+1
View File
@@ -1,3 +1,4 @@
export { BaseMemoryBlock } from "./base";
export { FactExtractionMemoryBlock } from "./fact";
export { StaticMemoryBlock } from "./static";
export { VectorMemoryBlock } from "./vector";
+250
View File
@@ -0,0 +1,250 @@
import type { BaseEmbedding } from "../../embeddings";
import type { BaseNodePostprocessor } from "../../postprocessor";
import { BasePromptTemplate, defaultContextSystemPrompt } from "../../prompts";
import type { NodeWithScore } from "../../schema";
import { MetadataMode, TextNode } from "../../schema";
import { extractText } from "../../utils/llms";
import type {
BaseVectorStore,
MetadataFilter,
VectorStoreQuery,
} from "../../vector-store";
import { VectorStoreQueryMode } from "../../vector-store";
import type { MemoryMessage } from "../types";
import { BaseMemoryBlock, type MemoryBlockOptions } from "./base";
/**
* The options for the vector memory block.
*
* These are combined with the generic `MemoryBlockOptions` accepted by every memory block.
*/
export type VectorMemoryBlockOptions = {
/**
* The vector store to use for retrieval.
* The block's constructor throws if the store does not report `storesText`.
*/
vectorStore: BaseVectorStore;
/**
* Maximum number of messages to include for context when retrieving.
* @default 5
*/
retrievalContextWindow?: number;
/**
* Template for formatting the retrieved information.
* The template is formatted with a single `context` variable holding the retrieved text.
* @default defaultContextSystemPrompt
*/
formatTemplate?: BasePromptTemplate;
/**
* List of node postprocessors to apply to the retrieved nodes containing messages.
* Postprocessors run in order, each receiving the previous one's output.
*
* @default []
*/
nodePostprocessors?: BaseNodePostprocessor[];
/**
* Configuration options for vector store queries when retrieving memory.
*
* @default
* ```typescript
* {
* similarityTopK: 2, // Number of top similar results to return
* mode: VectorStoreQueryMode.DEFAULT, // Query mode for the vector store
* sessionFilterKey: "session_id", // Metadata key for session filtering
* filters: {
* filters: [
* { key: "session_id", value: "<current block id>", operator: "==" }
* ],
* condition: "and"
* }
* }
* ```
*
* Note: A session filter is automatically added to ensure memory isolation between blocks.
* If custom filters are provided, the session filter will be merged with them
* (unless they already contain a filter on `sessionFilterKey`).
*/
queryOptions?: Partial<VectorMemoryBlockQueryOptions>;
} & MemoryBlockOptions;
/**
* Query settings used by the vector memory block.
*
* This is a `VectorStoreQuery` without `queryEmbedding` and `queryStr` (the block
* fills those in itself at retrieval time), plus `sessionFilterKey`: the metadata
* key under which the block id is stored on each node and filtered on when querying.
*/
export type VectorMemoryBlockQueryOptions = Omit<
VectorStoreQuery,
"queryEmbedding" | "queryStr"
> & {
sessionFilterKey: string;
};
/**
 * A memory block that retrieves relevant information from a vector store.
 *
 * `put` embeds a batch of messages and stores it as one text node tagged with this
 * block's id. `get` embeds the most recent messages, queries the vector store
 * (restricted by the session filter) and returns the hits as a single "memory" message.
 */
export class VectorMemoryBlock<
  TAdditionalMessageOptions extends object = object,
> extends BaseMemoryBlock<TAdditionalMessageOptions> {
  private readonly vectorStore: BaseVectorStore;
  private readonly retrievalContextWindow: number;
  private readonly formatTemplate: BasePromptTemplate;
  private readonly nodePostprocessors: BaseNodePostprocessor[];
  private readonly queryOptions: VectorMemoryBlockQueryOptions;

  /**
   * @param options - Vector store, retrieval window, formatting and query settings.
   * @throws Error if the given vector store does not store text.
   */
  constructor(options: VectorMemoryBlockOptions) {
    super(options);

    // Retrieval returns node content, so the store has to keep the text itself
    if (!options.vectorStore.storesText) {
      throw new Error(
        "vectorStore must store text to be used as a retrieval memory block",
      );
    }

    this.vectorStore = options.vectorStore;
    this.retrievalContextWindow = options.retrievalContextWindow ?? 5;
    this.queryOptions = this.buildDefaultQueryOptions(options.queryOptions);
    this.formatTemplate = options.formatTemplate ?? defaultContextSystemPrompt;
    this.nodePostprocessors = options.nodePostprocessors ?? [];
  }

  /** The embedding model of the underlying vector store. */
  get embedModel(): BaseEmbedding {
    return this.vectorStore.embedModel;
  }

  /**
   * Retrieve stored memory relevant to the given messages.
   *
   * @param messages - Conversation messages; the trailing `retrievalContextWindow`
   *   messages are used to build the query.
   * @returns A single "memory" message with the formatted retrieved text, or an
   *   empty array when there is nothing to query with or nothing was found.
   */
  async get(
    messages: MemoryMessage<TAdditionalMessageOptions>[] = [],
  ): Promise<MemoryMessage<TAdditionalMessageOptions>[]> {
    if (messages.length === 0) return [];

    // Use at most the last `retrievalContextWindow` messages for the query.
    // slice(-n) returns the whole array when n exceeds its length, so a window
    // of 1 yields only the last message and short histories are used in full.
    // A window below 1 keeps the previous behaviour of using every message.
    const context =
      this.retrievalContextWindow >= 1
        ? messages.slice(-this.retrievalContextWindow)
        : messages;

    const queryText = context
      .map((message) => extractText(message.content))
      .join("\n\n");
    if (!queryText) return [];

    // Create and execute the query
    const queryEmbedding = await this.embedModel.getTextEmbedding(queryText);
    const query: VectorStoreQuery = {
      queryStr: queryText,
      queryEmbedding,
      ...this.queryOptions,
    };
    const results = await this.vectorStore.query(query);
    if (!results.nodes?.length) return [];

    // Pair each node with its similarity (when the store returned one)
    const nodesWithScores: NodeWithScore[] = results.nodes.map(
      (node, index) => ({
        node,
        score: results.similarities?.[index] ?? undefined,
      }),
    );

    // Apply postprocessors in order, each one seeing the previous output
    let processedNodes = nodesWithScores;
    for (const postprocessor of this.nodePostprocessors) {
      processedNodes = await postprocessor.postprocessNodes(
        processedNodes,
        queryText,
      );
    }

    // Format the results
    const retrievedText = processedNodes
      .map(({ node }) => node.getContent(MetadataMode.NONE))
      .join("\n\n");
    const formattedText = this.formatTemplate.format({
      context: retrievedText,
    });

    // Return as memory message
    return [
      {
        id: this.id,
        role: "memory",
        content: formattedText,
      } as MemoryMessage<TAdditionalMessageOptions>,
    ];
  }

  /**
   * Store messages in the vector store as one embedded text node.
   *
   * Messages without text content are skipped; nothing is written when no
   * message contributes text.
   *
   * @param messages - The messages to persist.
   */
  async put(
    messages: MemoryMessage<TAdditionalMessageOptions>[],
  ): Promise<void> {
    if (messages.length === 0) return;

    // Format messages with role, text content, and additional info
    const texts: string[] = [];
    for (const message of messages) {
      const text = extractText(message.content);
      if (!text) continue;

      let messageText = text;
      // Add additional info if present
      const additionalInfo = (message.options ?? {}) as Record<string, unknown>;
      if (Object.keys(additionalInfo).length > 0) {
        messageText += `\nAdditional Info: (${JSON.stringify(additionalInfo)})`;
      }
      texts.push(`<message role='${message.role}'>${messageText}</message>`);
    }
    if (texts.length === 0) return;

    // Tag the node with this block's id so queries can be restricted to it
    const textNode = new TextNode({
      text: texts.join("\n"),
      metadata: { [this.queryOptions.sessionFilterKey]: this.id },
    });

    // Get embedding for the text
    textNode.embedding = await this.embedModel.getTextEmbedding(textNode.text);

    // Add to vector store
    await this.vectorStore.add([textNode]);
  }

  /**
   * Fill in query defaults and make sure a session filter is present.
   *
   * The caller's `options` (including its `filters` object and array) are never
   * mutated: when the session filter has to be added, a copy of the filters is made.
   * Otherwise a `queryOptions` object shared between several blocks would carry
   * the first block's session filter into every later block.
   */
  private buildDefaultQueryOptions(
    options: Partial<VectorMemoryBlockQueryOptions> | undefined,
  ): VectorMemoryBlockQueryOptions {
    const {
      similarityTopK = 2,
      mode = VectorStoreQueryMode.DEFAULT,
      sessionFilterKey = "session_id",
    } = options ?? {};

    const sessionFilter: MetadataFilter = {
      key: sessionFilterKey,
      value: this.id,
      operator: "==",
    };

    let filters = options?.filters;
    if (!filters) {
      // If no filters are provided, use only the session filter
      filters = {
        filters: [sessionFilter],
        condition: "and",
      };
    } else if (
      !filters.filters.some((filter) => filter.key === sessionFilterKey)
    ) {
      // Only add the session filter if the caller did not supply one already;
      // build a new object/array instead of pushing into the caller's array
      filters = {
        ...filters,
        filters: [...filters.filters, sessionFilter],
      };
    }

    return { ...options, similarityTopK, mode, sessionFilterKey, filters };
  }
}
+15
View File
@@ -8,6 +8,10 @@ import {
StaticMemoryBlock,
type StaticMemoryBlockOptions,
} from "./block/static";
import {
VectorMemoryBlock,
type VectorMemoryBlockOptions,
} from "./block/vector";
import { DEFAULT_TOKEN_LIMIT, Memory, type MemoryOptions } from "./memory";
import type { MemoryMessage } from "./types";
@@ -115,6 +119,17 @@ export function factExtractionBlock<TMessageOptions extends object = object>(
return new FactExtractionMemoryBlock<TMessageOptions>(options);
}
/**
 * Factory helper for building a VectorMemoryBlock.
 * @param options - Configuration options for the vector memory block
 * @returns A new VectorMemoryBlock instance
 */
export function vectorBlock<TMessageOptions extends object = object>(
  options: VectorMemoryBlockOptions,
): VectorMemoryBlock<TMessageOptions> {
  const block = new VectorMemoryBlock<TMessageOptions>(options);
  return block;
}
/**
* Creates a new Memory instance from a snapshot
* @param snapshot The snapshot to load from
+36 -3
View File
@@ -31,6 +31,13 @@ export type MemoryOptions<TMessageOptions extends object = object> = {
* Used internally for memory restoration from snapshots.
*/
memoryCursor?: number;
/**
* The default LLM to use for memory retrieval.
* If not provided, the default `Settings.llm` will be used.
* This default LLM can be overridden by the LLM passed in the `getLLM` method.
*/
llm?: LLM | undefined;
};
export class Memory<
@@ -65,6 +72,10 @@ export class Memory<
* The cursor for the messages that have been processed into long-term memory.
*/
private memoryCursor: number = 0;
/**
* The default LLM to use for memory retrieval.
*/
private llm: LLM | undefined;
constructor(
messages: MemoryMessage<TMessageOptions>[] = [],
@@ -76,6 +87,7 @@ export class Memory<
options.shortTermTokenLimitRatio ?? DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO;
this.memoryBlocks = options.memoryBlocks ?? [];
this.memoryCursor = options.memoryCursor ?? 0;
this.initLLM(options.llm);
this.adapters = {
...options.customAdapters,
@@ -84,6 +96,15 @@ export class Memory<
} as TAdapters & BuiltinAdapters<TMessageOptions>;
}
private initLLM(llm: LLM | undefined) {
  // Reading Settings.llm may throw when no global LLM has been configured yet;
  // in that case the memory simply has no default LLM.
  let resolved: LLM | undefined;
  try {
    resolved = llm ?? Settings.llm;
  } catch {
    resolved = undefined;
  }
  this.llm = resolved;
}
/**
* Add a message to the memory
* @param message - The message to add to the memory
@@ -160,12 +181,13 @@ export class Memory<
/**
* Get the messages from the memory, optionally including transient messages.
* only return messages that are within context window of the LLM
* @param llm - To fit the result messages to the context window of the LLM. If not provided, the default token limit will be used.
* @param llm - To fit the result messages to the context window of the LLM (fallback to default llm if not provided).
* If llm is not specified in both the constructor and the method, the default token limit will be used.
* @param transientMessages - Optional transient messages to include.
* @returns The messages from the memory, optionally including transient messages.
*/
async getLLM(
llm?: LLM,
llm: LLM | undefined = this.llm,
transientMessages?: ChatMessage<TMessageOptions>[],
): Promise<ChatMessage[]> {
// Priority of result messages:
@@ -176,11 +198,20 @@ export class Memory<
? Math.ceil(contextWindow * DEFAULT_TOKEN_LIMIT_RATIO)
: this.tokenLimit;
let blockInputMessages = this.messages;
if (transientMessages && transientMessages.length > 0) {
blockInputMessages = [
...this.messages,
...transientMessages.map((m) => this.adapters.llamaindex.toMemory(m)),
];
}
// Start with fixed block messages (priority=0)
// as it must always be included in the retrieval result
const messages = await this.getMemoryBlockMessages(
this.memoryBlocks.filter((block) => block.priority === 0),
tokenLimit,
blockInputMessages,
);
// remaining token limit for short-term and memory blocks content
const remainingTokenLimit =
@@ -207,6 +238,7 @@ export class Memory<
const longTermBlockMessages = await this.getMemoryBlockMessages(
longTermBlocks,
memoryBlocksTokenLimit,
blockInputMessages,
);
messages.push(...longTermBlockMessages);
@@ -252,6 +284,7 @@ export class Memory<
private async getMemoryBlockMessages(
blocks: BaseMemoryBlock<TMessageOptions>[],
tokenLimit?: number,
messages?: MemoryMessage<TMessageOptions>[],
): Promise<ChatMessage<TMessageOptions>[]> {
if (blocks.length === 0) {
return [];
@@ -265,7 +298,7 @@ export class Memory<
let addedTokenCount = 0;
for (const block of sortedBlocks) {
try {
const content = await block.get();
const content = await block.get(messages);
for (const message of content) {
const chatMessage = this.adapters.llamaindex.fromMemory(message);
const messageTokenCount = this.countMessagesToken([chatMessage]);
+35
View File
@@ -56,10 +56,45 @@ export function prettifyError(error: unknown): string {
}
}
/**
 * Pretty-prints a JSON value (2-space indentation) and strips the double quotes
 * around every quoted segment of the result.
 *
 * @param value - The JSON value to stringify
 * @returns The stringified JSON with its double quotes removed
 */
export function stringifyJSONToMessageContent(value: JSONValue): string {
  const pretty = JSON.stringify(value, null, 2);
  // Replace each "…" run by its inner text
  return pretty.replace(/"([^"]*)"/g, (_quoted, inner: string) => inner);
}
/**
 * Asserts that `value` is a JSONValue: a string, number or boolean, or an array /
 * object whose nested values all satisfy the same rule (checked recursively).
 *
 * `null`, `undefined`, functions, symbols and bigints are rejected, at any depth.
 *
 * @param value - The value to validate
 * @throws Error if `value` or any nested value is not a valid JSONValue
 */
export function assertIsJSONValue(value: unknown): asserts value is JSONValue {
  if (
    typeof value === "string" ||
    typeof value === "number" ||
    typeof value === "boolean"
  ) {
    return;
  }

  if (Array.isArray(value)) {
    for (const item of value) {
      assertIsJSONValue(item);
    }
    return;
  }

  if (typeof value === "object" && value !== null) {
    // Enumerable own keys are always strings here, so only the values need validating
    for (const nested of Object.values(value)) {
      assertIsJSONValue(nested);
    }
    return;
  }

  throw new Error(`Value is not a valid JSONValue: ${String(value)}`);
}
export {
extractDataUrlComponents,
extractImage,
+4 -1
View File
@@ -101,7 +101,9 @@ export type VectorStoreByType = {
};
export type VectorStoreBaseParams = {
// @deprecated: use embedModel instead
embeddingModel?: BaseEmbedding | undefined;
embedModel?: BaseEmbedding | undefined;
};
export abstract class BaseVectorStore<Client = unknown, T = unknown> {
@@ -117,7 +119,8 @@ export abstract class BaseVectorStore<Client = unknown, T = unknown> {
): Promise<VectorStoreQueryResult>;
protected constructor(params?: VectorStoreBaseParams) {
this.embedModel = params?.embeddingModel ?? Settings.embedModel;
this.embedModel =
params?.embedModel ?? params?.embeddingModel ?? Settings.embedModel;
}
}
+12
View File
@@ -1,5 +1,17 @@
# @llamaindex/experimental
## 0.0.198
### Patch Changes
- llamaindex@0.11.21
## 0.0.197
### Patch Changes
- llamaindex@0.11.20
## 0.0.196
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/experimental",
"description": "Experimental package for LlamaIndexTS",
"version": "0.0.196",
"version": "0.0.198",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+21
View File
@@ -1,5 +1,26 @@
# llamaindex
## 0.11.21
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/cloud@4.0.26
- @llamaindex/node-parser@2.0.17
- @llamaindex/workflow@1.1.17
## 0.11.20
### Patch Changes
- Updated dependencies [a8ec08c]
- Updated dependencies [2967d57]
- @llamaindex/core@0.6.16
- @llamaindex/workflow@1.1.16
- @llamaindex/cloud@4.0.25
- @llamaindex/node-parser@2.0.16
## 0.11.19
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "llamaindex",
"version": "0.11.19",
"version": "0.11.21",
"license": "MIT",
"type": "module",
"keywords": [
@@ -272,7 +272,7 @@ export class SimpleVectorStore extends BaseVectorStore {
static async fromPersistPath(
persistPath: string,
embeddingModel?: BaseEmbedding,
embedModel?: BaseEmbedding,
): Promise<SimpleVectorStore> {
const dirPath = path.dirname(persistPath);
if (!(await exists(dirPath))) {
@@ -300,20 +300,20 @@ export class SimpleVectorStore extends BaseVectorStore {
data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
// @ts-expect-error TS2322
data.metadataDict = dataDict.metadataDict ?? {};
const store = new SimpleVectorStore({ data, embeddingModel });
const store = new SimpleVectorStore({ data, embedModel });
store.persistPath = persistPath;
return store;
}
static fromDict(
saveDict: SimpleVectorStoreData,
embeddingModel?: BaseEmbedding,
embedModel?: BaseEmbedding,
): SimpleVectorStore {
const data = new SimpleVectorStoreData();
data.embeddingDict = saveDict.embeddingDict;
data.textIdToRefDocId = saveDict.textIdToRefDocId;
data.metadataDict = saveDict.metadataDict;
return new SimpleVectorStore({ data, embeddingModel });
return new SimpleVectorStore({ data, embedModel });
}
toDict(): SimpleVectorStoreData {
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/core-test
## 0.1.13
### Patch Changes
- @llamaindex/openai@0.4.12
## 0.1.12
### Patch Changes
- @llamaindex/openai@0.4.11
## 0.1.11
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.1.10
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llamaindex-test",
"private": true,
"version": "0.1.10",
"version": "0.1.13",
"type": "module",
"scripts": {
"test": "vitest run"
@@ -59,7 +59,7 @@ describe("SimpleVectorStore", () => {
}),
];
store = new SimpleVectorStore({
embeddingModel: {} as BaseEmbedding, // Mocking the embedModel
embedModel: {} as BaseEmbedding, // Mocking the embedModel
data: {
embeddingDict: {},
textIdToRefDocId: {},
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/node-parser
## 2.0.17
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 2.0.16
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 2.0.15
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/node-parser",
"version": "2.0.15",
"version": "2.0.17",
"description": "Node parser for LlamaIndex",
"type": "module",
"exports": {
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/anthropic
## 0.3.19
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.3.18
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.3.17
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/anthropic",
"description": "Anthropic Adapter for LlamaIndex",
"version": "0.3.17",
"version": "0.3.19",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
@@ -1,5 +1,19 @@
# @llamaindex/assemblyai
## 0.1.16
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.15
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.14
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/assemblyai",
"description": "AssemblyAI Reader for LlamaIndex",
"version": "0.1.14",
"version": "0.1.16",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+15
View File
@@ -1,5 +1,20 @@
# @llamaindex/community
## 0.0.112
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.0.111
### Patch Changes
- 678b327: feat: added apac bedrock models
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.0.110
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/aws",
"description": "AWS package for LlamaIndexTS",
"version": "0.0.110",
"version": "0.0.112",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
@@ -134,6 +134,19 @@ export const INFERENCE_BEDROCK_MODELS = {
EU_AMAZON_NOVA_PRO_1: "eu.amazon.nova-pro-v1:0",
EU_AMAZON_NOVA_LITE_1: "eu.amazon.nova-lite-v1:0",
EU_AMAZON_NOVA_MICRO_1: "eu.amazon.nova-micro-v1:0",
APAC_ANTHROPIC_CLAUDE_3_5_SONNET:
"apac.anthropic.claude-3-5-sonnet-20240620-v1:0",
APAC_ANTHROPIC_CLAUDE_3_5_SONNET_V2:
"apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
APAC_ANTHROPIC_CLAUDE_3_7_SONNET:
"apac.anthropic.claude-3-7-sonnet-20250219-v1:0",
APAC_ANTHROPIC_CLAUDE_3_HAIKU: "apac.anthropic.claude-3-haiku-20240307-v1:0",
APAC_ANTHROPIC_CLAUDE_3_SONNET:
"apac.anthropic.claude-3-sonnet-20240229-v1:0",
APAC_AMAZON_NOVA_PRO_1: "apac.amazon.nova-pro-v1:0",
APAC_AMAZON_NOVA_LITE_1: "apac.amazon.nova-lite-v1:0",
APAC_AMAZON_NOVA_MICRO_1: "apac.amazon.nova-micro-v1:0",
};
export type INFERENCE_BEDROCK_MODELS =
@@ -206,6 +219,24 @@ export const INFERENCE_TO_BEDROCK_MAP: Record<
BEDROCK_MODELS.AMAZON_NOVA_LITE_1,
[INFERENCE_BEDROCK_MODELS.EU_AMAZON_NOVA_MICRO_1]:
BEDROCK_MODELS.AMAZON_NOVA_MICRO_1,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_5_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_5_SONNET_V2]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET_V2,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_7_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_7_SONNET,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_HAIKU]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
[INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_PRO_1]:
BEDROCK_MODELS.AMAZON_NOVA_PRO_1,
[INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_LITE_1]:
BEDROCK_MODELS.AMAZON_NOVA_LITE_1,
[INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_MICRO_1]:
BEDROCK_MODELS.AMAZON_NOVA_MICRO_1,
};
/*
+23
View File
@@ -1,5 +1,28 @@
# @llamaindex/clip
## 0.0.68
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.0.67
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.0.66
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.65
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/clip",
"description": "Clip Embedding Adapter for LlamaIndex",
"version": "0.0.65",
"version": "0.0.68",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/cohere
## 0.0.31
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.0.30
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.0.29
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/cohere",
"description": "Cohere Adapter for LlamaIndex",
"version": "0.0.29",
"version": "0.0.31",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+23
View File
@@ -1,5 +1,28 @@
# @llamaindex/deepinfra
## 0.0.68
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.0.67
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.0.66
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.65
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/deepinfra",
"description": "Deepinfra Adapter for LlamaIndex",
"version": "0.0.65",
"version": "0.0.68",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/deepseek
## 0.0.29
### Patch Changes
- @llamaindex/openai@0.4.12
## 0.0.28
### Patch Changes
- @llamaindex/openai@0.4.11
## 0.0.27
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.26
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/deepseek",
"description": "DeepSeek Adapter for LlamaIndex",
"version": "0.0.26",
"version": "0.0.29",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/discord
## 0.1.16
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.15
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.14
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/discord",
"description": "Discord Reader for LlamaIndex",
"version": "0.1.14",
"version": "0.1.16",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/excel
## 0.1.17
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.16
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.15
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/excel",
"description": "Excel Reader for LlamaIndex",
"version": "0.1.15",
"version": "0.1.17",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/fireworks
## 0.0.28
### Patch Changes
- @llamaindex/openai@0.4.12
## 0.0.27
### Patch Changes
- @llamaindex/openai@0.4.11
## 0.0.26
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.25
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/fireworks",
"description": "Fireworks Adapter for LlamaIndex",
"version": "0.0.25",
"version": "0.0.28",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+15
View File
@@ -1,5 +1,20 @@
# @llamaindex/google
## 0.3.16
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.3.15
### Patch Changes
- 650eeb1: fix: GeminiEmbedding should send batches of max 100
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.3.14
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/google",
"description": "Google Adapter for LlamaIndex",
"version": "0.3.14",
"version": "0.3.16",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
@@ -0,0 +1,248 @@
import { beforeEach, describe, expect, test, vi } from "vitest";
import {
DEFAULT_EMBED_BATCH_SIZE,
GEMINI_EMBEDDING_MODEL,
GeminiEmbedding,
} from "./GeminiEmbedding";
// Mock the Google GenAI module
const mockEmbedContent = vi.fn();
vi.mock("@google/genai", () => ({
GoogleGenAI: vi.fn().mockImplementation(() => ({
models: {
embedContent: mockEmbedContent,
},
})),
}));
describe("GeminiEmbedding", () => {
let geminiEmbedding: GeminiEmbedding;
// Move capturedBatches to outer scope so all tests can access it
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let capturedBatches: any[];
beforeEach(() => {
vi.clearAllMocks();
geminiEmbedding = new GeminiEmbedding({
model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
apiKey: "test-api-key",
});
// Default mock for other tests
mockEmbedContent.mockResolvedValue({
embeddings: [
{ values: [0.1, 0.2, 0.3] },
{ values: [0.4, 0.5, 0.6] },
{ values: [0.7, 0.8, 0.9] },
],
});
});
describe("getTextEmbeddingsBatch", () => {
beforeEach(() => {
// Reset and set up capturedBatches and the mock implementation for all tests in this suite
capturedBatches = [];
mockEmbedContent.mockImplementation((args) => {
capturedBatches.push({
...args,
contents: Array.isArray(args.contents)
? [...args.contents]
: args.contents,
});
return Promise.resolve({
embeddings: Array.from(
{ length: Array.isArray(args.contents) ? args.contents.length : 1 },
(_, i) => ({
values: [i * 0.1, i * 0.2, i * 0.3],
}),
),
});
});
});
test("should split inputs larger than DEFAULT_EMBED_BATCH_SIZE into multiple batches", async () => {
  // 2.5x the batch size: two full batches plus one half-sized batch are expected
  const texts = Array.from(
    { length: DEFAULT_EMBED_BATCH_SIZE * 2.5 },
    (_, i) => `text ${i + 1}`,
  );

  await geminiEmbedding.getTextEmbeddingsBatch(texts);

  // ceil(2.5) = 3 calls to embedContent
  expect(mockEmbedContent).toHaveBeenCalledTimes(3);

  // First batch should have DEFAULT_EMBED_BATCH_SIZE texts
  expect(capturedBatches[0].contents).toHaveLength(DEFAULT_EMBED_BATCH_SIZE);
  expect(capturedBatches[0].contents).toEqual(
    texts.slice(0 * DEFAULT_EMBED_BATCH_SIZE, 1 * DEFAULT_EMBED_BATCH_SIZE),
  );

  // Second batch should have DEFAULT_EMBED_BATCH_SIZE texts
  expect(capturedBatches[1].contents).toHaveLength(DEFAULT_EMBED_BATCH_SIZE);
  expect(capturedBatches[1].contents).toEqual(
    texts.slice(1 * DEFAULT_EMBED_BATCH_SIZE, 2 * DEFAULT_EMBED_BATCH_SIZE),
  );

  // Third batch should have the remaining 0.5 * DEFAULT_EMBED_BATCH_SIZE texts
  expect(capturedBatches[2].contents).toHaveLength(
    DEFAULT_EMBED_BATCH_SIZE * 0.5,
  );
  expect(capturedBatches[2].contents).toEqual(
    texts.slice(2 * DEFAULT_EMBED_BATCH_SIZE, 2.5 * DEFAULT_EMBED_BATCH_SIZE),
  );
});
test("should handle exactly DEFAULT_EMBED_BATCH_SIZE texts in a single batch", async () => {
const texts = Array.from(
{ length: DEFAULT_EMBED_BATCH_SIZE },
(_, i) => `text ${i + 1}`,
);
await geminiEmbedding.getTextEmbeddingsBatch(texts);
// Should be called exactly once
expect(mockEmbedContent).toHaveBeenCalledTimes(1);
// That single batch should contain every input text, in order
expect(capturedBatches[0]?.contents).toHaveLength(
DEFAULT_EMBED_BATCH_SIZE,
);
expect(capturedBatches[0]?.contents).toEqual(texts);
});
test("should handle texts shorter than batch size", async () => {
const short_batch_length = 5; // Less than DEFAULT_EMBED_BATCH_SIZE
const texts = Array.from(
{ length: short_batch_length },
(_, i) => `text ${i + 1}`,
);
await geminiEmbedding.getTextEmbeddingsBatch(texts);
// Should be called exactly once
expect(mockEmbedContent).toHaveBeenCalledTimes(1);
// Should contain all 5 texts
expect(capturedBatches[0].contents).toHaveLength(short_batch_length);
expect(capturedBatches[0].contents).toEqual(texts);
});
test("should handle many full batches correctly (10 x DEFAULT_EMBED_BATCH_SIZE texts)", async () => {
  const n_batches = 10;
  const texts = Array.from(
    { length: DEFAULT_EMBED_BATCH_SIZE * n_batches },
    (_, i) => `text ${i + 1}`,
  );

  await geminiEmbedding.getTextEmbeddingsBatch(texts);

  // One embedContent call per full batch
  expect(mockEmbedContent).toHaveBeenCalledTimes(n_batches);

  // Every batch holds exactly DEFAULT_EMBED_BATCH_SIZE texts, in input order
  for (let i = 0; i < n_batches; i++) {
    expect(capturedBatches[i].contents).toHaveLength(DEFAULT_EMBED_BATCH_SIZE);
    expect(capturedBatches[i].contents).toEqual(
      texts.slice(
        i * DEFAULT_EMBED_BATCH_SIZE,
        (i + 1) * DEFAULT_EMBED_BATCH_SIZE,
      ),
    );
  }
});
// The vectors returned by the API come back one per text, in response order.
test("should return correct embeddings for all texts", async () => {
  const expectedVectors = [
    [0.1, 0.2, 0.3],
    [0.4, 0.5, 0.6],
    [0.7, 0.8, 0.9],
  ];
  // Hand the mock its own copies so the expectation stays independent of them.
  mockEmbedContent.mockResolvedValueOnce({
    embeddings: expectedVectors.map((vector) => ({ values: [...vector] })),
  });

  const vectors = await geminiEmbedding.getTextEmbeddingsBatch([
    "text1",
    "text2",
    "text3",
  ]);

  expect(vectors).toEqual(expectedVectors);
});
// An embedding entry without `values` is expected to surface as an empty vector.
test("should handle empty embeddings gracefully", async () => {
  const apiResponse = {
    embeddings: [{ values: undefined }, { values: [0.1, 0.2, 0.3] }],
  };
  mockEmbedContent.mockResolvedValueOnce(apiResponse);

  const vectors = await geminiEmbedding.getTextEmbeddingsBatch([
    "text1",
    "text2",
  ]);

  expect(vectors).toEqual([[], [0.1, 0.2, 0.3]]);
});
// A response with no `embeddings` field is expected to yield an empty result.
test("should handle missing embeddings array", async () => {
  const apiResponse = { embeddings: undefined };
  mockEmbedContent.mockResolvedValueOnce(apiResponse);

  const vectors = await geminiEmbedding.getTextEmbeddingsBatch(["text1"]);

  expect(vectors).toEqual([]);
});
});
describe("getTextEmbedding", () => {
  // A single text is passed to embedContent as a plain string, and the first
  // returned vector is handed back to the caller.
  test("should call embedContent with single text", async () => {
    const input = "single text";
    const vector = [0.1, 0.2, 0.3];
    mockEmbedContent.mockResolvedValueOnce({
      embeddings: [{ values: [...vector] }],
    });

    const embedding = await geminiEmbedding.getTextEmbedding(input);

    expect(mockEmbedContent).toHaveBeenCalledTimes(1);
    expect(mockEmbedContent).toHaveBeenCalledWith({
      model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
      contents: input,
    });
    expect(embedding).toEqual(vector);
  });
});
describe("constructor", () => {
  // With only an API key supplied, model and batch size use their defaults.
  test("should set default model and batch size", () => {
    const instance = new GeminiEmbedding({ apiKey: "test-key" });

    expect(instance.model).toBe(GEMINI_EMBEDDING_MODEL.EMBEDDING_001);
    expect(instance.embedBatchSize).toBe(DEFAULT_EMBED_BATCH_SIZE);
  });

  // Explicit options override both the model and the batch size.
  test("should use provided model", () => {
    const customBatchSize = 50;
    const options = {
      model: GEMINI_EMBEDDING_MODEL.TEXT_EMBEDDING_004,
      apiKey: "test-key",
      embedBatchSize: customBatchSize,
    };

    const instance = new GeminiEmbedding(options);

    expect(instance.model).toBe(GEMINI_EMBEDDING_MODEL.TEXT_EMBEDDING_004);
    expect(instance.embedBatchSize).toBe(customBatchSize);
  });
});
});
@@ -1,5 +1,9 @@
import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
import { BaseEmbedding } from "@llamaindex/core/embeddings";
import {
BaseEmbedding,
batchEmbeddings,
type BaseEmbeddingOptions,
} from "@llamaindex/core/embeddings";
import { getEnv } from "@llamaindex/env";
export enum GEMINI_EMBEDDING_MODEL {
@@ -7,11 +11,15 @@ export enum GEMINI_EMBEDDING_MODEL {
TEXT_EMBEDDING_004 = "text-embedding-004",
}
// 100 is max batch size, see https://github.com/run-llama/LlamaIndexTS/pull/2099
export const DEFAULT_EMBED_BATCH_SIZE = 100;
/**
* Configuration options for GeminiEmbedding.
*/
export type GeminiEmbeddingOptions = {
model?: GEMINI_EMBEDDING_MODEL;
embedBatchSize?: number;
} & GoogleGenAIOptions;
/**
@@ -20,6 +28,7 @@ export type GeminiEmbeddingOptions = {
export class GeminiEmbedding extends BaseEmbedding {
model: GEMINI_EMBEDDING_MODEL;
ai: GoogleGenAI;
embedBatchSize: number = DEFAULT_EMBED_BATCH_SIZE;
constructor(opts?: GeminiEmbeddingOptions) {
super();
@@ -31,15 +40,27 @@ export class GeminiEmbedding extends BaseEmbedding {
this.ai = new GoogleGenAI({ ...opts, apiKey });
this.model = opts?.model ?? GEMINI_EMBEDDING_MODEL.EMBEDDING_001;
this.embedBatchSize = opts?.embedBatchSize ?? DEFAULT_EMBED_BATCH_SIZE;
}
async getTextEmbeddingsBatch(texts: string[]): Promise<number[][]> {
getTextEmbeddings = async (texts: string[]) => {
const result = await this.ai.models.embedContent({
model: this.model,
contents: texts,
});
return result.embeddings?.map((embedding) => embedding.values ?? []) ?? [];
};
async getTextEmbeddingsBatch(
texts: string[],
options?: BaseEmbeddingOptions,
): Promise<Array<number[]>> {
return await batchEmbeddings(
texts,
this.getTextEmbeddings.bind(this),
this.embedBatchSize,
options,
);
}
async getTextEmbedding(text: string): Promise<number[]> {
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/groq
## 0.0.84
### Patch Changes
- @llamaindex/openai@0.4.12
## 0.0.83
### Patch Changes
- @llamaindex/openai@0.4.11
## 0.0.82
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.81
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/groq",
"description": "Groq Adapter for LlamaIndex",
"version": "0.0.81",
"version": "0.0.84",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
@@ -1,5 +1,28 @@
# @llamaindex/huggingface
## 0.1.22
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.1.21
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.1.20
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.1.19
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/huggingface",
"description": "Huggingface Adapter for LlamaIndex",
"version": "0.1.19",
"version": "0.1.22",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+23
View File
@@ -1,5 +1,28 @@
# @llamaindex/jinaai
## 0.0.28
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.0.27
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.0.26
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.25
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/jinaai",
"description": "JinaAI Adapter for LlamaIndex",
"version": "0.0.25",
"version": "0.0.28",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/mistral
## 0.1.17
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.16
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.15
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/mistral",
"description": "Mistral Adapter for LlamaIndex",
"version": "0.1.15",
"version": "0.1.17",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
@@ -1,5 +1,19 @@
# @llamaindex/mixedbread
## 0.0.31
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.0.30
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.0.29
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/mixedbread",
"description": "Mixedbread Adapter for LlamaIndex",
"version": "0.0.29",
"version": "0.0.31",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/notion
## 0.1.16
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.15
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.14
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/notion",
"description": "Notion Reader for LlamaIndex",
"version": "0.1.14",
"version": "0.1.16",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/ollama
## 0.1.17
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.16
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.15
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/ollama",
"description": "Ollama Adapter for LlamaIndex",
"version": "0.1.15",
"version": "0.1.17",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+20
View File
@@ -1,5 +1,25 @@
# @llamaindex/openai
## 0.4.12
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.4.11
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.4.10
### Patch Changes
- 856dd8c: fix: assume new models are function call models
## 0.4.9
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/openai",
"description": "OpenAI Adapter for LlamaIndex",
"version": "0.4.9",
"version": "0.4.12",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+2 -2
View File
@@ -383,8 +383,8 @@ export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
// skip parts that don't have any content
if (
!(
choice.delta.content ||
choice.delta.tool_calls ||
choice.delta?.content ||
choice.delta?.tool_calls ||
choice.finish_reason
)
)
+1 -2
View File
@@ -149,10 +149,9 @@ export function isFunctionCallingModel(llm: LLM): llm is OpenAI {
} else {
return false;
}
const isChatModel = Object.keys(ALL_AVAILABLE_OPENAI_MODELS).includes(model);
const isOld = model.includes("0314") || model.includes("0301");
const isO1 = model.startsWith("o1");
return isChatModel && !isOld && !isO1;
return !isOld && !isO1;
}
export function isReasoningModel(model: ChatModel | string): boolean {
+55 -2
View File
@@ -1,5 +1,9 @@
import { ChatMessage, ToolCallLLMMessageOptions } from "@llamaindex/core/llms";
import { describe, expect, it } from "vitest";
import {
ChatMessage,
ChatResponseChunk,
ToolCallLLMMessageOptions,
} from "@llamaindex/core/llms";
import { describe, expect, it, vi } from "vitest";
import { z } from "zod";
import { OpenAI } from "../src/llm";
@@ -231,3 +235,52 @@ describe("OpenAI Static Methods", () => {
});
});
});
// Exercises streamChat against a mocked session whose stream yields a single
// chunk with an empty delta object and finish_reason "stop".
describe("OpenAI streamChat", () => {
it("should handle choice with empty delta and finish_reason stop", async () => {
// Mock stream: an async generator that yields exactly one chunk, whose only
// choice has no content and no tool calls, just finish_reason "stop".
const mockStream = async function* () {
yield {
choices: [
{
delta: {},
finish_reason: "stop",
index: 0,
logprobs: null,
},
],
};
};
// Mock the OpenAI session and chat completions
// (create() resolves to the generator above instead of calling the API)
const mockSession = {
chat: {
completions: {
create: vi.fn().mockResolvedValue(mockStream()),
},
},
};
const openai = new OpenAI({
model: "gpt-4o-mini",
apiKey: "test-key",
// @ts-expect-error: mockSession is a mock object for testing purposes
session: mockSession,
});
// @ts-expect-error accessing protected method
const stream = openai.streamChat({
messages: [{ role: "user" as const, content: "Hello" }],
stream: true,
});
// Drain the stream so every emitted chunk can be inspected.
const chunks: ChatResponseChunk[] = [];
for await (const chunk of stream) {
chunks.push(chunk);
}
// The lone chunk must be emitted (not skipped), with an empty text delta
// and empty options.
expect(chunks).toHaveLength(1);
expect(chunks[0].options).toEqual({});
expect(chunks[0].delta).toBe("");
});
});
@@ -1,5 +1,28 @@
# @llamaindex/perplexity
## 0.0.25
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.0.24
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.0.23
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.22
### Patch Changes

Some files were not shown because too many files have changed in this diff Show More