Compare commits

...

17 Commits

Author SHA1 Message Date
github-actions[bot] e4c7113614 Release 0.11.21 (#2128)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-07-22 12:23:58 +08:00
Thuc Pham 38da40bc98 feat: VectoryMemoryBlock (#2110)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-22 12:18:09 +08:00
Marcus Schiesser 4d50ca4d84 chore: add streamchat test (#2122) 2025-07-22 11:30:01 +08:00
github-actions[bot] 8b5253a297 Release (#2127) 2025-07-21 15:40:31 -06:00
Logan ea15e75c89 deployment docs nits (#2126) 2025-07-21 15:30:37 -06:00
github-actions[bot] 3be87d4670 Release 0.11.20 (#2121)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: himself65 <14026360+himself65@users.noreply.github.com>
2025-07-21 09:37:44 -07:00
Terence Sim 94da13db0d fix: azure openai streamchat empty delta throw TypeError (#2118)
Co-authored-by: Terence Sim <40583743+InTheAxis@users.noreply.github.com>
2025-07-21 09:16:09 -07:00
Terence Sim acd50ea99f chore: replaced console.log with logger type from @llamaindex/env (#2123)
Co-authored-by: Terence Sim <40583743+InTheAxis@users.noreply.github.com>
2025-07-21 09:14:06 -07:00
Adrian Lyjak 2967d57ac0 feat: default to _public agent data (#2117) 2025-07-21 09:07:15 -07:00
Thuc Pham a8ec08c682 fix: ensure correct message content in agent workflow (#2114)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-21 15:13:27 +08:00
Terence Sim 678b327051 feat: added apac bedrock models (#2119)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2025-07-21 12:13:37 +08:00
Jeremy B. Merrill 650eeb1df3 fix: GeminiEmbedding should send batches of max 100 (#2099)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-07-21 12:12:42 +08:00
Laurie Voss 50f6747758 Instrumenting with Google Tag Manager (in addition to Google Analytics) (#2116) 2025-07-20 13:18:09 -07:00
github-actions[bot] 12414a6836 Release (#2113)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: marcusschiesser <17126+marcusschiesser@users.noreply.github.com>
2025-07-18 13:54:38 +08:00
Marcus Schiesser 856dd8cca8 fix: assume new models are function call models (#2112) 2025-07-18 12:52:43 +08:00
Jerry Cheng d8f4f6a859 Update SupabaseVectorStore.ts to fix score calculating error (#2109)
Co-authored-by: Marcus Schiesser <marcus.schiesser@googlemail.com>
2025-07-18 12:48:47 +08:00
Logan f594d7034f revamp getting started flow and main index page (#2079)
Co-authored-by: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
Co-authored-by: thucpn <thucsh2@gmail.com>
2025-07-17 16:27:28 +08:00
168 changed files with 3927 additions and 548 deletions
+42
View File
@@ -1,5 +1,47 @@
# @llamaindex/doc
## 0.2.44
### Patch Changes
- 38da40b: feat: VectoryMemoryBlock
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/cloud@4.0.26
- llamaindex@0.11.21
- @llamaindex/node-parser@2.0.17
- @llamaindex/openai@0.4.12
- @llamaindex/readers@3.1.16
- @llamaindex/workflow@1.1.17
## 0.2.43
### Patch Changes
- ea15e75: Minor updates in deployment docs
## 0.2.42
### Patch Changes
- a8ec08c: fix: ensure correct message content in agent workflow
- Updated dependencies [a8ec08c]
- Updated dependencies [2967d57]
- @llamaindex/core@0.6.16
- @llamaindex/workflow@1.1.16
- @llamaindex/cloud@4.0.25
- llamaindex@0.11.20
- @llamaindex/node-parser@2.0.16
- @llamaindex/openai@0.4.11
- @llamaindex/readers@3.1.15
## 0.2.41
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.2.40
### Patch Changes
+27
View File
@@ -27,6 +27,33 @@ const config = {
destination: "/docs/workflows/:path*",
permanent: true,
},
{
source: "/docs/llamaindex/getting_started/installation/node.mdx",
destination:
"/docs/llamaindex/getting_started/installation/server-apis.mdx",
permanent: true,
},
{
source: "/docs/llamaindex/getting_started/installation/typescript.mdx",
destination: "/docs/llamaindex/getting_started/installation/index.mdx",
permanent: true,
},
{
source: "/docs/llamaindex/getting_started/installation/next.mdx",
destination: "/docs/llamaindex/getting_started/installation/nextjs.mdx",
permanent: true,
},
{
source: "/docs/llamaindex/getting_started/installation/vite.mdx",
destination: "/docs/llamaindex/getting_started/installation/index.mdx",
permanent: true,
},
{
source: "/docs/llamaindex/getting_started/installation/cloudflare.mdx",
destination:
"/docs/llamaindex/getting_started/installation/serverless.mdx",
permanent: true,
},
];
},
turbopack: {
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/doc",
"version": "0.2.40",
"version": "0.2.44",
"private": true,
"scripts": {
"postinstall": "fumadocs-mdx",
Binary file not shown.

Before

Width:  |  Height:  |  Size: 540 KiB

After

Width:  |  Height:  |  Size: 206 KiB

+2 -1
View File
@@ -1,6 +1,6 @@
import { AIProvider } from "@/actions";
import { TooltipProvider } from "@/components/ui/tooltip";
import { GoogleAnalytics } from "@next/third-parties/google";
import { GoogleAnalytics, GoogleTagManager } from "@next/third-parties/google";
import { RootProvider } from "fumadocs-ui/provider";
import { Inter } from "next/font/google";
import type { ReactNode } from "react";
@@ -36,6 +36,7 @@ export default function Layout({ children }: { children: ReactNode }) {
LlamaIndex.TS - Build LLM-powered document agents and workflows
</title>
</head>
<GoogleTagManager gtmId="GTM-WWRFB36R" />
<body className="flex min-h-screen flex-col">
<TooltipProvider>
<AIProvider>
@@ -19,3 +19,8 @@ npm run dev
to start the development server. You can then visit [http://localhost:3000](http://localhost:3000) to see your app, which should look something like this:
![create-llama interface](/images/create_llama.png)
## Learn more
- [Learn more about `create-llama`](https://github.com/run-llama/create-llama)
- [Want to use the same UI components? You can use our React components](https://ui.llamaindex.ai/)
@@ -17,7 +17,8 @@ npm i
Then you can run any example in the folder with `tsx`, e.g.:
```bash npm2yarn
npx tsx ./vectorIndex.ts
export OPENAI_API_KEY=your-api-key
npx tsx ./agents/agent/openai.ts
```
## Try examples online
@@ -1,70 +0,0 @@
---
title: With Cloudflare Worker
description: In this guide, you'll learn how to use LlamaIndex with CloudFlare Worker
---
Before you start, make sure you have tried LlamaIndex.TS in Node.js so that you understand the basics.
<Card
title="Getting Started with LlamaIndex.TS in Node.js"
href="/docs/llamaindex/getting_started/installation/node"
/>
Also, you need to have a basic understanding of <a href='https://developers.cloudflare.com/workers/'><SiCloudflareworkers className="inline mr-2" color="#F38020" />Cloudflare Workers</a>.
## Adding environment variables
```ts
export default {
async fetch(request: Request, env: Env): Promise<Response> {
const { setEnvs } = await import("@llamaindex/env");
setEnvs(env);
const { OpenAIAgent } = await import("@llamaindex/openai");
// Start your code here
return new Response("Hello, world!");
},
};
```
Then, you need to create a `.dev.vars` file and add your LLM API keys for local development, such as `OPENAI_API_KEY` for the OpenAI API key.
<Callout type="warn">Do not commit the api key to git repository.</Callout>
## Integrating with Hono
```ts
import { Hono } from "hono";
type Bindings = {
OPENAI_API_KEY: string;
};
const app = new Hono<{
Bindings: Bindings;
}>();
app.post("/llm", async (c) => {
const { setEnvs } = await import("@llamaindex/env");
setEnvs(c.env);
// ...
return new Response('Hello, world!');
})
export default {
fetch: app.fetch,
};
```
## Difference between Node.js and Cloudflare Worker
In Cloudflare Workers and similar serverless JS environments, you need to be aware of the following differences:
- Some Node.js modules are not available in Cloudflare Workers, such as `node:fs`, `node:child_process`, `node:cluster`...
- You are recommended to design your code around network requests, such as using the `fetch` API to communicate with a database, instead of relying on a long-running process as in Node.js.
- Some LlamaIndex.TS packages are not available in Cloudflare Workers, for example `@llamaindex/readers` and `@llamaindex/huggingface`.
- The main `llamaindex` package is designed to work in all JavaScript environments, including Cloudflare Workers. If you find any issue, please report it to us.
- `@llamaindex/env` is a JS environment binding module, which polyfills some Node.js/modern Web APIs (for example, we have a memory-based `fs` module and a Crypto API polyfill). It is designed to work in all JavaScript environments, including Cloudflare Workers.
@@ -1,69 +1,177 @@
---
title: Installation
description: How to install llamaindex packages.
description: How to install and set up LlamaIndex.TS for your project.
---
To install llamaindex, run the following command:
## Quick Start
Install the core package:
```package-install
npm i llamaindex
```
In most cases, you'll also need an LLM package and the Workflow package to use LlamaIndex. For example, to use the OpenAI LLM with agents, you would install the following:
In most cases, you'll also need an LLM provider and the Workflow package:
```package-install
npm i @llamaindex/openai @llamaindex/workflow
```
Go to [LLM APIs](/docs/llamaindex/modules/models/llms) to find out how to use other LLMs.
## Environment Setup
### API Keys
## Frameworks
Most LLM providers require API keys. Set your OpenAI key (or other provider):
LlamaIndex supports a wide range of frameworks and runtimes. Click on the card below to learn more.
```bash
export OPENAI_API_KEY=your-api-key
```
Or use a `.env` file:
```bash
echo "OPENAI_API_KEY=your-api-key" > .env
```
<Callout type="warn">Never commit API keys to your repository.</Callout>
### Loading Environment Variables
For Node.js applications:
```bash
node --env-file .env your-script.js
```
For other environments, see the deployment-specific guides below.
## TypeScript Configuration
LlamaIndex.TS is built with TypeScript and provides excellent type safety. Add these settings to your `tsconfig.json`:
```json5
{
"compilerOptions": {
// Essential for module resolution
"moduleResolution": "bundler", // or "nodenext" | "node16" | "node"
// Required for Web Stream API support
"lib": ["DOM.AsyncIterable"],
// Recommended for better compatibility
"target": "es2020",
"module": "esnext"
}
}
```
## Running your first agent
### Set up
If you don't already have a project, you can create a new one in a new folder:
```package-install
npm init
npm i -D typescript @types/node
npm i @llamaindex/openai @llamaindex/workflow llamaindex zod
```
### Run the agent
Create the file `example.ts`. This code will:
- Create two tools for use by the agent:
- A `sumNumbers` tool that adds two numbers
- A `divideNumbers` tool that divides numbers
- Create an agent that has access to both tools
- Ask the agent a question that requires the tools and print the result
<include cwd>../../examples/agents/agent/openai.ts</include>
To run the code:
```package-install
npx tsx example.ts
```
You should expect output something like:
```
{
result: '5 + 5 is 10. Then, 10 divided by 2 is 5.',
state: {
memory: Memory {
messages: [Array],
tokenLimit: 30000,
shortTermTokenLimitRatio: 0.7,
memoryBlocks: [],
memoryCursor: 0,
adapters: [Object]
},
scratchpad: [],
currentAgentName: 'Agent',
agents: [ 'Agent' ],
nextAgentName: null
}
}
Done
```
## Performance Optimization
### Tokenization Speed
Install `gpt-tokenizer` for 60x faster tokenization (Node.js environments only):
```package-install
npm i gpt-tokenizer
```
LlamaIndex will automatically use this when available.
## Deployment Guides
Choose your deployment target:
<Cards>
<Card title={
<>
<SiNodedotjs className="inline" color="#5FA04E" /> Node.js
</>
} href="/docs/llamaindex/getting_started/installation/node" />
<Card title={
<>
<SiTypescript className="inline" color="#3178C6" /> TypeScript
</>
} href="/docs/llamaindex/getting_started/installation/typescript" />
<Card title={
<>
<SiVite className='inline' color='#646CFF' /> Vite
</>
} href="/docs/llamaindex/getting_started/installation/vite" />
<Card
title={
<>
<SiNextdotjs className='inline' /> Next.js (React Server Component)
</>
}
href="/docs/llamaindex/getting_started/installation/next"
/>
<Card title={
<>
<SiCloudflareworkers className='inline' color='#F38020' /> Cloudflare Workers
</>
} href="/docs/llamaindex/getting_started/installation/cloudflare" />
<Card
title="Server APIs & Backends"
description="Express, Fastify, Koa, standalone Node.js servers"
href="/docs/llamaindex/getting_started/installation/server-apis"
/>
<Card
title="Serverless Functions"
description="Vercel, Netlify, AWS Lambda, Cloudflare Workers"
href="/docs/llamaindex/getting_started/installation/serverless"
/>
<Card
title="Next.js Applications"
description="API routes, server components, edge runtime"
href="/docs/llamaindex/getting_started/installation/nextjs"
/>
<Card
title="Troubleshooting"
description="Common issues, bundle optimization, compatibility"
href="/docs/llamaindex/getting_started/installation/troubleshooting"
/>
</Cards>
## What's next?
## LLM/Embedding Providers
Go to [LLM APIs](/docs/llamaindex/modules/models/llms) and [Embedding APIs](/docs/llamaindex/modules/models/embeddings) to find out how to use different LLM and embedding providers beyond OpenAI.
## What's Next?
<Cards>
<Card
title="Learn LlamaIndex.TS"
description="Learn how to use LlamaIndex.TS by starting with one of our tutorials."
href="/docs/llamaindex/tutorials/rag"
/>
<Card
title="Show me code examples"
description="Explore code examples using LlamaIndex.TS."
href="/docs/llamaindex/getting_started/examples"
/>
<Card
title="Learn LlamaIndex.TS"
description="Learn how to use LlamaIndex.TS by starting with one of our tutorials."
href="/docs/llamaindex/tutorials/basic_agent"
/>
<Card
title="Show me code examples"
description="Explore code examples using LlamaIndex.TS."
href="/docs/llamaindex/getting_started/examples"
/>
</Cards>
@@ -1,4 +1,4 @@
{
"title": "Installation",
"pages": ["node", "typescript", "next", "vite", "cloudflare"]
"pages": ["server-apis", "serverless", "nextjs", "troubleshooting"]
}
@@ -1,41 +0,0 @@
---
title: With Next.js
description: In this guide, you'll learn how to use LlamaIndex with Next.js.
---
Before you start, make sure you have tried LlamaIndex.TS in Node.js so that you understand the basics.
<Card
title="Getting Started with LlamaIndex.TS in Node.js"
href="/docs/llamaindex/getting_started/installation/node"
/>
## Differences between Node.js and Next.js
Next.js is a React framework that has both server side compatibility and client side compatibility.
This means that you need to be careful when using LlamaIndex.TS in Next.js.
Don't leak important data such as API keys to the client side.
Also, in Next.js, there is a build time and a runtime. Some computations, such as document embedding, can be done at build time for better performance.
Whereas the `llamaindex` package works with Next.js, some provider packages like `@llamaindex/huggingface` do not work well with Next.js. This is due to the upstream dependencies used by the provider package.
Use `withLlamaIndex` to ensure that LlamaIndex.TS works well with Next.js.
```js
// next.config.mjs / next.config.ts
import withLlamaIndex from "llamaindex/next";
/** @type {import('next').NextConfig} */
const nextConfig = {};
export default withLlamaIndex(nextConfig);
```
If you see any dependency issues, you are welcome to open an issue on GitHub.
## Edge Runtime
[Vercel Edge Runtime](https://edge-runtime.vercel.app/) supports only a subset of Node.js APIs. Similar to [Cloudflare Workers](/docs/llamaindex/getting_started/installation/cloudflare#difference-between-nodejs-and-cloudflare-worker),
it is a serverless platform that runs your code on the edge.
Not all Node.js features are supported in the Vercel Edge Runtime, and the same applies to LlamaIndex.TS; we are working on better compatibility with all JavaScript runtimes.
@@ -0,0 +1,405 @@
---
title: Next.js Applications
description: Deploy LlamaIndex.TS in Next.js applications with API routes, server components, and edge runtime.
---
This guide covers integrating LlamaIndex.TS agents with Next.js applications.
## Essential Configuration
### Next.js Config
Use `withLlamaIndex` to ensure compatibility:
```javascript
// next.config.mjs
import withLlamaIndex from "llamaindex/next";
/** @type {import('next').NextConfig} */
const nextConfig = {
// Your existing config
};
export default withLlamaIndex(nextConfig);
```
## API Routes
### App Router (Recommended)
```typescript
// app/api/chat/route.ts
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { z } from "zod";
import { NextRequest, NextResponse } from "next/server";
// Initialize agent once (consider using a singleton pattern)
let myAgent: any = null;
async function initializeAgent() {
if (myAgent) return myAgent;
try {
const greetTool = tool({
name: "greet",
description: "Greets a user with their name",
parameters: z.object({
name: z.string(),
}),
execute: ({ name }) => `Hello, ${name}! How can I help you today?`,
});
myAgent = agent({
tools: [greetTool],
llm: openai({ model: "gpt-4o-mini" }),
});
return myAgent;
} catch (error) {
console.error("Failed to initialize agent:", error);
throw error;
}
}
export async function POST(request: NextRequest) {
try {
const { message } = await request.json();
if (!message || typeof message !== 'string') {
return NextResponse.json(
{ error: "Message is required and must be a string" },
{ status: 400 }
);
}
const agent = await initializeAgent();
const result = await agent.run(message);
return NextResponse.json({ response: result.data });
} catch (error) {
console.error("Chat error:", error);
return NextResponse.json(
{ error: "Internal server error" },
{ status: 500 }
);
}
}
```
### Pages Router (Legacy)
```typescript
// pages/api/chat.ts
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { z } from "zod";
import type { NextApiRequest, NextApiResponse } from "next";
let myAgent: any = null;
async function initializeAgent() {
if (myAgent) return myAgent;
const timeTool = tool({
name: "getCurrentTime",
description: "Gets the current time",
parameters: z.object({}),
execute: () => new Date().toISOString(),
});
myAgent = agent({
tools: [timeTool],
llm: openai({ model: "gpt-4o-mini" }),
});
return myAgent;
}
export default async function handler(
req: NextApiRequest,
res: NextApiResponse
) {
if (req.method !== "POST") {
return res.status(405).json({ error: "Method not allowed" });
}
try {
const { message } = req.body;
const agent = await initializeAgent();
const result = await agent.run(message);
res.json({ response: result.data });
} catch (error) {
console.error("Chat error:", error);
res.status(500).json({ error: "Internal server error" });
}
}
```
## Server Components
Initialize agents in server components:
```typescript
// app/chat/page.tsx
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { z } from "zod";
async function initializeAgent() {
const helpTool = tool({
name: "getHelp",
description: "Provides help information",
parameters: z.object({
topic: z.string().optional(),
}),
execute: ({ topic }) => {
if (topic) {
return `Here's help for ${topic}: This is a helpful resource about ${topic}.`;
}
return "Available topics: general, troubleshooting, api, deployment";
},
});
return agent({
tools: [helpTool],
llm: openai({ model: "gpt-4o-mini" }),
});
}
export default async function ChatPage() {
const chatAgent = await initializeAgent();
return (
<div>
<h1>Chat Interface</h1>
<p>Agent initialized and ready to help!</p>
{/* Your chat UI components */}
</div>
);
}
```
## Edge Runtime
The Edge Runtime has limited Node.js API access:
```typescript
// app/api/chat-edge/route.ts
import { NextRequest, NextResponse } from "next/server";
export const runtime = "edge";
export async function POST(request: NextRequest) {
const { setEnvs } = await import("@llamaindex/env");
setEnvs(process.env);
try {
const { message } = await request.json();
const { agent } = await import("@llamaindex/workflow");
const { tool } = await import("llamaindex");
const { openai } = await import("@llamaindex/openai");
const { z } = await import("zod");
const timeTool = tool({
name: "time",
description: "Gets current time",
parameters: z.object({}),
execute: () => new Date().toISOString(),
});
const myAgent = agent({
tools: [timeTool],
llm: openai({ model: "gpt-4o-mini" }),
});
const result = await myAgent.run(message);
return NextResponse.json({ response: result.data });
} catch (error) {
return NextResponse.json({ error: error.message }, { status: 500 });
}
}
```
## Streaming Responses
Implement streaming for better user experience:
```typescript
// app/api/chat-stream/route.ts
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { agentStreamEvent } from "@llamaindex/workflow";
import { NextRequest } from "next/server";
import { z } from "zod";
// Initialize agent once (consider using a singleton pattern)
let myAgent: any = null;
async function initializeAgent() {
if (myAgent) return myAgent;
try {
const greetTool = tool({
name: "greet",
description: "Greets a user with their name",
parameters: z.object({
name: z.string(),
}),
execute: ({ name }) => `Hello, ${name}! How can I help you today?`,
});
myAgent = agent({
tools: [greetTool],
llm: openai({ model: "gpt-4o-mini" }),
});
return myAgent;
} catch (error) {
console.error("Failed to initialize agent:", error);
throw error;
}
}
export async function POST(request: NextRequest) {
const { message } = await request.json();
const stream = new ReadableStream({
async start(controller) {
try {
const agent = await initializeAgent();
const events = agent.runStream(message);
for await (const event of events) {
if (agentStreamEvent.include(event)) {
controller.enqueue(new TextEncoder().encode(event.data.delta));
}
}
controller.close();
} catch (error) {
controller.error(error);
}
},
});
return new Response(stream, {
headers: {
"Content-Type": "text/plain",
"Transfer-Encoding": "chunked",
},
});
}
```
## Client-side Integration
### React Hook for API Calls
```typescript
// hooks/useAgentChat.ts
import { useState } from "react";
export function useAgentChat() {
const [loading, setLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const [response, setResponse] = useState<string | null>(null);
const chat = async (message: string) => {
setLoading(true);
setError(null);
try {
const res = await fetch("/api/chat", {
method: "POST",
headers: { "Content-Type": "application/json" },
body: JSON.stringify({ message }),
});
if (!res.ok) {
throw new Error(`HTTP error! status: ${res.status}`);
}
const data = await res.json();
setResponse(data.response);
} catch (err) {
setError(err instanceof Error ? err.message : "An error occurred");
} finally {
setLoading(false);
}
};
return { chat, loading, error, response };
}
```
### Chat Component
```typescript
// components/ChatInterface.tsx
"use client";
import { useState } from "react";
import { useAgentChat } from "@/hooks/useAgentChat";
export default function ChatInterface() {
const [message, setMessage] = useState("");
const { chat, loading, error, response } = useAgentChat();
const handleSubmit = async (e: React.FormEvent) => {
e.preventDefault();
if (!message.trim()) return;
await chat(message);
setMessage("");
};
return (
<div className="max-w-2xl mx-auto p-4">
<form onSubmit={handleSubmit} className="mb-4">
<input
type="text"
value={message}
onChange={(e) => setMessage(e.target.value)}
placeholder="Send a message..."
className="w-full p-2 border rounded"
disabled={loading}
/>
<button
type="submit"
disabled={loading || !message.trim()}
className="mt-2 px-4 py-2 bg-blue-500 text-white rounded disabled:opacity-50"
>
{loading ? "Thinking..." : "Send"}
</button>
</form>
{error && (
<div className="p-3 mb-4 bg-red-100 border border-red-400 text-red-700 rounded">
Error: {error}
</div>
)}
{response && (
<div className="p-3 bg-gray-100 border rounded">
<strong>Agent:</strong>
<p>{response}</p>
</div>
)}
</div>
);
}
```
## Next Steps
- Learn about [serverless deployment](/docs/llamaindex/getting_started/installation/serverless)
- Explore [server APIs](/docs/llamaindex/getting_started/installation/server-apis)
- Check [troubleshooting guide](/docs/llamaindex/getting_started/installation/troubleshooting) for common issues
@@ -1,40 +0,0 @@
---
title: With Node.js/Bun/Deno
description: In this guide, you'll learn how to use LlamaIndex with Node.js, Bun, and Deno.
---
## Adding environment variables
By default, LlamaIndex uses the OpenAI provider, which requires an API key. You can set the `OPENAI_API_KEY` environment variable to authenticate with OpenAI.
```shell
export OPENAI_API_KEY=your-api-key
```
Or you can use a `.env` file:
```shell
echo "OPENAI_API_KEY=your-api-key" > .env
node --env-file .env your-script.js
```
<Callout type="warn">Do not commit the api key to git repository.</Callout>
For more information, see [How to read environment variables from Node.js](https://nodejs.org/en/learn/command-line/how-to-read-environment-variables-from-nodejs).
## Performance Optimization
By default, we use `js-tiktoken` for tokenization. You can install `gpt-tokenizer`, which is then automatically used by LlamaIndex to get a 60x speedup for tokenization:
```package-install
npm i gpt-tokenizer
```
**Note**: This only works for Node.js
## TypeScript support
<Card
title="Getting Started with LlamaIndex.TS in TypeScript"
href="/docs/llamaindex/getting_started/installation/typescript"
/>
@@ -0,0 +1,211 @@
---
title: Server APIs & Backends
description: Deploy LlamaIndex.TS in server environments like Express, Fastify, and standalone Node.js applications.
---
This guide covers adding LlamaIndex.TS agents to traditional server environments where you have full Node.js runtime access.
## Supported Runtimes
LlamaIndex.TS works seamlessly with:
- **Node.js** (v18+)
- **Bun** (v1.0+)
- **Deno** (v1.30+)
## Common Server Frameworks
### Express.js
```typescript
import express from 'express';
import { agent } from '@llamaindex/workflow';
import { tool } from 'llamaindex';
import { openai } from '@llamaindex/openai';
import { z } from 'zod';
const app = express();
app.use(express.json());
// Initialize agent once at startup
let myAgent: any;
async function initializeAgent() {
// Create tools for the agent
const sumTool = tool({
name: "sum",
description: "Adds two numbers",
parameters: z.object({
a: z.number(),
b: z.number(),
}),
execute: ({ a, b }) => a + b,
});
const multiplyTool = tool({
name: "multiply",
description: "Multiplies two numbers",
parameters: z.object({
a: z.number(),
b: z.number(),
}),
execute: ({ a, b }) => a * b,
});
// Create the agent
myAgent = agent({
tools: [sumTool, multiplyTool],
llm: openai({ model: "gpt-4o-mini" }),
});
}
app.post('/api/chat', async (req, res) => {
try {
const { message } = req.body;
const result = await myAgent.run(message);
res.json({ response: result.data });
} catch (error) {
res.status(500).json({ error: 'Chat failed' });
}
});
// Initialize and start server
initializeAgent().then(() => {
app.listen(3000, () => {
console.log('Server running on port 3000');
});
});
```
### Fastify
```typescript
import Fastify from 'fastify';
import { agent } from '@llamaindex/workflow';
import { tool } from 'llamaindex';
import { openai } from '@llamaindex/openai';
import { z } from 'zod';
const fastify = Fastify();
let myAgent: any;
async function initializeAgent() {
const sumTool = tool({
name: "sum",
description: "Adds two numbers",
parameters: z.object({
a: z.number(),
b: z.number(),
}),
execute: ({ a, b }) => a + b,
});
myAgent = agent({
tools: [sumTool],
llm: openai({ model: "gpt-4o-mini" }),
});
}
fastify.post('/api/chat', async (request, reply) => {
try {
const { message } = request.body as { message: string };
const result = await myAgent.run(message);
return { response: result.data };
} catch (error) {
reply.status(500).send({ error: 'Chat failed' });
}
});
const start = async () => {
await initializeAgent();
await fastify.listen({ port: 3000 });
console.log('Server running on port 3000');
};
start();
```
### Hono
```typescript
import { Hono } from "hono";
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { z } from "zod";
type Bindings = {
OPENAI_API_KEY: string;
};
const app = new Hono<{ Bindings: Bindings }>();
app.post("/api/chat", async (c) => {
const { setEnvs } = await import("@llamaindex/env");
setEnvs(c.env);
const { message } = await c.req.json();
const greetTool = tool({
name: "greet",
description: "Greets a user",
parameters: z.object({
name: z.string(),
}),
execute: ({ name }) => `Hello, ${name}!`,
});
const myAgent = agent({
tools: [greetTool],
llm: openai({ model: "gpt-4o-mini" }),
});
try {
const result = await myAgent.run(message);
return c.json({ response: result.data });
} catch (error) {
return c.json({ error: error.message }, 500);
}
});
export default app;
```
## Streaming Responses
For real-time agent responses:
```typescript
import { agentStreamEvent } from "@llamaindex/workflow";
app.post('/api/chat-stream', async (req, res) => {
const { message } = req.body;
res.writeHead(200, {
'Content-Type': 'text/plain',
'Transfer-Encoding': 'chunked',
});
try {
const events = myAgent.runStream(message);
for await (const event of events) {
if (agentStreamEvent.include(event)) {
res.write(event.data.delta);
}
}
res.end();
} catch (error) {
res.write('Error: ' + error.message);
res.end();
}
});
```
## Next Steps
- Learn about [serverless deployment](/docs/llamaindex/getting_started/installation/serverless)
- Explore [Next.js integration](/docs/llamaindex/getting_started/installation/nextjs)
- Check [troubleshooting guide](/docs/llamaindex/getting_started/installation/troubleshooting) for common issues
@@ -0,0 +1,240 @@
---
title: Serverless Functions
description: Deploy LlamaIndex.TS in serverless environments like Vercel, Netlify, AWS Lambda, and Cloudflare Workers.
---
This guide covers adding LlamaIndex.TS agents to serverless environments where you have execution time and memory constraints.
## Cloudflare Workers
```typescript
export default {
async fetch(request: Request, env: Env): Promise<Response> {
const { setEnvs } = await import("@llamaindex/env");
setEnvs(env);
const { agent } = await import("@llamaindex/workflow");
const { openai } = await import("@llamaindex/openai");
const { tool } = await import("llamaindex");
const { z } = await import("zod");
const timeTool = tool({
name: "getCurrentTime",
description: "Gets the current time",
parameters: z.object({}),
execute: () => new Date().toISOString(),
});
const myAgent = agent({
tools: [timeTool],
llm: openai({ model: "gpt-4o-mini" }),
});
try {
const { message } = await request.json();
const result = await myAgent.run(message);
return new Response(JSON.stringify({ response: result.data }), {
headers: { "Content-Type": "application/json" },
});
} catch (error) {
return new Response(JSON.stringify({ error: error.message }), {
status: 500,
headers: { "Content-Type": "application/json" },
});
}
},
};
```
## Vercel Functions
### Node.js Runtime
```typescript
// pages/api/chat.ts or app/api/chat/route.ts
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { z } from "zod";
export default async function handler(req, res) {
if (req.method !== 'POST') {
return res.status(405).json({ error: 'Method not allowed' });
}
const { message } = req.body;
const weatherTool = tool({
name: "getWeather",
description: "Get weather information",
parameters: z.object({
city: z.string(),
}),
execute: ({ city }) => `Weather in ${city}: 72°F, sunny`,
});
const myAgent = agent({
tools: [weatherTool],
llm: openai({ model: "gpt-4o-mini" }),
});
try {
const result = await myAgent.run(message);
res.json({ response: result.data });
} catch (error) {
res.status(500).json({ error: error.message });
}
}
```
### Edge Runtime
```typescript
// app/api/chat/route.ts
import { NextRequest, NextResponse } from "next/server";
export const runtime = "edge";
export async function POST(request: NextRequest) {
const { setEnvs } = await import("@llamaindex/env");
setEnvs(process.env);
const { message } = await request.json();
try {
// Use simpler tools for edge runtime
const { agent } = await import("@llamaindex/workflow");
const { tool } = await import("llamaindex");
const { openai } = await import("@llamaindex/openai");
const { z } = await import("zod");
const timeTool = tool({
name: "time",
description: "Gets current time",
parameters: z.object({}),
execute: () => new Date().toISOString(),
});
const myAgent = agent({
tools: [timeTool],
llm: openai({ model: "gpt-4o-mini" }),
});
const result = await myAgent.run(message);
return NextResponse.json({ response: result.data });
} catch (error) {
return NextResponse.json({ error: error.message }, { status: 500 });
}
}
```
## AWS Lambda
```typescript
import { APIGatewayProxyHandler } from "aws-lambda";
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { z } from "zod";
export const handler: APIGatewayProxyHandler = async (event, context) => {
const { message } = JSON.parse(event.body || "{}");
const calculatorTool = tool({
name: "calculate",
description: "Performs basic math",
parameters: z.object({
expression: z.string(),
}),
execute: ({ expression }) => {
// Simple calculator implementation
try {
return `Result: ${eval(expression)}`;
} catch {
return "Invalid expression";
}
},
});
const myAgent = agent({
tools: [calculatorTool],
llm: openai({ model: "gpt-4o-mini" }),
});
try {
const result = await myAgent.run(message);
return {
statusCode: 200,
headers: {
"Content-Type": "application/json",
"Access-Control-Allow-Origin": "*",
},
body: JSON.stringify({ response: result.data }),
};
} catch (error) {
return {
statusCode: 500,
body: JSON.stringify({ error: error.message }),
};
}
};
```
## Netlify Functions
```typescript
// netlify/functions/chat.ts
import { Handler } from "@netlify/functions";
import { agent } from "@llamaindex/workflow";
import { tool } from "llamaindex";
import { openai } from "@llamaindex/openai";
import { z } from "zod";
export const handler: Handler = async (event, context) => {
if (event.httpMethod !== "POST") {
return { statusCode: 405, body: "Method Not Allowed" };
}
const { message } = JSON.parse(event.body || "{}");
const helpTool = tool({
name: "help",
description: "Provides help information",
parameters: z.object({
topic: z.string().optional(),
}),
execute: ({ topic }) => {
return topic ? `Help for ${topic}` : "Available help topics";
},
});
const myAgent = agent({
tools: [helpTool],
llm: openai({ model: "gpt-4o-mini" }),
});
try {
const result = await myAgent.run(message);
return {
statusCode: 200,
body: JSON.stringify({ response: result.data }),
};
} catch (error) {
return {
statusCode: 500,
body: JSON.stringify({ error: error.message }),
};
}
};
```
## Next Steps
- Learn about [Next.js integration](/docs/llamaindex/getting_started/installation/nextjs)
- Explore [server deployment](/docs/llamaindex/getting_started/installation/server-apis)
- Check [troubleshooting guide](/docs/llamaindex/getting_started/installation/troubleshooting) for common issues
@@ -0,0 +1,501 @@
---
title: Troubleshooting
description: Common issues and solutions when installing and deploying LlamaIndex.TS applications.
---
This guide addresses common issues you might encounter when installing and deploying LlamaIndex.TS applications across different environments.
## Installation Issues
### Module Resolution Errors
**Problem:** Import errors or module not found errors
**Solution:** Ensure your `tsconfig.json` is properly configured:
```json5
{
"compilerOptions": {
"moduleResolution": "bundler", // or "nodenext" | "node16" | "node"
"lib": ["DOM.AsyncIterable"],
"target": "es2020",
"module": "esnext"
}
}
```
**Alternative solution:** Do a clean reinstall of your dependencies, optionally with a different package manager:
```bash
# Clear node_modules and reinstall
rm -rf node_modules package-lock.json
npm install
# Or try with different package manager
pnpm install
# or
yarn install
```
### TypeScript Errors
**Problem:** TypeScript compilation errors with LlamaIndex imports
**Solution:** Ensure you have the correct TypeScript configuration:
```json5
{
"compilerOptions": {
"strict": true,
"skipLibCheck": true, // Skip type checking of node_modules
"allowSyntheticDefaultImports": true,
"esModuleInterop": true
}
}
```
### Package Compatibility Issues
**Problem:** Some packages don't work in certain environments
**Common incompatibilities:**
- `@llamaindex/readers` - May not work in serverless environments
- `@llamaindex/huggingface` - Limited browser/edge compatibility
- File system readers - Don't work in browser/edge environments
**Solution:** Use environment-specific alternatives:
```typescript
// Instead of file system readers in serverless
// Use remote data sources
async function loadDocumentsFromAPI() {
const response = await fetch('https://api.example.com/documents');
const data = await response.json();
return data.map(doc => new Document(doc.content));
}
```
## Runtime Issues
### Memory Errors
**Problem:** Out of memory errors during index creation or querying
**Solution:** Optimize memory usage:
```typescript
// Batch process large document sets
async function batchProcessDocuments(documents: Document[], batchSize = 10) {
const results = [];
for (let i = 0; i < documents.length; i += batchSize) {
const batch = documents.slice(i, i + batchSize);
const batchIndex = await VectorStoreIndex.fromDocuments(batch);
results.push(batchIndex);
// Optional: Add delay between batches
await new Promise(resolve => setTimeout(resolve, 100));
}
return results;
}
```
**For serverless environments:**
```typescript
// Use external vector stores instead of in-memory
// TODO: Example with Pinecone, Weaviate, etc.
// const vectorStore = new PineconeVectorStore(/* config */);
// const index = await VectorStoreIndex.fromVectorStore(vectorStore);
```
### API Rate Limiting
**Problem:** Rate limiting errors from LLM providers
**Solution:** Implement retry logic with exponential backoff:
```typescript
async function queryWithRetry(queryEngine: any, question: string, maxRetries = 3) {
for (let i = 0; i < maxRetries; i++) {
try {
return await queryEngine.query(question);
} catch (error) {
if (error.message.includes('rate limit') && i < maxRetries - 1) {
const delay = Math.pow(2, i) * 1000; // Exponential backoff
await new Promise(resolve => setTimeout(resolve, delay));
continue;
}
throw error;
}
}
}
```
### Tokenization Performance
**Problem:** Slow tokenization affecting performance
**Solution:** Install a faster tokenizer (Node.js only):
```bash
npm install gpt-tokenizer
```
LlamaIndex will automatically use this for 60x faster tokenization.
## Bundling Issues
### Bundle Size Too Large
**Problem:** Large bundle sizes affecting performance
**Solution:** Use dynamic imports and code splitting:
```typescript
// Lazy load LlamaIndex components
const initializeLlamaIndex = async () => {
const { VectorStoreIndex, SimpleDirectoryReader } = await import("llamaindex");
return { VectorStoreIndex, SimpleDirectoryReader };
};
// In your API route
export async function POST(request: NextRequest) {
const { VectorStoreIndex, SimpleDirectoryReader } = await initializeLlamaIndex();
// Use the imported modules
}
```
### Webpack/Vite Bundling Issues
**Problem:** Bundler compatibility issues
**Solution for Next.js:**
```javascript
// next.config.mjs
import withLlamaIndex from "llamaindex/next";
const nextConfig = {
webpack: (config, { isServer }) => {
// Custom webpack configuration if needed
if (!isServer) {
config.resolve.fallback = {
...config.resolve.fallback,
fs: false,
net: false,
tls: false,
};
}
return config;
},
};
export default withLlamaIndex(nextConfig);
```
**Solution for Vite:**
```typescript
// vite.config.ts
import { defineConfig } from 'vite';
export default defineConfig({
define: {
global: 'globalThis',
},
resolve: {
alias: {
// Add aliases for problematic modules
},
},
optimizeDeps: {
include: ['llamaindex'],
},
});
```
## Environment-Specific Issues
### Node.js Version Compatibility
**Problem:** Node.js version compatibility issues
**Solution:** Use supported Node.js versions:
```json
{
"engines": {
"node": ">=18.0.0"
}
}
```
**Check your Node.js version:**
```bash
node --version
```
### Cloudflare Workers Issues
**Problem:** Module not available in Cloudflare Workers
**Solution:** Use `@llamaindex/env` for environment compatibility:
```typescript
export default {
async fetch(request: Request, env: Env): Promise<Response> {
const { setEnvs } = await import("@llamaindex/env");
setEnvs(env);
// Your LlamaIndex code here
},
};
```
### Vercel Edge Runtime Issues
**Problem:** Limited Node.js API access in Edge Runtime
**Solution:** Use standard runtime or adapt code:
```typescript
// Force standard runtime
export const runtime = "nodejs";
// Or adapt for edge
export const runtime = "edge";
export async function POST(request: NextRequest) {
// Use edge-compatible code only
const { setEnvs } = await import("@llamaindex/env");
setEnvs(process.env);
// Avoid file system operations
// Use remote data sources
}
```
## Performance Issues
### Slow Query Responses
**Problem:** Slow query performance
**Solution:** Implement caching and optimization:
```typescript
import { LRUCache } from 'lru-cache';
const queryCache = new LRUCache<string, string>({
max: 100,
ttl: 1000 * 60 * 10, // 10 minutes
});
export async function optimizedQuery(question: string, queryEngine: any) {
// Check cache first
const cached = queryCache.get(question);
if (cached) return cached;
// Query and cache result
const result = await queryEngine.query(question);
queryCache.set(question, result);
return result;
}
```
### Cold Start Issues
**Problem:** Slow cold starts in serverless environments
**Solution:** Initialize expensive objects once and cache them at module scope so that warm invocations can reuse them:
```typescript
// Pre-initialize outside handler
let cachedQueryEngine: any = null;
export async function handler(event: any) {
if (!cachedQueryEngine) {
cachedQueryEngine = await initializeQueryEngine();
}
// Use cached engine
return await cachedQueryEngine.query(question);
}
```
## Environment Variable Issues
### Missing API Keys
**Problem:** API key not found or invalid
**Solution:** Verify environment variable setup:
```typescript
// Check if API key is available
if (!process.env.OPENAI_API_KEY) {
throw new Error('OPENAI_API_KEY environment variable is required');
}
// For debugging (remove in production)
console.log('API Key present:', !!process.env.OPENAI_API_KEY);
```
### Environment Variable Loading
**Problem:** Environment variables not loading correctly
**Solution:** Use proper loading mechanisms:
```typescript
// For Node.js
import 'dotenv/config';
// For Next.js - use .env.local
// Variables are automatically loaded
// For Cloudflare Workers
export default {
async fetch(request: Request, env: Env): Promise<Response> {
// Use env parameter, not process.env
const apiKey = env.OPENAI_API_KEY;
// ...
},
};
```
## Common Error Messages
### "Cannot find module 'llamaindex'"
**Cause:** Package not installed or module resolution issue
**Solution:**
```bash
npm install llamaindex
```
### "Module not found: Can't resolve 'fs'"
**Cause:** File system modules used in browser/edge environment
**Solution:**
```typescript
// Use dynamic imports with fallbacks
const loadDocuments = async () => {
if (typeof window !== 'undefined') {
// Browser environment - use alternative
return await loadDocumentsFromAPI();
} else {
// Node.js environment - use file system
const { SimpleDirectoryReader } = await import('llamaindex');
return await new SimpleDirectoryReader('data').loadData();
}
};
```
### "ReferenceError: global is not defined"
**Cause:** Global polyfill missing in browser environments
**Solution:**
```typescript
// Add to your app entry point
if (typeof global === 'undefined') {
global = globalThis;
}
```
### "Cannot read properties of undefined (reading 'query')"
**Cause:** Query engine not properly initialized
**Solution:**
```typescript
// Always check initialization
if (!queryEngine) {
throw new Error('Query engine not initialized');
}
// Or use optional chaining
const response = await queryEngine?.query(question);
```
## Debugging Tips
### Enable Debug Logging
```typescript
// Enable debug logging
process.env.DEBUG = "llamaindex:*";
// Or specific modules
process.env.DEBUG = "llamaindex:vector-store";
```
### Check Package Versions
```bash
npm list llamaindex
npm list @llamaindex/openai
```
### Test in Isolation
```typescript
// Create minimal test case
import { VectorStoreIndex } from 'llamaindex';
async function testBasic() {
try {
console.log('Testing basic import...');
const index = new VectorStoreIndex();
console.log('Success!');
} catch (error) {
console.error('Error:', error);
}
}
testBasic();
```
## Getting Help
### Before Asking for Help
1. **Check this troubleshooting guide**
2. **Search existing GitHub issues**
3. **Try minimal reproduction**
4. **Check your environment configuration**
### When Reporting Issues
Include:
- Node.js version (`node --version`)
- Package versions (`npm list llamaindex`)
- Environment (Node.js, Cloudflare Workers, Vercel, etc.)
- Minimal code reproduction
- Full error message and stack trace
### Useful Resources
- [GitHub Issues](https://github.com/run-llama/LlamaIndexTS/issues)
- [Discord Community](https://discord.gg/dGcwcsnxhU)
- [Documentation](https://docs.llamaindex.ai/)
## Next Steps
If you're still experiencing issues:
1. **Check specific deployment guides:**
- [Server APIs](/docs/llamaindex/getting_started/installation/server-apis)
- [Serverless Functions](/docs/llamaindex/getting_started/installation/serverless)
- [Next.js Applications](/docs/llamaindex/getting_started/installation/nextjs)
2. **Open an issue** on GitHub with a minimal reproduction
3. **Join our Discord** for community support
@@ -1,99 +0,0 @@
---
title: With TypeScript
description: In this guide, you'll learn how to use LlamaIndex with TypeScript
---
LlamaIndex.TS is written in TypeScript and designed to be used in TypeScript projects.
We put a lot of work into strong typing to make sure you have a great typing experience with code completion, such as:
```ts twoslash
import { PromptTemplate } from 'llamaindex'
const promptTemplate = new PromptTemplate({
template: `Context information from multiple sources is below.
---------------------
{context}
---------------------
Given the information from multiple sources and not prior knowledge.
Answer the query in the style of a Shakespeare play"
Query: {query}
Answer:`,
templateVars: ["context", "query"],
});
// @noErrors
promptTemplate.format({
c
//^|
})
```
## Enable TypeScript
Make sure to set [moduleResolution](https://www.typescriptlang.org/docs/handbook/modules/theory.html#module-resolution) in your `tsconfig.json` file:
```json5
{
compilerOptions: {
// ⬇️ add this line to your tsconfig.json
moduleResolution: "bundler", // or "nodenext" | "node16" | "node"
},
}
```
We recommend using `bundler` or `nodenext`, but due to the popularity of `node`, we still added support for it.
## Enable AsyncIterable for `Web Stream` API
Some modules use the `Web Stream` API, such as `ReadableStream` and `WritableStream`, so you need to enable `DOM.AsyncIterable` in your `tsconfig.json`.
```json5
{
compilerOptions: {
// ⬇️ add this lib to your tsconfig.json
lib: ["DOM.AsyncIterable"],
},
}
```
```typescript
import { tool } from 'llamaindex'
import { agent } from "@llamaindex/workflow";
import { openai } from "@llamaindex/openai";
Settings.llm = openai({
model: "gpt-4o-mini",
});
const addTool = tool({
name: "add",
description: "Adds two numbers",
parameters: z.object({x: z.number(), y: z.number()}),
execute: ({ x, y }) => x + y,
});
const myAgent = agent({
tools: [addTool],
});
// Chat with the agent
const context = myAgent.run("Hello, how are you?");
for await (const event of context) {
if (event instanceof AgentStream) {
for (const chunk of event.data.delta) {
process.stdout.write(chunk); // stream response
}
} else {
console.log(event); // other events
}
}
```
## Run TypeScript Script in Node.js
We recommend using [tsx](https://www.npmjs.com/package/tsx) to run TypeScript scripts in Node.js.
```shell
node --import tsx ./my-script.ts
```
@@ -1,23 +0,0 @@
---
title: With Vite
description: In this guide, you'll learn how to use LlamaIndex with Vite
---
Before you start, make sure you have tried LlamaIndex.TS in Node.js so that you understand the basics.
<Card
title="Getting Started with LlamaIndex.TS in Node.js"
href="/docs/llamaindex/getting_started/installation/node"
/>
Also, make sure you have a basic understanding of [Vite](https://vitejs.dev/).
## Why mention Vite?
Vite is widely used for building web applications, such as React apps, and even some native apps like [Electron](https://www.electronjs.org/).
However, Vite is not a ready-to-use solution for Node.js-like applications, as it is designed for web applications that run in the browser.
There are some plugins and frameworks based on Vite, like [Waku.gg](https://github.com/dai-shi/waku) or [Electron Vite](https://electron-vite.org/).
For now, we have no clear solution for bundling LlamaIndex.TS with Vite. If you have any ideas or solutions, please let us know.
+105 -8
View File
@@ -1,21 +1,118 @@
---
title: What is LlamaIndex.TS
description: LlamaIndex is the leading data framework for building LLM applications
title: Welcome to LlamaIndex.TS
description: LlamaIndex.TS is the leading framework for utilizing context engineering to build LLM applications in JavaScript and TypeScript.
---
LlamaIndex is a framework for building context-augmented generative AI applications with LLMs including agents and workflows.
LlamaIndex.TS is a **framework for utilizing context engineering to build generative AI applications** with large language models. From rapid-prototyping RAG chatbots to deploying multi-agent workflows in production, LlamaIndex gives you everything you need — all in idiomatic TypeScript.
The TypeScript implementation is designed for JavaScript server side applications using <SiNodedotjs className="inline" color="#5FA04E" /> Node.js, <SiDeno className="inline" color="#70FFAF" /> Deno, <SiBun className="inline" /> Bun, <SiCloudflareworkers className="inline" color="#F38020" /> Cloudflare Workers, and more.
Built for modern JavaScript runtimes like <SiNodedotjs className="inline" color="#5FA04E" /> **Node.js**, <SiDeno className="inline" color="#70FFAF" /> **Deno**, <SiBun className="inline" /> **Bun**, <SiCloudflareworkers className="inline" color="#F38020" /> **Cloudflare Workers**, and more.
LlamaIndex.TS provides tools for beginners, advanced users, and everyone in between.
<div className="grid grid-cols-1 gap-4 sm:grid-cols-2 lg:grid-cols-3 my-6">
<a href="#introduction" className="block rounded-lg border border-gray-600/40 p-4 hover:border-gray-400 hover:bg-gray-700/20 no-underline">
<h3 className="mb-1 text-lg font-semibold underline">Introduction</h3>
<p className="text-sm text-gray-400 no-underline">Context engineering, agents &amp; workflows — what do they mean?</p>
</a>
Try it out with a starter example using StackBlitz:
<a href="#use-cases" className="block rounded-lg border border-gray-600/40 p-4 hover:border-gray-400 hover:bg-gray-700/20 no-underline">
<h3 className="mb-1 text-lg font-semibold underline">Use cases</h3>
<p className="text-sm text-gray-400 no-underline">See what you can build with LlamaIndex.TS.</p>
</a>
<a href="#getting-started" className="block rounded-lg border border-gray-600/40 p-4 hover:border-gray-400 hover:bg-gray-700/20 no-underline">
<h3 className="mb-1 text-lg font-semibold underline">Getting started</h3>
<p className="text-sm text-gray-400 no-underline">Your first app in 5 lines of code.</p>
</a>
<a href="https://docs.cloud.llamaindex.ai/" className="block rounded-lg border border-gray-600/40 p-4 hover:border-gray-400 hover:bg-gray-700/20 no-underline" target="_blank" rel="noopener noreferrer">
<h3 className="mb-1 text-lg font-semibold underline">LlamaCloud</h3>
<p className="text-sm text-gray-400 no-underline">Managed parsing, extraction &amp; retrieval pipelines.</p>
</a>
<a href="#community" className="block rounded-lg border border-gray-600/40 p-4 hover:border-gray-400 hover:bg-gray-700/20 no-underline">
<h3 className="mb-1 text-lg font-semibold underline">Community</h3>
<p className="text-sm text-gray-400 no-underline">Join thousands of builders on Discord, Twitter, and more.</p>
</a>
<a href="#related-projects" className="block rounded-lg border border-gray-600/40 p-4 hover:border-gray-400 hover:bg-gray-700/20 no-underline">
<h3 className="mb-1 text-lg font-semibold underline">Related projects</h3>
<p className="text-sm text-gray-400 no-underline">Connectors, demos &amp; starter kits.</p>
</a>
</div>
## Introduction
### What are agents?
[Agents](/docs/llamaindex/tutorials/agents/1_setup) are LLM-powered assistants that can reason, use external tools, and take actions to accomplish tasks such as research, data extraction, and automation.
LlamaIndex.TS provides foundational building blocks for creating and orchestrating these agents.
### What are workflows?
[Workflows](/docs/llamaindex/tutorials/workflows) are multi-step, event-driven processes that combine agents, data connectors, and other tools to solve complex problems.
With LlamaIndex.TS you can chain together retrieval, generation, and tool-calling steps and then deploy the entire pipeline as a microservice.
### What is context engineering?
LLMs come pre-trained on vast public corpora, but not on **your** private or domain-specific data.
Context engineering bridges that gap by injecting the right pieces of your data into the LLM prompt at the right time.
The most popular example is [Retrieval-Augmented Generation (RAG)](/docs/llamaindex/getting_started/concepts), but the same idea powers agent memory, evaluation, extraction, summarisation, and more.
LlamaIndex.TS gives you:
- **Data connectors** to ingest from APIs, files, SQL, and dozens more sources.
- **Indexes & retrievers** to store and retrieve your data for LLM consumption.
- **Agents and Engines** to query and use chat+reasoning interfaces over your data.
- **Workflows** for fine-grained orchestration of your data and LLM-powered agents.
- **Observability** integrations so you can iterate with confidence.
You can learn more about these concepts in our [concepts guide](/docs/llamaindex/getting_started/concepts).
## Use cases
Popular scenarios include:
- [LLM-Powered Agents](/docs/llamaindex/tutorials/agents/1_setup)
- [Indexing and Retrieval](/docs/llamaindex/tutorials/rag)
- [Extracting Structured Data](/docs/llamaindex/tutorials/structured_data_extraction)
- [Custom Orchestration with Workflows](/docs/llamaindex/tutorials/workflows)
## Getting started
The fastest way to get started is in StackBlitz below — no local setup required:
<iframe
className="w-full h-[440px]"
aria-label="LlamaIndex.TS Starter"
aria-description="This is a starter example for LlamaIndex.TS, it shows the basic usage of the library."
aria-description="Interactive starter for LlamaIndex.TS"
src="https://stackblitz.com/github/run-llama/LlamaIndexTS/tree/main/examples?embed=1&file=starter.ts"
/>
You'll need an OpenAI API key to run this example. You can retrieve it from [OpenAI](https://platform.openai.com/api-keys).
Want to learn more? We have several tutorials to get you started:
- [Installation + Runtime Guide](/docs/llamaindex/getting_started/installation)
- [Create your first agent](/docs/llamaindex/tutorials/agents/1_setup)
- [Learn how to index data and chat with it](/docs/llamaindex/tutorials/rag)
- [Learn how to write your own workflows and agents](/docs/llamaindex/tutorials/workflows)
---
## LlamaCloud
Need an end-to-end managed pipeline? Check out **[LlamaCloud](https://cloud.llamaindex.ai/)**: best-in-class document parsing (LlamaParse), extraction (LlamaExtract), and indexing services with generous free tiers.
---
## Community
- [Twitter](https://twitter.com/llama_index)
- [Discord](https://discord.gg/dGcwcsnxhU)
- [LinkedIn](https://www.linkedin.com/company/llamaindex/)
We 💜 contributors! View our [contributing guide](https://github.com/run-llama/LlamaIndexTS/blob/main/CONTRIBUTING.md) to get started.
## Related projects
- [Python framework GitHub](https://github.com/run-llama/llama_index)
- [Python docs](https://docs.llamaindex.ai/)
- [create-llama](https://www.npmjs.com/package/create-llama) — scaffold a new project in seconds!
- [UI Components](https://ui.llamaindex.ai/) — build chat applications with our Next.js components.
@@ -34,6 +34,7 @@ const jokeAgent = agent({
// Run the workflow
const result = await jokeAgent.run("Tell me something funny");
console.log(result.data.result); // Baby Llama is called cria
console.log(result.data.message); // { role: 'assistant', content: 'Baby Llama is called cria' }
```
### Event Streaming
@@ -106,34 +106,40 @@ const memory = createMemory({
Long-term memory is represented as `Memory Block` objects. These objects contain information that is from previous user sessions or from the beginning of the current conversation. When memory is retrieved (by calling `getLLM`), the short-term and long-term memories are merged together within the given `tokenLimit`.
Currently, there are two predefined memory blocks:
Currently, there are three predefined memory blocks:
- `staticBlock`: A memory block that stores a static piece of information.
- `factExtractionBlock`: A memory block that extracts facts from the chat history.
- `vectorBlock`: A memory block that stores and retrieves chat messages from a vector database using semantic similarity search. Messages are stored individually and retrieved based on their relevance to recent conversation context. It takes a `vectorStore`, which is used to store and retrieve the chat messages.
This sounds a bit complicated, but it's actually quite simple. Let's look at an example:
```ts
import { createMemory, factExtractionBlock, staticBlock } from "llamaindex";
import { createMemory, factExtractionBlock, staticBlock, vectorBlock } from "llamaindex";
import { QdrantVectorStore } from "@llamaindex/qdrant";
import { OpenAIEmbedding } from "@llamaindex/openai";
const memoryBlocks= [
staticBlock({
id: "core_info",
content: "My name is Logan, and I live in Saskatoon. I work at LlamaIndex.",
}),
factExtractionBlock({
id: "user-extracted_info",
priority: 1,
llm: llm,
maxFacts: 50,
}),
vectorBlock({
vectorStore: new QdrantVectorStore({ url: "http://localhost:6333" }),
priority: 2,
}),
];
```
Here, we've setup two memory blocks:
Here, we've set up three memory blocks:
- `core_info`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
- `extracted_info`: An extracted memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the `maxFacts` will be automatically summarized and reduced to leave room for new information.
- `staticBlock`: A static memory block that stores some core information about the user. This information will always be inserted into the memory. The type used is `MessageContent` to support multi-modal content.
- `factExtractionBlock`: An extracted memory block that will extract information from the chat history. Here we've passed in the `llm` to use to extract facts from the chat history, and set the `maxFacts` to 50. If the number of extracted facts exceeds this limit, the facts will be automatically summarized and reduced to leave room for new information.
- `vectorBlock`: A vector memory block that will store chat messages in a vector database and retrieve them from there. Messages are stored individually and retrieved based on their relevance to recent conversation context. Here we've passed in the `vectorStore` to use to store and retrieve the chat messages.
You'll also notice that we've set the `priority` for the `factExtractionBlock` and `vectorBlock` blocks. This is used to determine the handling when the memory blocks' content (i.e. long-term memory) + short-term memory exceeds the token limit on the `Memory` object.
@@ -158,6 +164,46 @@ When memory is retrieved (using `getLLM`), the short-term and long-term memories
The amount of short-term memory included is specified by the `shortTermTokenLimitRatio`. If it's set to `0.7`, 70% of the `tokenLimit` is used for short-term memory (not including the static memory block).
#### VectorBlock Configuration Options
The `vectorBlock` offers several configuration options to customize its behavior:
```ts
vectorBlock({
vectorStore: new QdrantVectorStore({ url: "http://localhost:6333" }),
priority: 2,
retrievalContextWindow: 5, // Number of recent messages to use for context when retrieving
formatTemplate: new PromptTemplate({ template: "Context: {{ context }}" }), // Custom formatting template
nodePostprocessors: [/* custom postprocessors */], // Apply processing to retrieved nodes
queryOptions: {
similarityTopK: 3, // Number of top similar results to return (default: 2)
mode: VectorStoreQueryMode.DEFAULT, // Query mode for the vector store
sessionFilterKey: "session_id", // Metadata key for session filtering (default: "session_id")
// Custom filters can be added here - session filter is automatically included
filters: {
filters: [
{ key: "custom_field", value: "custom_value", operator: "==" }
],
condition: "and"
}
}
})
```
**Key Configuration Options:**
- **`retrievalContextWindow`**: Number of recent messages to consider when creating the retrieval query (default: 5). A larger window provides more context but may be less precise.
- **`formatTemplate`**: Template for formatting retrieved information before adding to memory. Defaults to a simple context template.
- **`nodePostprocessors`**: Array of postprocessors to apply to retrieved nodes, useful for filtering or transforming results.
- **`queryOptions.similarityTopK`**: Number of most similar messages to retrieve from the vector store (default: 2).
- **`queryOptions.sessionFilterKey`**: Metadata key used to isolate memory between different sessions (default: "session_id").
- **`queryOptions.filters`**: Additional metadata filters for retrieval. The session filter is automatically added to ensure memory isolation.
**Session Isolation:**
The vectorBlock automatically adds a session filter using the block's ID to ensure that memories from different sessions don't interfere with each other. This filter uses the `sessionFilterKey` (default: "session_id") and can be customized if needed.
## Persistence with Snapshots
Save and restore memory state:
@@ -38,10 +38,13 @@ You should expect output something like:
{
result: '5 + 5 is 10. Then, 10 divided by 2 is 5.',
state: {
memory: ChatMemoryBuffer {
chatStore: SimpleChatStore {},
chatStoreKey: 'chat_history',
tokenLimit: 750000
memory: Memory {
messages: [Array],
tokenLimit: 30000,
shortTermTokenLimitRatio: 0.7,
memoryBlocks: [],
memoryCursor: 0,
adapters: [Object]
},
scratchpad: [],
currentAgentName: 'Agent',
@@ -1,5 +1,17 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.182
### Patch Changes
- llamaindex@0.11.21
## 0.0.181
### Patch Changes
- llamaindex@0.11.20
## 0.0.180
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.180",
"version": "0.0.182",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,18 @@
# @llamaindex/llama-parse-browser-test
## 0.0.81
### Patch Changes
- @llamaindex/cloud@4.0.26
## 0.0.80
### Patch Changes
- Updated dependencies [2967d57]
- @llamaindex/cloud@4.0.25
## 0.0.79
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llama-parse-browser-test",
"private": true,
"version": "0.0.79",
"version": "0.0.81",
"type": "module",
"scripts": {
"dev": "vite",
+12
View File
@@ -1,5 +1,17 @@
# @llamaindex/next-agent-test
## 0.1.182
### Patch Changes
- llamaindex@0.11.21
## 0.1.181
### Patch Changes
- llamaindex@0.11.20
## 0.1.180
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.180",
"version": "0.1.182",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,17 @@
# test-edge-runtime
## 0.1.181
### Patch Changes
- llamaindex@0.11.21
## 0.1.180
### Patch Changes
- llamaindex@0.11.20
## 0.1.179
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.179",
"version": "0.1.181",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,27 @@
# @llamaindex/next-node-runtime
## 0.1.51
### Patch Changes
- llamaindex@0.11.21
- @llamaindex/huggingface@0.1.22
- @llamaindex/readers@3.1.16
## 0.1.50
### Patch Changes
- llamaindex@0.11.20
- @llamaindex/huggingface@0.1.21
- @llamaindex/readers@3.1.15
## 0.1.49
### Patch Changes
- @llamaindex/huggingface@0.1.20
## 0.1.48
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.1.48",
"version": "0.1.51",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,17 @@
# vite-import-llamaindex
## 0.0.48
### Patch Changes
- llamaindex@0.11.21
## 0.0.47
### Patch Changes
- llamaindex@0.11.20
## 0.0.46
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "vite-import-llamaindex",
"private": true,
"version": "0.0.46",
"version": "0.0.48",
"type": "module",
"scripts": {
"build": "vite build",
@@ -1,5 +1,17 @@
# @llamaindex/waku-query-engine-test
## 0.0.182
### Patch Changes
- llamaindex@0.11.21
## 0.0.181
### Patch Changes
- llamaindex@0.11.20
## 0.0.180
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.180",
"version": "0.0.182",
"type": "module",
"private": true,
"scripts": {
+1 -1
View File
@@ -23,7 +23,7 @@ await test("pinecone", async (t) => {
});
const vectorStore = new PineconeVectorStore({
embeddingModel: openaiEmbedding,
embedModel: openaiEmbedding,
});
t.after(async () => {
+127
View File
@@ -1,5 +1,132 @@
# examples
## 0.3.33
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/cloud@4.0.26
- llamaindex@0.11.21
- @llamaindex/node-parser@2.0.17
- @llamaindex/anthropic@0.3.19
- @llamaindex/assemblyai@0.1.16
- @llamaindex/clip@0.0.68
- @llamaindex/cohere@0.0.31
- @llamaindex/deepinfra@0.0.68
- @llamaindex/discord@0.1.16
- @llamaindex/google@0.3.16
- @llamaindex/huggingface@0.1.22
- @llamaindex/jinaai@0.0.28
- @llamaindex/mistral@0.1.17
- @llamaindex/mixedbread@0.0.31
- @llamaindex/notion@0.1.16
- @llamaindex/ollama@0.1.17
- @llamaindex/openai@0.4.12
- @llamaindex/perplexity@0.0.25
- @llamaindex/portkey-ai@0.0.59
- @llamaindex/replicate@0.0.59
- @llamaindex/bm25-retriever@0.0.6
- @llamaindex/astra@0.0.31
- @llamaindex/azure@0.1.29
- @llamaindex/chroma@0.0.31
- @llamaindex/elastic-search@0.1.17
- @llamaindex/firestore@1.0.24
- @llamaindex/milvus@0.1.26
- @llamaindex/mongodb@0.0.32
- @llamaindex/pinecone@0.1.17
- @llamaindex/postgres@0.0.60
- @llamaindex/qdrant@0.1.27
- @llamaindex/supabase@0.1.18
- @llamaindex/upstash@0.0.31
- @llamaindex/weaviate@0.0.32
- @llamaindex/vercel@0.1.17
- @llamaindex/voyage-ai@1.0.23
- @llamaindex/readers@3.1.16
- @llamaindex/tools@0.1.7
- @llamaindex/workflow@1.1.17
- @llamaindex/deepseek@0.0.29
- @llamaindex/fireworks@0.0.28
- @llamaindex/groq@0.0.84
- @llamaindex/together@0.0.28
- @llamaindex/vllm@0.0.54
- @llamaindex/xai@0.0.15
## 0.3.32
### Patch Changes
- Updated dependencies [650eeb1]
- Updated dependencies [a8ec08c]
- Updated dependencies [2967d57]
- @llamaindex/google@0.3.15
- @llamaindex/core@0.6.16
- @llamaindex/workflow@1.1.16
- @llamaindex/cloud@4.0.25
- llamaindex@0.11.20
- @llamaindex/node-parser@2.0.16
- @llamaindex/anthropic@0.3.18
- @llamaindex/assemblyai@0.1.15
- @llamaindex/clip@0.0.67
- @llamaindex/cohere@0.0.30
- @llamaindex/deepinfra@0.0.67
- @llamaindex/discord@0.1.15
- @llamaindex/huggingface@0.1.21
- @llamaindex/jinaai@0.0.27
- @llamaindex/mistral@0.1.16
- @llamaindex/mixedbread@0.0.30
- @llamaindex/notion@0.1.15
- @llamaindex/ollama@0.1.16
- @llamaindex/openai@0.4.11
- @llamaindex/perplexity@0.0.24
- @llamaindex/portkey-ai@0.0.58
- @llamaindex/replicate@0.0.58
- @llamaindex/bm25-retriever@0.0.5
- @llamaindex/astra@0.0.30
- @llamaindex/azure@0.1.28
- @llamaindex/chroma@0.0.30
- @llamaindex/elastic-search@0.1.16
- @llamaindex/firestore@1.0.23
- @llamaindex/milvus@0.1.25
- @llamaindex/mongodb@0.0.31
- @llamaindex/pinecone@0.1.16
- @llamaindex/postgres@0.0.59
- @llamaindex/qdrant@0.1.26
- @llamaindex/supabase@0.1.17
- @llamaindex/upstash@0.0.30
- @llamaindex/weaviate@0.0.31
- @llamaindex/vercel@0.1.16
- @llamaindex/voyage-ai@1.0.22
- @llamaindex/readers@3.1.15
- @llamaindex/tools@0.1.6
- @llamaindex/deepseek@0.0.28
- @llamaindex/fireworks@0.0.27
- @llamaindex/groq@0.0.83
- @llamaindex/together@0.0.27
- @llamaindex/vllm@0.0.53
- @llamaindex/xai@0.0.14
## 0.3.31
### Patch Changes
- Updated dependencies [d8f4f6a]
- Updated dependencies [856dd8c]
- @llamaindex/supabase@0.1.16
- @llamaindex/openai@0.4.10
- @llamaindex/clip@0.0.66
- @llamaindex/deepinfra@0.0.66
- @llamaindex/deepseek@0.0.27
- @llamaindex/fireworks@0.0.26
- @llamaindex/groq@0.0.82
- @llamaindex/huggingface@0.1.20
- @llamaindex/jinaai@0.0.26
- @llamaindex/perplexity@0.0.23
- @llamaindex/azure@0.1.27
- @llamaindex/together@0.0.26
- @llamaindex/vllm@0.0.52
- @llamaindex/xai@0.0.13
## 0.3.30
### Patch Changes
+1
View File
@@ -24,6 +24,7 @@ async function main() {
state: result.data.state,
});
console.log(`${JSON.stringify(caResult, null, 2)}`);
console.log("assistant message:", result.data.message);
}
main().catch((error) => {
+150
View File
@@ -0,0 +1,150 @@
/**
* Example: Vector Memory Block
*
* This example demonstrates how to use the VectorMemoryBlock to store and retrieve
* conversation history using vector similarity search. The vector memory block
* stores messages in a vector store and can retrieve relevant context based on
* semantic similarity to recent messages.
*/
import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
import { QdrantVectorStore } from "@llamaindex/qdrant";
import { createMemory, vectorBlock } from "llamaindex";
// Set up the LLM and embedding model.
// `llm` is handed to the memory (createMemory) and also used for the final chat call;
// `embedModel` is handed to the Qdrant vector store below.
const llm = new OpenAI({ model: "gpt-4.1-mini" });
const embedModel = new OpenAIEmbedding({ model: "text-embedding-3-small" });
// Simulate a conversation with some context.
// This conversation has 9 messages, which is more than the token limit of 100 tokens (set below).
// The last 4 messages are kept in the short-term memory (their tokens fit within the limit),
// whereas the first 5 messages are moved to long-term memory (here, the vector memory block backed by Qdrant).
const CONVERSATION_TURNS = [
//// These are the first 5 messages, which are added to long-term memory (the vector memory block)
{
role: "user",
content: "Hi, I'm Sarah and I work as a data scientist at Google.",
},
{
role: "assistant",
content:
"Hello Sarah! It's great to meet you. Data science at Google must be exciting!",
},
{
role: "user",
content:
"Yes, I specialize in machine learning and natural language processing.",
},
{
role: "assistant",
content: "That's impressive! ML and NLP are fascinating fields.",
},
{
role: "user",
content:
"I have a PhD in Computer Science from Stanford, and I love hiking on weekends.",
},
//// These are the last 4 messages, which stay in short-term memory
{
role: "assistant",
content:
"Wow, Stanford PhD! And hiking is a great way to unwind from tech work.",
},
{
role: "user",
content: "I also have two cats named Whiskers and Mittens.",
},
{
role: "assistant",
content:
"Cats make wonderful companions! Whiskers and Mittens are cute names.",
},
{
role: "user",
content: "Summary information about Sarah and her cats",
},
];
/**
 * Runs the vector memory block demo end to end:
 * builds a Qdrant-backed vector memory block, feeds the sample conversation
 * into a small-budget memory, asks the LLM to answer from the resulting
 * history, then appends a second batch of messages and prints the new history.
 */
async function main() {
  console.log("=== Vector Memory Block Example ===\n");

  /**
   * The vector store backing the memory block. A local Qdrant instance can be
   * started quickly with Docker:
   * ```bash
   * docker pull qdrant/qdrant
   * docker run -p 6333:6333 qdrant/qdrant
   * ```
   *
   * Stored data can be inspected at http://localhost:6333/dashboard#/collections
   */
  const qdrantStore = new QdrantVectorStore({
    url: "http://localhost:6333",
    embedModel,
  });

  // Memory with a deliberately tiny token budget so that older messages spill
  // over into the vector memory block (created via the `vectorBlock` factory).
  const memory = createMemory([], {
    llm,
    memoryBlocks: [vectorBlock({ vectorStore: qdrantStore, priority: 5 })],
    tokenLimit: 100,
    shortTermTokenLimitRatio: 0.7,
  });

  // Messages are added strictly one after another to keep conversation order.
  const addSequentially = async (
    turns: typeof CONVERSATION_TURNS,
  ): Promise<void> => {
    for (let i = 0; i < turns.length; i++) {
      await memory.add(turns[i]);
    }
  };

  // Phase 1: store the sample conversation.
  console.log(
    "Adding " + CONVERSATION_TURNS.length + " messages to the memory...",
  );
  await addSequentially(CONVERSATION_TURNS);

  // Phase 2: read the history back as the LLM would see it.
  console.log("Retrieving relevant context...");
  const historyForLLM = await memory.getLLM();
  // Expected: 1 generated context message from the vector memory block,
  // followed by the 4 messages that remained in short-term memory.
  console.log("Chat memory:", historyForLLM);

  // Phase 3: let the assistant answer using that history.
  console.log("\nAssistant response with context:");
  const reply = await llm.chat({ messages: historyForLLM });
  console.log(reply.message.content);

  // Phase 4: append a follow-up exchange and inspect the history again.
  const followUpTurns = [
    {
      role: "user",
      content: "Write a long paragraph about weather in Tokyo",
    },
    {
      role: "assistant",
      content:
        "The weather in Tokyo is sunny and warm. The temperature is around 20 degrees Celsius. The weather is very nice and the people are friendly.",
    },
    {
      role: "user",
      content: "What is the weather in Tokyo?",
    },
  ];
  await addSequentially(followUpTurns);

  const refreshedHistory = await memory.getLLM();
  // The context message generated by the vector memory block now contains the
  // retrieved nodes separated by `\n`. How many nodes are retrieved is set by
  // `similarityTopK` in the `queryOptions` of `vectorBlock` (default: 2).
  console.log("New chat history:", refreshedHistory);
}

main().catch(console.error);
+47 -47
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/examples",
"version": "0.3.30",
"version": "0.3.33",
"private": true,
"scripts": {
"lint": "eslint .",
@@ -11,52 +11,52 @@
"@azure/cosmos": "^4.1.1",
"@azure/identity": "^4.4.1",
"@azure/search-documents": "^12.1.0",
"@llamaindex/anthropic": "^0.3.17",
"@llamaindex/assemblyai": "^0.1.14",
"@llamaindex/astra": "^0.0.29",
"@llamaindex/azure": "^0.1.26",
"@llamaindex/bm25-retriever": "^0.0.4",
"@llamaindex/chroma": "^0.0.29",
"@llamaindex/clip": "^0.0.65",
"@llamaindex/cloud": "^4.0.24",
"@llamaindex/cohere": "^0.0.29",
"@llamaindex/core": "^0.6.15",
"@llamaindex/deepinfra": "^0.0.65",
"@llamaindex/deepseek": "^0.0.26",
"@llamaindex/discord": "^0.1.14",
"@llamaindex/elastic-search": "^0.1.15",
"@llamaindex/anthropic": "^0.3.19",
"@llamaindex/assemblyai": "^0.1.16",
"@llamaindex/astra": "^0.0.31",
"@llamaindex/azure": "^0.1.29",
"@llamaindex/bm25-retriever": "^0.0.6",
"@llamaindex/chroma": "^0.0.31",
"@llamaindex/clip": "^0.0.68",
"@llamaindex/cloud": "^4.0.26",
"@llamaindex/cohere": "^0.0.31",
"@llamaindex/core": "^0.6.17",
"@llamaindex/deepinfra": "^0.0.68",
"@llamaindex/deepseek": "^0.0.29",
"@llamaindex/discord": "^0.1.16",
"@llamaindex/elastic-search": "^0.1.17",
"@llamaindex/env": "^0.1.30",
"@llamaindex/firestore": "^1.0.22",
"@llamaindex/fireworks": "^0.0.25",
"@llamaindex/google": "^0.3.14",
"@llamaindex/groq": "^0.0.81",
"@llamaindex/huggingface": "^0.1.19",
"@llamaindex/jinaai": "^0.0.25",
"@llamaindex/milvus": "^0.1.24",
"@llamaindex/mistral": "^0.1.15",
"@llamaindex/mixedbread": "^0.0.29",
"@llamaindex/mongodb": "^0.0.30",
"@llamaindex/node-parser": "^2.0.15",
"@llamaindex/notion": "^0.1.14",
"@llamaindex/ollama": "^0.1.15",
"@llamaindex/openai": "^0.4.9",
"@llamaindex/perplexity": "^0.0.22",
"@llamaindex/pinecone": "^0.1.15",
"@llamaindex/portkey-ai": "^0.0.57",
"@llamaindex/postgres": "^0.0.58",
"@llamaindex/qdrant": "^0.1.25",
"@llamaindex/readers": "^3.1.14",
"@llamaindex/replicate": "^0.0.57",
"@llamaindex/supabase": "^0.1.15",
"@llamaindex/together": "^0.0.25",
"@llamaindex/tools": "^0.1.5",
"@llamaindex/upstash": "^0.0.29",
"@llamaindex/vercel": "^0.1.15",
"@llamaindex/vllm": "^0.0.51",
"@llamaindex/voyage-ai": "^1.0.21",
"@llamaindex/weaviate": "^0.0.30",
"@llamaindex/workflow": "^1.1.15",
"@llamaindex/xai": "^0.0.12",
"@llamaindex/firestore": "^1.0.24",
"@llamaindex/fireworks": "^0.0.28",
"@llamaindex/google": "^0.3.16",
"@llamaindex/groq": "^0.0.84",
"@llamaindex/huggingface": "^0.1.22",
"@llamaindex/jinaai": "^0.0.28",
"@llamaindex/milvus": "^0.1.26",
"@llamaindex/mistral": "^0.1.17",
"@llamaindex/mixedbread": "^0.0.31",
"@llamaindex/mongodb": "^0.0.32",
"@llamaindex/node-parser": "^2.0.17",
"@llamaindex/notion": "^0.1.16",
"@llamaindex/ollama": "^0.1.17",
"@llamaindex/openai": "^0.4.12",
"@llamaindex/perplexity": "^0.0.25",
"@llamaindex/pinecone": "^0.1.17",
"@llamaindex/portkey-ai": "^0.0.59",
"@llamaindex/postgres": "^0.0.60",
"@llamaindex/qdrant": "^0.1.27",
"@llamaindex/readers": "^3.1.16",
"@llamaindex/replicate": "^0.0.59",
"@llamaindex/supabase": "^0.1.18",
"@llamaindex/together": "^0.0.28",
"@llamaindex/tools": "^0.1.7",
"@llamaindex/upstash": "^0.0.31",
"@llamaindex/vercel": "^0.1.17",
"@llamaindex/vllm": "^0.0.54",
"@llamaindex/voyage-ai": "^1.0.23",
"@llamaindex/weaviate": "^0.0.32",
"@llamaindex/workflow": "^1.1.17",
"@llamaindex/xai": "^0.0.15",
"@notionhq/client": "^4.0.0",
"@pinecone-database/pinecone": "^4.0.0",
"@vercel/postgres": "^0.10.0",
@@ -65,7 +65,7 @@
"commander": "^12.1.0",
"dotenv": "^17.2.0",
"js-tiktoken": "^1.0.14",
"llamaindex": "^0.11.19",
"llamaindex": "^0.11.21",
"mongodb": "6.7.0",
"postgres": "^3.4.4",
"wikipedia": "^2.1.2",
+1 -1
View File
@@ -15,7 +15,7 @@ async function main() {
const vectorStore = new QdrantVectorStore({
url: process.env.QDRANT_URL,
apiKey: process.env.QDRANT_API_KEY,
embeddingModel: embedding,
embedModel: embedding,
collectionName: "gemini_test",
});
const storageContext = await storageContextFromDefaults({ vectorStore });
+1 -1
View File
@@ -16,7 +16,7 @@ async function main() {
const vectorStore = new QdrantVectorStore({
url: process.env.QDRANT_URL,
apiKey: process.env.QDRANT_API_KEY,
embeddingModel: embedding,
embedModel: embedding,
collectionName: "jina_test",
});
const storageContext = await storageContextFromDefaults({ vectorStore });
+12
View File
@@ -1,5 +1,17 @@
# @llamaindex/autotool
## 8.0.21
### Patch Changes
- llamaindex@0.11.21
## 8.0.20
### Patch Changes
- llamaindex@0.11.20
## 8.0.19
### Patch Changes
@@ -1,5 +1,19 @@
# @llamaindex/autotool-01-node-example
## 0.0.129
### Patch Changes
- llamaindex@0.11.21
- @llamaindex/autotool@8.0.21
## 0.0.128
### Patch Changes
- llamaindex@0.11.20
- @llamaindex/autotool@8.0.20
## 0.0.127
### Patch Changes
@@ -13,5 +13,5 @@
"scripts": {
"start": "node --import tsx --import @llamaindex/autotool/node ./src/index.ts"
},
"version": "0.0.127"
"version": "0.0.129"
}
+1 -1
View File
@@ -6,7 +6,7 @@
"url": "git+https://github.com/run-llama/LlamaIndexTS.git",
"directory": "packages/autotool"
},
"version": "8.0.19",
"version": "8.0.21",
"description": "auto transpile your JS function to LLM Agent compatible",
"files": [
"dist",
+15
View File
@@ -1,5 +1,20 @@
# @llamaindex/cloud
## 4.0.26
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 4.0.25
### Patch Changes
- 2967d57: Default to \_public agent url id
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 4.0.24
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloud",
"version": "4.0.24",
"version": "4.0.26",
"type": "module",
"license": "MIT",
"scripts": {
+21 -6
View File
@@ -33,7 +33,7 @@ export class AgentClient<T = unknown> {
apiKey = getEnv("LLAMA_CLOUD_API_KEY"),
baseUrl = "https://api.cloud.llamaindex.ai/",
collection = "default",
agentUrlId = "default",
agentUrlId = "_public",
}: {
apiKey?: string;
baseUrl?: string;
@@ -127,7 +127,7 @@ export class AgentClient<T = unknown> {
}
/**
* List agent data
* Search agent data
*/
async search(
options: SearchAgentDataOptions,
@@ -275,7 +275,8 @@ export interface AgentDataClientOptions<T = unknown> {
collection?: string;
}
/**
* Create a new AsyncAgentDataClient instance
* Create a new AsyncAgentDataClient instance. Does its best to infer an agent url id from the environment.
* Pass in the window url and/or env to infer the agent url id from them.
* @param options - The options for the client
* @returns A new AgentClient instance
*/
@@ -283,20 +284,34 @@ export function createAgentDataClient<T = unknown>({
apiKey,
baseUrl,
windowUrl,
env,
agentUrlId,
collection = "default",
}: {
apiKey?: string;
baseUrl?: string;
windowUrl?: string;
env?: Record<string, string>;
agentUrlId?: string;
collection?: string;
} = {}): AgentClient<T> {
if (env && !agentUrlId) {
agentUrlId =
env.LLAMA_DEPLOY_DEPLOYMENT_NAME ||
env.NEXT_PUBLIC_LLAMA_DEPLOY_DEPLOYMENT_NAME ||
env.VITE_LLAMA_DEPLOY_DEPLOYMENT_NAME;
}
if (windowUrl && !agentUrlId) {
try {
const path = new URL(windowUrl).pathname;
// /deployments/<agent-url-id>/ui/ -> ["", "deployments", "<agent-url-id>", "ui"]
agentUrlId = path.split("/")[2];
const url = new URL(windowUrl);
const path = url.pathname;
const isLocalhost = // local agents should default to _public, otherwise a full deployment is required
url.hostname.includes("localhost") ||
url.hostname.includes("127.0.0.1");
if (path.startsWith("/deployments/") && !isLocalhost) {
// /deployments/<agent-url-id>/ui/ -> ["", "deployments", "<agent-url-id>", "ui"]
agentUrlId = path.split("/")[2];
}
} catch (error) {
console.warn(
"Failed to infer agent url id from window url, falling back to default",
+12
View File
@@ -1,5 +1,17 @@
# @llamaindex/core
## 0.6.17
### Patch Changes
- 38da40b: feat: VectorMemoryBlock
## 0.6.16
### Patch Changes
- a8ec08c: fix: ensure correct message content in agent workflow
## 0.6.15
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/core",
"type": "module",
"version": "0.6.15",
"version": "0.6.17",
"description": "LlamaIndex Core Module",
"exports": {
"./agent": {
+3 -1
View File
@@ -39,7 +39,9 @@ export abstract class BaseMemoryBlock<
*
* @returns The memory block content as an array of ChatMessage.
*/
abstract get(): Promise<MemoryMessage<TAdditionalMessageOptions>[]>;
abstract get(
messages?: MemoryMessage<TAdditionalMessageOptions>[],
): Promise<MemoryMessage<TAdditionalMessageOptions>[]>;
/**
* Store the messages in the memory block.
+1
View File
@@ -1,3 +1,4 @@
export { BaseMemoryBlock } from "./base";
export { FactExtractionMemoryBlock } from "./fact";
export { StaticMemoryBlock } from "./static";
export { VectorMemoryBlock } from "./vector";
+250
View File
@@ -0,0 +1,250 @@
import type { BaseEmbedding } from "../../embeddings";
import type { BaseNodePostprocessor } from "../../postprocessor";
import { BasePromptTemplate, defaultContextSystemPrompt } from "../../prompts";
import type { NodeWithScore } from "../../schema";
import { MetadataMode, TextNode } from "../../schema";
import { extractText } from "../../utils/llms";
import type {
BaseVectorStore,
MetadataFilter,
VectorStoreQuery,
} from "../../vector-store";
import { VectorStoreQueryMode } from "../../vector-store";
import type { MemoryMessage } from "../types";
import { BaseMemoryBlock, type MemoryBlockOptions } from "./base";
/**
* The options for the vector memory block.
*
* These are combined with the base `MemoryBlockOptions`.
*/
export type VectorMemoryBlockOptions = {
/**
* The vector store to use for retrieval.
* The block's constructor throws if the store does not report `storesText`.
*/
vectorStore: BaseVectorStore;
/**
* Maximum number of messages to include for context when retrieving.
* @default 5
*/
retrievalContextWindow?: number;
/**
* Template for formatting the retrieved information.
* It is formatted with a single `context` variable holding the retrieved text.
* @default defaultContextSystemPrompt
*/
formatTemplate?: BasePromptTemplate;
/**
* List of node postprocessors to apply to the retrieved nodes containing messages.
*
* @default []
*/
nodePostprocessors?: BaseNodePostprocessor[];
/**
* Configuration options for vector store queries when retrieving memory.
*
* @default
* ```typescript
* {
*   similarityTopK: 2, // Number of top similar results to return
*   mode: VectorStoreQueryMode.DEFAULT, // Query mode for the vector store
*   sessionFilterKey: "session_id", // Metadata key for session filtering
*   filters: {
*     filters: [
*       { key: "session_id", value: "<current block id>", operator: "==" }
*     ],
*     condition: "and"
*   }
* }
* ```
*
* Note: A session filter is automatically added to ensure memory isolation between blocks.
* If custom filters are provided, the session filter is merged with them, unless they
* already contain a filter whose key equals `sessionFilterKey`.
*/
queryOptions?: Partial<VectorMemoryBlockQueryOptions>;
} & MemoryBlockOptions;
/**
* Query options used by the vector memory block.
*
* This is `VectorStoreQuery` without `queryEmbedding` and `queryStr` (the block
* fills those in for every retrieval), plus `sessionFilterKey`: the metadata key
* under which the block id is written when storing nodes and which the session
* filter matches on when querying.
*/
export type VectorMemoryBlockQueryOptions = Omit<
VectorStoreQuery,
"queryEmbedding" | "queryStr"
> & {
sessionFilterKey: string;
};
/**
 * A memory block that retrieves relevant information from a vector store.
 *
 * This block stores conversation history in a vector store and retrieves
 * relevant information based on the most recent messages. Every stored node is
 * tagged with the block id under `queryOptions.sessionFilterKey`, and every
 * query is filtered on that key so blocks do not read each other's data.
 */
export class VectorMemoryBlock<
  TAdditionalMessageOptions extends object = object,
> extends BaseMemoryBlock<TAdditionalMessageOptions> {
  private readonly vectorStore: BaseVectorStore;
  private readonly retrievalContextWindow: number;
  private readonly formatTemplate: BasePromptTemplate;
  private readonly nodePostprocessors: BaseNodePostprocessor[];
  private readonly queryOptions: VectorMemoryBlockQueryOptions;

  /**
   * @param options - Vector memory block options (see `VectorMemoryBlockOptions`).
   * @throws Error if `options.vectorStore` does not store text.
   */
  constructor(options: VectorMemoryBlockOptions) {
    super(options);

    // Retrieval reads node text back from the store, so it must store text.
    if (!options.vectorStore.storesText) {
      throw new Error(
        "vectorStore must store text to be used as a retrieval memory block",
      );
    }

    this.vectorStore = options.vectorStore;
    this.retrievalContextWindow = options.retrievalContextWindow ?? 5;
    this.queryOptions = this.buildDefaultQueryOptions(options.queryOptions);
    this.formatTemplate = options.formatTemplate ?? defaultContextSystemPrompt;
    this.nodePostprocessors = options.nodePostprocessors ?? [];
  }

  /** The embedding model of the underlying vector store. */
  get embedModel(): BaseEmbedding {
    return this.vectorStore.embedModel;
  }

  /**
   * Retrieve stored content relevant to the most recent messages.
   *
   * @param messages - Conversation messages used to build the retrieval query.
   * @returns A single `memory` message containing the formatted retrieved text,
   * or an empty array when there is nothing to query with or nothing was found.
   */
  async get(
    messages: MemoryMessage<TAdditionalMessageOptions>[] = [],
  ): Promise<MemoryMessage<TAdditionalMessageOptions>[]> {
    if (messages.length === 0) return [];

    // Build the query from the last `retrievalContextWindow` messages.
    // `slice(-n)` already returns the whole array when it is shorter than n,
    // and it also handles n === 1 (last message only). A non-positive window
    // keeps the previous behavior of using every message.
    const windowSize = this.retrievalContextWindow;
    const context = windowSize > 0 ? messages.slice(-windowSize) : messages;

    const queryText = context
      .map((message) => extractText(message.content))
      .join("\n\n");
    if (!queryText) return [];

    // Create and execute the query
    const queryEmbedding = await this.embedModel.getTextEmbedding(queryText);
    const query: VectorStoreQuery = {
      queryStr: queryText,
      queryEmbedding,
      ...this.queryOptions,
    };

    const results = await this.vectorStore.query(query);
    if (!results.nodes?.length) return [];

    // Pair each node with its similarity score (when the store reports one)
    const nodesWithScores: NodeWithScore[] = results.nodes.map(
      (node, index) => ({
        node,
        score: results.similarities?.[index] ?? undefined,
      }),
    );

    // Apply postprocessors in order; each one receives the previous output
    let processedNodes = nodesWithScores;
    for (const postprocessor of this.nodePostprocessors) {
      processedNodes = await postprocessor.postprocessNodes(
        processedNodes,
        queryText,
      );
    }

    // Format the results
    const retrievedText = processedNodes
      .map(({ node }) => node.getContent(MetadataMode.NONE))
      .join("\n\n");
    const formattedText = this.formatTemplate.format({
      context: retrievedText,
    });

    // Return as memory message
    return [
      {
        id: this.id,
        role: "memory",
        content: formattedText,
      } as MemoryMessage<TAdditionalMessageOptions>,
    ];
  }

  /**
   * Store messages in the vector store as one text node tagged with this
   * block's id. Messages without text content are skipped.
   *
   * @param messages - The messages to persist.
   */
  async put(
    messages: MemoryMessage<TAdditionalMessageOptions>[],
  ): Promise<void> {
    if (messages.length === 0) return;

    // Format messages with role, text content, and additional info
    const texts: string[] = [];
    for (const message of messages) {
      const text = extractText(message.content);
      if (!text) continue;

      let messageText = text;
      // Add additional info if present
      const additionalInfo = (message.options ?? {}) as Record<string, unknown>;
      if (Object.keys(additionalInfo).length > 0) {
        messageText += `\nAdditional Info: (${JSON.stringify(additionalInfo)})`;
      }
      texts.push(`<message role='${message.role}'>${messageText}</message>`);
    }
    if (texts.length === 0) return;

    // Create text node with session metadata
    const textNode = new TextNode({
      text: texts.join("\n"),
      metadata: { [this.queryOptions.sessionFilterKey]: this.id },
    });

    // Get embedding for the text
    textNode.embedding = await this.embedModel.getTextEmbedding(textNode.text);

    // Add to vector store
    await this.vectorStore.add([textNode]);
  }

  /**
   * Fill in default query options and make sure a session filter is present.
   *
   * The caller's `filters` object is never mutated: when the session filter
   * has to be appended, a copy is created. Mutating it would leak this block's
   * id into an options object that may be reused for another block, which
   * would then skip adding its own session filter.
   *
   * @param options - User-supplied query options, if any.
   * @returns The complete query options used by this block.
   */
  private buildDefaultQueryOptions(
    options: Partial<VectorMemoryBlockQueryOptions> | undefined,
  ): VectorMemoryBlockQueryOptions {
    const {
      similarityTopK = 2,
      mode = VectorStoreQueryMode.DEFAULT,
      sessionFilterKey = "session_id",
    } = options ?? {};

    let filters = options?.filters;

    const sessionFilter: MetadataFilter = {
      key: sessionFilterKey,
      value: this.id,
      operator: "==",
    };

    if (filters) {
      // Only add the session filter if the caller did not provide one already
      const sessionIdFilterExists = filters.filters.some(
        (filter) => filter.key === sessionFilterKey,
      );
      if (!sessionIdFilterExists) {
        filters = {
          ...filters,
          filters: [...filters.filters, sessionFilter],
        };
      }
    } else {
      // If no filters are provided, filter on the session only
      filters = {
        filters: [sessionFilter],
        condition: "and",
      };
    }

    return { ...options, similarityTopK, mode, sessionFilterKey, filters };
  }
}
+15
View File
@@ -8,6 +8,10 @@ import {
StaticMemoryBlock,
type StaticMemoryBlockOptions,
} from "./block/static";
import {
VectorMemoryBlock,
type VectorMemoryBlockOptions,
} from "./block/vector";
import { DEFAULT_TOKEN_LIMIT, Memory, type MemoryOptions } from "./memory";
import type { MemoryMessage } from "./types";
@@ -115,6 +119,17 @@ export function factExtractionBlock<TMessageOptions extends object = object>(
return new FactExtractionMemoryBlock<TMessageOptions>(options);
}
/**
* Creates a VectorMemoryBlock.
* @param options - Configuration options for the vector memory block
* @returns A new VectorMemoryBlock instance
*/
export function vectorBlock<TMessageOptions extends object = object>(
options: VectorMemoryBlockOptions,
): VectorMemoryBlock<TMessageOptions> {
return new VectorMemoryBlock<TMessageOptions>(options);
}
/**
* Creates a new Memory instance from a snapshot
* @param snapshot The snapshot to load from
+36 -3
View File
@@ -31,6 +31,13 @@ export type MemoryOptions<TMessageOptions extends object = object> = {
* Used internally for memory restoration from snapshots.
*/
memoryCursor?: number;
/**
* The default LLM to use for memory retrieval.
* If not provided, the default `Settings.llm` will be used.
* This default LLM can be overridden by the LLM passed in the `getLLM` method.
*/
llm?: LLM | undefined;
};
export class Memory<
@@ -65,6 +72,10 @@ export class Memory<
* The cursor for the messages that have been processed into long-term memory.
*/
private memoryCursor: number = 0;
/**
* The default LLM to use for memory retrieval.
*/
private llm: LLM | undefined;
constructor(
messages: MemoryMessage<TMessageOptions>[] = [],
@@ -76,6 +87,7 @@ export class Memory<
options.shortTermTokenLimitRatio ?? DEFAULT_SHORT_TERM_TOKEN_LIMIT_RATIO;
this.memoryBlocks = options.memoryBlocks ?? [];
this.memoryCursor = options.memoryCursor ?? 0;
this.initLLM(options.llm);
this.adapters = {
...options.customAdapters,
@@ -84,6 +96,15 @@ export class Memory<
} as TAdapters & BuiltinAdapters<TMessageOptions>;
}
/**
 * Resolves the default LLM for this memory: the given `llm`, else `Settings.llm`.
 * Reading `Settings.llm` may throw when no LLM has been configured yet; in that
 * case the default is left undefined instead of failing construction.
 */
private initLLM(llm: LLM | undefined) {
  let resolved: LLM | undefined;
  try {
    resolved = llm ?? Settings.llm;
  } catch (error) {
    // best effort: no LLM available yet
    resolved = undefined;
  }
  this.llm = resolved;
}
/**
* Add a message to the memory
* @param message - The message to add to the memory
@@ -160,12 +181,13 @@ export class Memory<
/**
* Get the messages from the memory, optionally including transient messages.
* only return messages that are within context window of the LLM
* @param llm - To fit the result messages to the context window of the LLM. If not provided, the default token limit will be used.
* @param llm - To fit the result messages to the context window of the LLM (fallback to default llm if not provided).
* If llm is not specified in both the constructor and the method, the default token limit will be used.
* @param transientMessages - Optional transient messages to include.
* @returns The messages from the memory, optionally including transient messages.
*/
async getLLM(
llm?: LLM,
llm: LLM | undefined = this.llm,
transientMessages?: ChatMessage<TMessageOptions>[],
): Promise<ChatMessage[]> {
// Priority of result messages:
@@ -176,11 +198,20 @@ export class Memory<
? Math.ceil(contextWindow * DEFAULT_TOKEN_LIMIT_RATIO)
: this.tokenLimit;
let blockInputMessages = this.messages;
if (transientMessages && transientMessages.length > 0) {
blockInputMessages = [
...this.messages,
...transientMessages.map((m) => this.adapters.llamaindex.toMemory(m)),
];
}
// Start with fixed block messages (priority=0)
// as it must always be included in the retrieval result
const messages = await this.getMemoryBlockMessages(
this.memoryBlocks.filter((block) => block.priority === 0),
tokenLimit,
blockInputMessages,
);
// remaining token limit for short-term and memory blocks content
const remainingTokenLimit =
@@ -207,6 +238,7 @@ export class Memory<
const longTermBlockMessages = await this.getMemoryBlockMessages(
longTermBlocks,
memoryBlocksTokenLimit,
blockInputMessages,
);
messages.push(...longTermBlockMessages);
@@ -252,6 +284,7 @@ export class Memory<
private async getMemoryBlockMessages(
blocks: BaseMemoryBlock<TMessageOptions>[],
tokenLimit?: number,
messages?: MemoryMessage<TMessageOptions>[],
): Promise<ChatMessage<TMessageOptions>[]> {
if (blocks.length === 0) {
return [];
@@ -265,7 +298,7 @@ export class Memory<
let addedTokenCount = 0;
for (const block of sortedBlocks) {
try {
const content = await block.get();
const content = await block.get(messages);
for (const message of content) {
const chatMessage = this.adapters.llamaindex.fromMemory(message);
const messageTokenCount = this.countMessagesToken([chatMessage]);
+35
View File
@@ -56,10 +56,45 @@ export function prettifyError(error: unknown): string {
}
}
/**
 * Serializes a JSON value as pretty-printed text (2-space indent) and strips
 * the double quotes around every quoted segment, so keys and string values
 * appear bare in the message content.
 *
 * @param value - The JSON value to stringify
 * @returns The stringified JSON with double-quote pairs removed
 */
export function stringifyJSONToMessageContent(value: JSONValue): string {
  const prettyJson = JSON.stringify(value, null, 2);
  // Replace each "..." run with its inner text.
  return prettyJson.replace(/"([^"]*)"/g, (_quoted, inner: string) => inner);
}
/**
 * Asserts that a value is a JSONValue: a string, number, boolean, or an
 * array/plain object whose elements/property values are themselves JSONValues
 * (checked recursively).
 *
 * `null`, `undefined`, functions, symbols and bigints are rejected.
 *
 * @param value - The value to validate
 * @throws Error if the value, or anything nested inside it, is not a JSONValue
 */
export function assertIsJSONValue(value: unknown): asserts value is JSONValue {
  switch (typeof value) {
    case "string":
    case "number":
    case "boolean":
      return;
    case "object": {
      // typeof null === "object"; null is not accepted, so fall through to the throw
      if (value === null) break;
      // Arrays are iterated with for-of so holes in sparse arrays are visited
      // (as undefined) and rejected. For plain objects only the values need
      // checking: own enumerable keys are always strings.
      const children: unknown[] = Array.isArray(value)
        ? value
        : Object.values(value);
      for (const child of children) {
        assertIsJSONValue(child);
      }
      return;
    }
    default:
      break;
  }
  throw new Error(`Value is not a valid JSONValue: ${String(value)}`);
}
export {
extractDataUrlComponents,
extractImage,
+4 -1
View File
@@ -101,7 +101,9 @@ export type VectorStoreByType = {
};
/**
 * Base constructor parameters for vector stores.
 */
export type VectorStoreBaseParams = {
/** @deprecated Use `embedModel` instead. */
embeddingModel?: BaseEmbedding | undefined;
/** Embedding model for the store; replaces the deprecated `embeddingModel`. */
embedModel?: BaseEmbedding | undefined;
};
export abstract class BaseVectorStore<Client = unknown, T = unknown> {
@@ -117,7 +119,8 @@ export abstract class BaseVectorStore<Client = unknown, T = unknown> {
): Promise<VectorStoreQueryResult>;
protected constructor(params?: VectorStoreBaseParams) {
this.embedModel = params?.embeddingModel ?? Settings.embedModel;
this.embedModel =
params?.embedModel ?? params?.embeddingModel ?? Settings.embedModel;
}
}
+12
View File
@@ -1,5 +1,17 @@
# @llamaindex/experimental
## 0.0.198
### Patch Changes
- llamaindex@0.11.21
## 0.0.197
### Patch Changes
- llamaindex@0.11.20
## 0.0.196
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/experimental",
"description": "Experimental package for LlamaIndexTS",
"version": "0.0.196",
"version": "0.0.198",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+21
View File
@@ -1,5 +1,26 @@
# llamaindex
## 0.11.21
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/cloud@4.0.26
- @llamaindex/node-parser@2.0.17
- @llamaindex/workflow@1.1.17
## 0.11.20
### Patch Changes
- Updated dependencies [a8ec08c]
- Updated dependencies [2967d57]
- @llamaindex/core@0.6.16
- @llamaindex/workflow@1.1.16
- @llamaindex/cloud@4.0.25
- @llamaindex/node-parser@2.0.16
## 0.11.19
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "llamaindex",
"version": "0.11.19",
"version": "0.11.21",
"license": "MIT",
"type": "module",
"keywords": [
@@ -272,7 +272,7 @@ export class SimpleVectorStore extends BaseVectorStore {
static async fromPersistPath(
persistPath: string,
embeddingModel?: BaseEmbedding,
embedModel?: BaseEmbedding,
): Promise<SimpleVectorStore> {
const dirPath = path.dirname(persistPath);
if (!(await exists(dirPath))) {
@@ -300,20 +300,20 @@ export class SimpleVectorStore extends BaseVectorStore {
data.textIdToRefDocId = dataDict.textIdToRefDocId ?? {};
// @ts-expect-error TS2322
data.metadataDict = dataDict.metadataDict ?? {};
const store = new SimpleVectorStore({ data, embeddingModel });
const store = new SimpleVectorStore({ data, embedModel });
store.persistPath = persistPath;
return store;
}
static fromDict(
saveDict: SimpleVectorStoreData,
embeddingModel?: BaseEmbedding,
embedModel?: BaseEmbedding,
): SimpleVectorStore {
const data = new SimpleVectorStoreData();
data.embeddingDict = saveDict.embeddingDict;
data.textIdToRefDocId = saveDict.textIdToRefDocId;
data.metadataDict = saveDict.metadataDict;
return new SimpleVectorStore({ data, embeddingModel });
return new SimpleVectorStore({ data, embedModel });
}
toDict(): SimpleVectorStoreData {
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/core-test
## 0.1.13
### Patch Changes
- @llamaindex/openai@0.4.12
## 0.1.12
### Patch Changes
- @llamaindex/openai@0.4.11
## 0.1.11
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.1.10
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llamaindex-test",
"private": true,
"version": "0.1.10",
"version": "0.1.13",
"type": "module",
"scripts": {
"test": "vitest run"
@@ -59,7 +59,7 @@ describe("SimpleVectorStore", () => {
}),
];
store = new SimpleVectorStore({
embeddingModel: {} as BaseEmbedding, // Mocking the embedModel
embedModel: {} as BaseEmbedding, // Mocking the embedModel
data: {
embeddingDict: {},
textIdToRefDocId: {},
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/node-parser
## 2.0.17
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 2.0.16
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 2.0.15
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/node-parser",
"version": "2.0.15",
"version": "2.0.17",
"description": "Node parser for LlamaIndex",
"type": "module",
"exports": {
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/anthropic
## 0.3.19
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.3.18
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.3.17
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/anthropic",
"description": "Anthropic Adapter for LlamaIndex",
"version": "0.3.17",
"version": "0.3.19",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
@@ -1,5 +1,19 @@
# @llamaindex/assemblyai
## 0.1.16
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.15
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.14
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/assemblyai",
"description": "AssemblyAI Reader for LlamaIndex",
"version": "0.1.14",
"version": "0.1.16",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+15
View File
@@ -1,5 +1,20 @@
# @llamaindex/community
## 0.0.112
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.0.111
### Patch Changes
- 678b327: feat: added apac bedrock models
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.0.110
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/aws",
"description": "AWS package for LlamaIndexTS",
"version": "0.0.110",
"version": "0.0.112",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
@@ -134,6 +134,19 @@ export const INFERENCE_BEDROCK_MODELS = {
EU_AMAZON_NOVA_PRO_1: "eu.amazon.nova-pro-v1:0",
EU_AMAZON_NOVA_LITE_1: "eu.amazon.nova-lite-v1:0",
EU_AMAZON_NOVA_MICRO_1: "eu.amazon.nova-micro-v1:0",
APAC_ANTHROPIC_CLAUDE_3_5_SONNET:
"apac.anthropic.claude-3-5-sonnet-20240620-v1:0",
APAC_ANTHROPIC_CLAUDE_3_5_SONNET_V2:
"apac.anthropic.claude-3-5-sonnet-20241022-v2:0",
APAC_ANTHROPIC_CLAUDE_3_7_SONNET:
"apac.anthropic.claude-3-7-sonnet-20250219-v1:0",
APAC_ANTHROPIC_CLAUDE_3_HAIKU: "apac.anthropic.claude-3-haiku-20240307-v1:0",
APAC_ANTHROPIC_CLAUDE_3_SONNET:
"apac.anthropic.claude-3-sonnet-20240229-v1:0",
APAC_AMAZON_NOVA_PRO_1: "apac.amazon.nova-pro-v1:0",
APAC_AMAZON_NOVA_LITE_1: "apac.amazon.nova-lite-v1:0",
APAC_AMAZON_NOVA_MICRO_1: "apac.amazon.nova-micro-v1:0",
};
export type INFERENCE_BEDROCK_MODELS =
@@ -206,6 +219,24 @@ export const INFERENCE_TO_BEDROCK_MAP: Record<
BEDROCK_MODELS.AMAZON_NOVA_LITE_1,
[INFERENCE_BEDROCK_MODELS.EU_AMAZON_NOVA_MICRO_1]:
BEDROCK_MODELS.AMAZON_NOVA_MICRO_1,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_5_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_5_SONNET_V2]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_5_SONNET_V2,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_7_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_7_SONNET,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_HAIKU]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_HAIKU,
[INFERENCE_BEDROCK_MODELS.APAC_ANTHROPIC_CLAUDE_3_SONNET]:
BEDROCK_MODELS.ANTHROPIC_CLAUDE_3_SONNET,
[INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_PRO_1]:
BEDROCK_MODELS.AMAZON_NOVA_PRO_1,
[INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_LITE_1]:
BEDROCK_MODELS.AMAZON_NOVA_LITE_1,
[INFERENCE_BEDROCK_MODELS.APAC_AMAZON_NOVA_MICRO_1]:
BEDROCK_MODELS.AMAZON_NOVA_MICRO_1,
};
/*
+23
View File
@@ -1,5 +1,28 @@
# @llamaindex/clip
## 0.0.68
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.0.67
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.0.66
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.65
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/clip",
"description": "Clip Embedding Adapter for LlamaIndex",
"version": "0.0.65",
"version": "0.0.68",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/cohere
## 0.0.31
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.0.30
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.0.29
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/cohere",
"description": "Cohere Adapter for LlamaIndex",
"version": "0.0.29",
"version": "0.0.31",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+23
View File
@@ -1,5 +1,28 @@
# @llamaindex/deepinfra
## 0.0.68
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.0.67
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.0.66
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.65
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/deepinfra",
"description": "Deepinfra Adapter for LlamaIndex",
"version": "0.0.65",
"version": "0.0.68",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/deepseek
## 0.0.29
### Patch Changes
- @llamaindex/openai@0.4.12
## 0.0.28
### Patch Changes
- @llamaindex/openai@0.4.11
## 0.0.27
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.26
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/deepseek",
"description": "DeepSeek Adapter for LlamaIndex",
"version": "0.0.26",
"version": "0.0.29",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/discord
## 0.1.16
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.15
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.14
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/discord",
"description": "Discord Reader for LlamaIndex",
"version": "0.1.14",
"version": "0.1.16",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/excel
## 0.1.17
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.1.16
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.1.15
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/excel",
"description": "Excel Reader for LlamaIndex",
"version": "0.1.15",
"version": "0.1.17",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/fireworks
## 0.0.28
### Patch Changes
- @llamaindex/openai@0.4.12
## 0.0.27
### Patch Changes
- @llamaindex/openai@0.4.11
## 0.0.26
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.25
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/fireworks",
"description": "Fireworks Adapter for LlamaIndex",
"version": "0.0.25",
"version": "0.0.28",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
+15
View File
@@ -1,5 +1,20 @@
# @llamaindex/google
## 0.3.16
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
## 0.3.15
### Patch Changes
- 650eeb1: fix: GeminiEmbedding should send batches of max 100
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
## 0.3.14
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/google",
"description": "Google Adapter for LlamaIndex",
"version": "0.3.14",
"version": "0.3.16",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
@@ -0,0 +1,248 @@
import { beforeEach, describe, expect, test, vi } from "vitest";
import {
DEFAULT_EMBED_BATCH_SIZE,
GEMINI_EMBEDDING_MODEL,
GeminiEmbedding,
} from "./GeminiEmbedding";
// Mock the Google GenAI module: every GoogleGenAI instance exposes
// models.embedContent backed by the shared mockEmbedContent spy below.
// NOTE(review): vitest hoists vi.mock() above this declaration; this presumably
// works because mockEmbedContent is only read when the mocked constructor is
// invoked — confirm, or declare the spy via vi.hoisted().
const mockEmbedContent = vi.fn();
vi.mock("@google/genai", () => ({
GoogleGenAI: vi.fn().mockImplementation(() => ({
models: {
embedContent: mockEmbedContent,
},
})),
}));
describe("GeminiEmbedding", () => {
  let geminiEmbedding: GeminiEmbedding;
  // Requests captured by the embedContent mock; kept in the outer scope so
  // every test can inspect them.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let capturedBatches: any[];

  beforeEach(() => {
    vi.clearAllMocks();
    geminiEmbedding = new GeminiEmbedding({
      model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
      apiKey: "test-api-key",
    });
    // Default mock for suites that do not install their own implementation
    mockEmbedContent.mockResolvedValue({
      embeddings: [
        { values: [0.1, 0.2, 0.3] },
        { values: [0.4, 0.5, 0.6] },
        { values: [0.7, 0.8, 0.9] },
      ],
    });
  });

  describe("getTextEmbeddingsBatch", () => {
    beforeEach(() => {
      // Reset capturedBatches and install a mock that records each request and
      // answers with one embedding per submitted text.
      capturedBatches = [];
      mockEmbedContent.mockImplementation((args) => {
        // Copy the contents array so the recorded batch is a snapshot of the call
        capturedBatches.push({
          ...args,
          contents: Array.isArray(args.contents)
            ? [...args.contents]
            : args.contents,
        });
        return Promise.resolve({
          embeddings: Array.from(
            { length: Array.isArray(args.contents) ? args.contents.length : 1 },
            (_, i) => ({
              values: [i * 0.1, i * 0.2, i * 0.3],
            }),
          ),
        });
      });
    });

    test("should split texts into batches of at most DEFAULT_EMBED_BATCH_SIZE", async () => {
      // 2.5x the batch size: two full batches plus one half batch are expected
      const texts = Array.from(
        { length: DEFAULT_EMBED_BATCH_SIZE * 2.5 },
        (_, i) => `text ${i + 1}`,
      );

      await geminiEmbedding.getTextEmbeddingsBatch(texts);

      // Verify that embedContent was called exactly 3 times (ceil(2.5) = 3)
      expect(mockEmbedContent).toHaveBeenCalledTimes(3);

      // First batch should have DEFAULT_EMBED_BATCH_SIZE texts
      expect(capturedBatches[0].contents).toHaveLength(
        DEFAULT_EMBED_BATCH_SIZE,
      );
      expect(capturedBatches[0].contents).toEqual(
        texts.slice(0 * DEFAULT_EMBED_BATCH_SIZE, 1 * DEFAULT_EMBED_BATCH_SIZE),
      );

      // Second batch should have DEFAULT_EMBED_BATCH_SIZE texts
      expect(capturedBatches[1].contents).toHaveLength(
        DEFAULT_EMBED_BATCH_SIZE,
      );
      expect(capturedBatches[1].contents).toEqual(
        texts.slice(1 * DEFAULT_EMBED_BATCH_SIZE, 2 * DEFAULT_EMBED_BATCH_SIZE),
      );

      // Third batch should have 0.5 * DEFAULT_EMBED_BATCH_SIZE texts (remaining)
      expect(capturedBatches[2].contents).toHaveLength(
        DEFAULT_EMBED_BATCH_SIZE * 0.5,
      );
      expect(capturedBatches[2].contents).toEqual(
        texts.slice(
          2 * DEFAULT_EMBED_BATCH_SIZE,
          2.5 * DEFAULT_EMBED_BATCH_SIZE,
        ),
      );
    });

    test("should handle exactly DEFAULT_EMBED_BATCH_SIZE texts in a single batch", async () => {
      const texts = Array.from(
        { length: DEFAULT_EMBED_BATCH_SIZE },
        (_, i) => `text ${i + 1}`,
      );

      await geminiEmbedding.getTextEmbeddingsBatch(texts);

      // Should be called exactly once
      expect(mockEmbedContent).toHaveBeenCalledTimes(1);
      // Should contain all DEFAULT_EMBED_BATCH_SIZE texts
      expect(capturedBatches[0]?.contents).toHaveLength(
        DEFAULT_EMBED_BATCH_SIZE,
      );
      expect(capturedBatches[0]?.contents).toEqual(texts);
    });

    test("should handle texts shorter than batch size", async () => {
      const shortBatchLength = 5; // Less than DEFAULT_EMBED_BATCH_SIZE
      const texts = Array.from(
        { length: shortBatchLength },
        (_, i) => `text ${i + 1}`,
      );

      await geminiEmbedding.getTextEmbeddingsBatch(texts);

      // Should be called exactly once
      expect(mockEmbedContent).toHaveBeenCalledTimes(1);
      // Should contain all 5 texts
      expect(capturedBatches[0].contents).toHaveLength(shortBatchLength);
      expect(capturedBatches[0].contents).toEqual(texts);
    });

    test("should handle large inputs correctly (10 full batches)", async () => {
      const batchCount = 10;
      const texts = Array.from(
        { length: DEFAULT_EMBED_BATCH_SIZE * batchCount },
        (_, i) => `text ${i + 1}`,
      );

      await geminiEmbedding.getTextEmbeddingsBatch(texts);

      // Should be called exactly once per batch
      expect(mockEmbedContent).toHaveBeenCalledTimes(batchCount);
      // Verify each batch has exactly DEFAULT_EMBED_BATCH_SIZE texts
      for (let i = 0; i < batchCount; i++) {
        expect(capturedBatches[i].contents).toHaveLength(
          DEFAULT_EMBED_BATCH_SIZE,
        );
        expect(capturedBatches[i].contents).toEqual(
          texts.slice(
            i * DEFAULT_EMBED_BATCH_SIZE,
            (i + 1) * DEFAULT_EMBED_BATCH_SIZE,
          ),
        );
      }
    });

    test("should return correct embeddings for all texts", async () => {
      const texts = ["text1", "text2", "text3"];
      mockEmbedContent.mockResolvedValueOnce({
        embeddings: [
          { values: [0.1, 0.2, 0.3] },
          { values: [0.4, 0.5, 0.6] },
          { values: [0.7, 0.8, 0.9] },
        ],
      });

      const result = await geminiEmbedding.getTextEmbeddingsBatch(texts);

      expect(result).toEqual([
        [0.1, 0.2, 0.3],
        [0.4, 0.5, 0.6],
        [0.7, 0.8, 0.9],
      ]);
    });

    test("should handle empty embeddings gracefully", async () => {
      const texts = ["text1", "text2"];
      mockEmbedContent.mockResolvedValueOnce({
        embeddings: [{ values: undefined }, { values: [0.1, 0.2, 0.3] }],
      });

      const result = await geminiEmbedding.getTextEmbeddingsBatch(texts);

      expect(result).toEqual([[], [0.1, 0.2, 0.3]]);
    });

    test("should handle missing embeddings array", async () => {
      const texts = ["text1"];
      mockEmbedContent.mockResolvedValueOnce({
        embeddings: undefined,
      });

      const result = await geminiEmbedding.getTextEmbeddingsBatch(texts);

      expect(result).toEqual([]);
    });
  });

  describe("getTextEmbedding", () => {
    test("should call embedContent with single text", async () => {
      const text = "single text";
      mockEmbedContent.mockResolvedValueOnce({
        embeddings: [{ values: [0.1, 0.2, 0.3] }],
      });

      const result = await geminiEmbedding.getTextEmbedding(text);

      expect(mockEmbedContent).toHaveBeenCalledTimes(1);
      expect(mockEmbedContent).toHaveBeenCalledWith({
        model: GEMINI_EMBEDDING_MODEL.EMBEDDING_001,
        contents: text,
      });
      expect(result).toEqual([0.1, 0.2, 0.3]);
    });
  });

  describe("constructor", () => {
    test("should set default model and batch size", () => {
      const embedding = new GeminiEmbedding({ apiKey: "test-key" });

      expect(embedding.model).toBe(GEMINI_EMBEDDING_MODEL.EMBEDDING_001);
      expect(embedding.embedBatchSize).toBe(DEFAULT_EMBED_BATCH_SIZE);
    });

    test("should use provided model and batch size", () => {
      const customBatchSize = 50;
      const embedding = new GeminiEmbedding({
        model: GEMINI_EMBEDDING_MODEL.TEXT_EMBEDDING_004,
        apiKey: "test-key",
        embedBatchSize: customBatchSize,
      });

      expect(embedding.model).toBe(GEMINI_EMBEDDING_MODEL.TEXT_EMBEDDING_004);
      expect(embedding.embedBatchSize).toBe(customBatchSize);
    });
  });
});
@@ -1,5 +1,9 @@
import { GoogleGenAI, type GoogleGenAIOptions } from "@google/genai";
import { BaseEmbedding } from "@llamaindex/core/embeddings";
import {
BaseEmbedding,
batchEmbeddings,
type BaseEmbeddingOptions,
} from "@llamaindex/core/embeddings";
import { getEnv } from "@llamaindex/env";
export enum GEMINI_EMBEDDING_MODEL {
@@ -7,11 +11,15 @@ export enum GEMINI_EMBEDDING_MODEL {
TEXT_EMBEDDING_004 = "text-embedding-004",
}
// 100 is max batch size, see https://github.com/run-llama/LlamaIndexTS/pull/2099
export const DEFAULT_EMBED_BATCH_SIZE = 100;
/**
* Configuration options for GeminiEmbedding.
*/
export type GeminiEmbeddingOptions = {
model?: GEMINI_EMBEDDING_MODEL;
embedBatchSize?: number;
} & GoogleGenAIOptions;
/**
@@ -20,6 +28,7 @@ export type GeminiEmbeddingOptions = {
export class GeminiEmbedding extends BaseEmbedding {
model: GEMINI_EMBEDDING_MODEL;
ai: GoogleGenAI;
embedBatchSize: number = DEFAULT_EMBED_BATCH_SIZE;
constructor(opts?: GeminiEmbeddingOptions) {
super();
@@ -31,15 +40,27 @@ export class GeminiEmbedding extends BaseEmbedding {
this.ai = new GoogleGenAI({ ...opts, apiKey });
this.model = opts?.model ?? GEMINI_EMBEDDING_MODEL.EMBEDDING_001;
this.embedBatchSize = opts?.embedBatchSize ?? DEFAULT_EMBED_BATCH_SIZE;
}
async getTextEmbeddingsBatch(texts: string[]): Promise<number[][]> {
getTextEmbeddings = async (texts: string[]) => {
const result = await this.ai.models.embedContent({
model: this.model,
contents: texts,
});
return result.embeddings?.map((embedding) => embedding.values ?? []) ?? [];
};
async getTextEmbeddingsBatch(
texts: string[],
options?: BaseEmbeddingOptions,
): Promise<Array<number[]>> {
return await batchEmbeddings(
texts,
this.getTextEmbeddings.bind(this),
this.embedBatchSize,
options,
);
}
async getTextEmbedding(text: string): Promise<number[]> {
+19
View File
@@ -1,5 +1,24 @@
# @llamaindex/groq
## 0.0.84
### Patch Changes
- @llamaindex/openai@0.4.12
## 0.0.83
### Patch Changes
- @llamaindex/openai@0.4.11
## 0.0.82
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.81
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/groq",
"description": "Groq Adapter for LlamaIndex",
"version": "0.0.81",
"version": "0.0.84",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",
@@ -1,5 +1,28 @@
# @llamaindex/huggingface
## 0.1.22
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.1.21
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.1.20
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.1.19
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/huggingface",
"description": "Huggingface Adapter for LlamaIndex",
"version": "0.1.19",
"version": "0.1.22",
"type": "module",
"types": "dist/index.d.ts",
"main": "dist/index.cjs",
+23
View File
@@ -1,5 +1,28 @@
# @llamaindex/jinaai
## 0.0.28
### Patch Changes
- Updated dependencies [38da40b]
- @llamaindex/core@0.6.17
- @llamaindex/openai@0.4.12
## 0.0.27
### Patch Changes
- Updated dependencies [a8ec08c]
- @llamaindex/core@0.6.16
- @llamaindex/openai@0.4.11
## 0.0.26
### Patch Changes
- Updated dependencies [856dd8c]
- @llamaindex/openai@0.4.10
## 0.0.25
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/jinaai",
"description": "JinaAI Adapter for LlamaIndex",
"version": "0.0.25",
"version": "0.0.28",
"type": "module",
"main": "./dist/index.cjs",
"module": "./dist/index.js",

Some files were not shown because too many files have changed in this diff Show More