chore: Remove readers package from llamaindex (#1649)

This commit is contained in:
Marcus Schiesser
2025-02-12 17:16:41 +07:00
committed by GitHub
parent b49037612d
commit f4588bc770
31 changed files with 113 additions and 159 deletions
+8
View File
@@ -0,0 +1,8 @@
---
"llamaindex": minor
"@llamaindex/doc": minor
"@llamaindex/examples": minor
"@llamaindex/unit-test": minor
---
Remove readers package from llamaindex
+1
View File
@@ -0,0 +1 @@
LlamaIndexTS
+7 -33
View File
@@ -65,44 +65,18 @@ yarn add llamaindex
See our official document: <https://ts.llamaindex.ai/docs/llamaindex/getting_started/>
### Tips when using in non-Node.js environments
### Adding provider packages
When you are importing `llamaindex` in a non-Node.js environment(such as Vercel Edge, Cloudflare Workers, etc.)
Some classes are not exported from top-level entry file.
In most cases, you'll also need to install provider packages to use LlamaIndexTS. These are for adding AI models, file readers for ingestion or storing documents, e.g. in vector databases.
The reason is that some classes are only compatible with Node.js runtime,(e.g. `PDFReader`) which uses Node.js specific APIs(like `fs`, `child_process`, `crypto`).
For example, to use the OpenAI LLM, you would install the following package:
If you need any of those classes, you have to import them directly through their file path in the package instead.
Here's an example for importing the `PineconeVectorStore` class:
```typescript
import { PineconeVectorStore } from "llamaindex/vector-store/PineconeVectorStore";
```shell
npm install @llamaindex/openai
pnpm install @llamaindex/openai
yarn add @llamaindex/openai
```
As the `PDFReader` is not working with the Edge runtime, here's how to use the `SimpleDirectoryReader` with the `LlamaParseReader` to load PDFs:
```typescript
import { SimpleDirectoryReader } from "llamaindex/readers/SimpleDirectoryReader";
import { LlamaParseReader } from "llamaindex/readers/LlamaParseReader";
export const DATA_DIR = "./data";
/**
 * Loads every document found in `DATA_DIR`.
 *
 * PDF files are parsed with `LlamaParseReader` (markdown output), which is
 * registered explicitly for the `pdf` extension.
 */
export async function getDocuments() {
  const directoryReader = new SimpleDirectoryReader();
  // Load PDFs using LlamaParseReader
  const pdfReader = new LlamaParseReader({ resultType: "markdown" });
  const loadOptions = {
    directoryPath: DATA_DIR,
    fileExtToReader: { pdf: pdfReader },
  };
  return await directoryReader.loadData(loadOptions);
}
```
> _Note_: Reader classes have to be added explicitly to the `fileExtToReader` map in the Edge version of the `SimpleDirectoryReader`.
You'll find a complete example with LlamaIndexTS here: https://github.com/run-llama/create_llama_projects/tree/main/nextjs-edge-llamaparse
## Playground
Check out our NextJS playground at https://llama-playground.vercel.app/. The source is available at https://github.com/run-llama/ts-playground
@@ -15,7 +15,7 @@ In LlamaIndex, an agent is a semi-autonomous piece of software powered by an LLM
You'll need to have a recent version of [Node.js](https://nodejs.org/en) installed. Then you can install LlamaIndex.TS by running
```bash
npm install llamaindex
npm install llamaindex @llamaindex/openai @llamaindex/readers @llamaindex/huggingface
```
## Choose your model
@@ -40,7 +40,7 @@ We'll be bringing in `SimpleDirectoryReader`, `HuggingFaceEmbedding`, `VectorSto
import { FunctionTool, QueryEngineTool, Settings, VectorStoreIndex } from "llamaindex";
import { OpenAI, OpenAIAgent } from "@llamaindex/openai";
import { HuggingFaceEmbedding } from "@llamaindex/huggingface";
import { SimpleDirectoryReader } from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
```
### Add an embedding model
@@ -10,7 +10,7 @@ import { Accordion, Accordions } from 'fumadocs-ui/components/accordion';
<Accordions>
<Accordion title="Install @llamaindex/readers">
If you want to only use reader modules, you can install `@llamaindex/readers`
If you want to use the reader module, you need to install `@llamaindex/readers`
<Tabs groupId="install-llamaindex" items={["npm", "yarn", "pnpm"]} persist>
```shell tab="npm"
@@ -31,72 +31,73 @@ import { Accordion, Accordions } from 'fumadocs-ui/components/accordion';
We offer readers for different file formats.
<Tabs groupId="llamaindex-or-readers" items={["llamaindex", "@llamaindex/readers"]} persist>
```ts twoslash tab="llamaindex"
import { CSVReader } from '@llamaindex/readers/csv'
import { PDFReader } from '@llamaindex/readers/pdf'
import { JSONReader } from '@llamaindex/readers/json'
import { MarkdownReader } from '@llamaindex/readers/markdown'
import { HTMLReader } from '@llamaindex/readers/html'
// you can find more readers in the documentation
```
```ts twoslash tab="@llamaindex/readers"
import { CSVReader } from '@llamaindex/readers/csv'
import { PDFReader } from '@llamaindex/readers/pdf'
import { JSONReader } from '@llamaindex/readers/json'
import { MarkdownReader } from '@llamaindex/readers/markdown'
import { HTMLReader } from '@llamaindex/readers/html'
// you can find more readers in the documentation
```
</Tabs>
```ts twoslash
import { CSVReader } from '@llamaindex/readers/csv'
import { PDFReader } from '@llamaindex/readers/pdf'
import { JSONReader } from '@llamaindex/readers/json'
import { MarkdownReader } from '@llamaindex/readers/markdown'
import { HTMLReader } from '@llamaindex/readers/html'
// you can find more readers in the documentation
```
## SimpleDirectoryReader
`SimpleDirectoryReader` is the simplest way to load data from local files into LlamaIndex.
<Tabs groupId="llamaindex-or-readers" items={["llamaindex", "@llamaindex/readers"]} persist>
```ts twoslash
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
```ts twoslash tab="llamaindex"
import { SimpleDirectoryReader } from "llamaindex";
const reader = new SimpleDirectoryReader()
const documents = await reader.loadData("./data")
// ^?
const reader = new SimpleDirectoryReader()
const documents = await reader.loadData("./data")
// ^?
const texts = documents.map(doc => doc.getText())
// ^?
```
```ts twoslash tab="@llamaindex/readers"
import { SimpleDirectoryReader } from "llamaindex";
const reader = new SimpleDirectoryReader()
const documents = await reader.loadData("./data")
// ^?
const texts = documents.map(doc => doc.getText())
// ^?
```
const texts = documents.map(doc => doc.getText())
// ^?
```
## Tips when using in non-Node.js environments
When using `@llamaindex/readers` in a non-Node.js environment (such as Vercel Edge, Cloudflare Workers, etc.),
some classes are not exported from the top-level entry file.
The reason is that some classes (e.g. `PDFReader`) are only compatible with the Node.js runtime, because they use Node.js-specific APIs (like `fs`, `child_process`, `crypto`).
If you need any of those classes, you have to import them directly through their file path in the package instead.
As the `PDFReader` is not working with the Edge runtime, here's how to use the `SimpleDirectoryReader` with the `LlamaParseReader` to load PDFs:
```typescript
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import { LlamaParseReader } from "@llamaindex/cloud";
export const DATA_DIR = "./data";
/**
 * Loads every document found in `DATA_DIR`.
 *
 * PDF files are parsed with `LlamaParseReader` (markdown output), which is
 * registered explicitly for the `pdf` extension.
 */
export async function getDocuments() {
  const directoryReader = new SimpleDirectoryReader();
  // Load PDFs using LlamaParseReader
  const pdfReader = new LlamaParseReader({ resultType: "markdown" });
  const loadOptions = {
    directoryPath: DATA_DIR,
    fileExtToReader: { pdf: pdfReader },
  };
  return await directoryReader.loadData(loadOptions);
}
```
> _Note_: Reader classes have to be added explicitly to the `fileExtToReader` map in the Edge version of the `SimpleDirectoryReader`.
You'll find a complete example with LlamaIndexTS here: https://github.com/run-llama/create_llama_projects/tree/main/nextjs-edge-llamaparse
</Tabs>
## Load file natively using Node.js Customization Hooks
We have a helper utility that allows you to import a file directly in a Node.js script.
<Tabs groupId="llamaindex-or-readers" items={["llamaindex", "@llamaindex/readers"]} persist>
```shell tab="llamaindex"
node --import llamaindex/register ./script.js
```
```shell tab="@llamaindex/readers"
node --import @llamaindex/readers/node ./script.js
```
</Tabs>
```shell
node --import @llamaindex/readers/node ./script.js
```
```ts
import csv from './path/to/data.csv';
@@ -34,7 +34,7 @@ import {
Settings,
} from "llamaindex";
import { OpenAI } from "@llamaindex/openai";
import { SimpleDirectoryReader } from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
```
## Loading Data
@@ -124,7 +124,7 @@ import {
Settings,
} from "llamaindex";
import { OpenAI } from "@llamaindex/openai";
import { SimpleDirectoryReader } from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
Settings.llm = new OpenAI();
Settings.nodeParser = new SentenceSplitter({
@@ -10,6 +10,7 @@
"dependencies": {
"llamaindex": "workspace:*",
"@llamaindex/huggingface": "workspace:*",
"@llamaindex/readers": "workspace:*",
"next": "15.0.3",
"react": "18.3.1",
"react-dom": "18.3.1"
@@ -1,11 +1,11 @@
"use server";
import { HuggingFaceEmbedding } from "@llamaindex/huggingface";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import {
OpenAI,
OpenAIAgent,
QueryEngineTool,
Settings,
SimpleDirectoryReader,
VectorStoreIndex,
} from "llamaindex";
@@ -9,6 +9,7 @@
"start": "waku start"
},
"dependencies": {
"@llamaindex/env": "workspace:*",
"llamaindex": "workspace:*",
"react": "19.0.0-rc-5c56b873-20241107",
"react-dom": "19.0.0-rc-5c56b873-20241107",
@@ -1,13 +1,14 @@
"use server";
import { fs } from "@llamaindex/env";
import { BaseQueryEngine, Document, VectorStoreIndex } from "llamaindex";
import { readFile } from "node:fs/promises";
let _queryEngine: BaseQueryEngine;
async function lazyLoadQueryEngine() {
if (!_queryEngine) {
const path = "node_modules/llamaindex/examples/abramov.txt";
const essay = await readFile(path, "utf-8");
const essay = await fs.readFile(path, "utf-8");
// Create Document object with essay
const document = new Document({ text: essay, id_: path });
+2 -5
View File
@@ -1,9 +1,6 @@
import { OpenAIAgent } from "@llamaindex/openai";
import {
QueryEngineTool,
SimpleDirectoryReader,
VectorStoreIndex,
} from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import { QueryEngineTool, VectorStoreIndex } from "llamaindex";
async function main() {
// Load the documents
+1 -1
View File
@@ -1,9 +1,9 @@
import { OpenAIAgent } from "@llamaindex/openai";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import {
FunctionTool,
MetadataMode,
NodeWithScore,
SimpleDirectoryReader,
VectorStoreIndex,
} from "llamaindex";
+2 -5
View File
@@ -1,9 +1,6 @@
import { OpenAIAgent } from "@llamaindex/openai";
import {
QueryEngineTool,
SimpleDirectoryReader,
VectorStoreIndex,
} from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import { QueryEngineTool, VectorStoreIndex } from "llamaindex";
async function main() {
// Load the documents
+1 -1
View File
@@ -1,4 +1,4 @@
import { SimpleDirectoryReader } from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
function callback(
category: string,
+1 -1
View File
@@ -1,9 +1,9 @@
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import {
ImageDocument,
JinaAIEmbedding,
similarity,
SimilarityType,
SimpleDirectoryReader,
} from "llamaindex";
import path from "path";
+2 -1
View File
@@ -1,4 +1,5 @@
import { Settings, SimpleDirectoryReader, VectorStoreIndex } from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import { Settings, VectorStoreIndex } from "llamaindex";
import path from "path";
import { getStorageContext } from "./storage";
+2 -5
View File
@@ -1,11 +1,8 @@
// load-docs.ts
import { PineconeVectorStore } from "@llamaindex/pinecone";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import fs from "fs/promises";
import {
SimpleDirectoryReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
async function getSourceFilenames(sourceDir: string) {
return await fs
@@ -1,10 +1,10 @@
import { TextFileReader } from "@llamaindex/readers/text";
import type { Document, Metadata } from "llamaindex";
import {
FILE_EXT_TO_READER,
FileReader,
SimpleDirectoryReader,
} from "llamaindex";
} from "@llamaindex/readers/directory";
import { TextFileReader } from "@llamaindex/readers/text";
import type { Document, Metadata } from "llamaindex";
import { FileReader } from "llamaindex";
class ZipReader extends FileReader {
loadDataAsContent(fileContent: Uint8Array): Promise<Document<Metadata>[]> {
@@ -1,5 +1,6 @@
import { LlamaParseReader } from "@llamaindex/cloud";
import { SimpleDirectoryReader, VectorStoreIndex } from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import { VectorStoreIndex } from "llamaindex";
async function main() {
const reader = new SimpleDirectoryReader();
@@ -1,6 +1,4 @@
import { SimpleDirectoryReader } from "llamaindex";
// or
// import { SimpleDirectoryReader } from 'llamaindex'
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
const reader = new SimpleDirectoryReader();
const documents = await reader.loadData("../data");
+1 -1
View File
@@ -1,9 +1,9 @@
import { OpenAI } from "@llamaindex/openai";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import {
RouterQueryEngine,
SentenceSplitter,
Settings,
SimpleDirectoryReader,
SummaryIndex,
VectorStoreIndex,
} from "llamaindex";
+1 -1
View File
@@ -14,6 +14,7 @@ import {
MetadataIndexFieldType,
} from "@llamaindex/azure";
import { OpenAI, OpenAIEmbedding } from "@llamaindex/openai";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import dotenv from "dotenv";
import {
Document,
@@ -22,7 +23,6 @@ import {
Metadata,
NodeWithScore,
Settings,
SimpleDirectoryReader,
storageContextFromDefaults,
TextNode,
VectorStoreIndex,
+2 -5
View File
@@ -1,10 +1,7 @@
// load-docs.ts
import { PGVectorStore } from "@llamaindex/postgres";
import {
SimpleDirectoryReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
import fs from "node:fs/promises";
async function getSourceFilenames(sourceDir: string) {
+2 -5
View File
@@ -1,10 +1,7 @@
import { PGVectorStore } from "@llamaindex/postgres";
import { SimpleDirectoryReader } from "@llamaindex/readers/directory";
import dotenv from "dotenv";
import {
SimpleDirectoryReader,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";
import { storageContextFromDefaults, VectorStoreIndex } from "llamaindex";
dotenv.config();
-1
View File
@@ -25,7 +25,6 @@
"@llamaindex/env": "workspace:*",
"@llamaindex/node-parser": "workspace:*",
"@llamaindex/openai": "workspace:*",
"@llamaindex/readers": "workspace:*",
"@types/lodash": "^4.17.7",
"@types/node": "^22.9.0",
"ajv": "^8.17.1",
+3 -3
View File
@@ -1,9 +1,9 @@
export * from "./index.edge.js";
export * from "./readers/index.js";
export * from "./storage/index.js";
// TODO: clean up, move to jinaai package
export { JinaAIEmbedding } from "./embeddings/JinaAIEmbedding.js";
// Don't export SimpleVectorStore for non-node.js runtime on top level,
// Don't export file-system stores for non-node.js runtime on top level,
// as we cannot guarantee that they will work in other environments
export * from "./storage/index.js";
export * from "./vector-store.js";
-20
View File
@@ -1,20 +0,0 @@
export {
LlamaParseReader,
type Language,
type ResultType,
} from "@llamaindex/cloud/reader";
export * from "@llamaindex/readers/assembly-ai";
export * from "@llamaindex/readers/cosmosdb";
export * from "@llamaindex/readers/csv";
export * from "@llamaindex/readers/directory";
export * from "@llamaindex/readers/discord";
export * from "@llamaindex/readers/docx";
export * from "@llamaindex/readers/html";
export * from "@llamaindex/readers/image";
export * from "@llamaindex/readers/json";
export * from "@llamaindex/readers/markdown";
export * from "@llamaindex/readers/mongo";
export * from "@llamaindex/readers/notion";
export * from "@llamaindex/readers/obsidian";
export * from "@llamaindex/readers/pdf";
export * from "@llamaindex/readers/text";
+6 -3
View File
@@ -476,6 +476,9 @@ importers:
'@llamaindex/huggingface':
specifier: workspace:*
version: link:../../../packages/providers/huggingface
'@llamaindex/readers':
specifier: workspace:*
version: link:../../../packages/readers
llamaindex:
specifier: workspace:*
version: link:../../../packages/llamaindex
@@ -516,6 +519,9 @@ importers:
e2e/examples/waku-query-engine:
dependencies:
'@llamaindex/env':
specifier: workspace:*
version: link:../../../packages/env
llamaindex:
specifier: workspace:*
version: link:../../../packages/llamaindex
@@ -1034,9 +1040,6 @@ importers:
'@llamaindex/openai':
specifier: workspace:*
version: link:../providers/openai
'@llamaindex/readers':
specifier: workspace:*
version: link:../readers
'@types/lodash':
specifier: ^4.17.7
version: 4.17.15
+1 -1
View File
@@ -3,7 +3,7 @@ import { Document } from "@llamaindex/core/schema";
import {
SimpleCosmosDBReader,
type SimpleCosmosDBReaderLoaderConfig,
} from "llamaindex";
} from "@llamaindex/readers/cosmosdb";
import { describe, expect, it, vi } from "vitest";
const createMockClient = (mockData?: unknown[]) => {
+1 -1
View File
@@ -5,7 +5,7 @@ test("node register", async () => {
const code = `import csvDocument from '../examples/data/movie_reviews.csv';console.log(csvDocument.getText())`;
const cp = spawnSync(
process.execPath,
["--input-type=module", "--import=llamaindex/register"],
["--input-type=module", "--import=@llamaindex/readers/node"],
{
input: code,
stdio: "pipe",