Compare commits

..

9 Commits

Author SHA1 Message Date
github-actions[bot] 0664a99945 Release 0.5.16 (#1115)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-08-09 22:09:34 -07:00
Alex Yang 58abc5731b chore: update changeset 2024-08-09 22:06:43 -07:00
github-actions[bot] 7498b1e0f1 Release 0.5.15 (#1108)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-08-09 14:36:47 -07:00
Alex Yang 07a275fea5 chore: bump openai (#1113) 2024-08-09 12:56:30 -07:00
Alex Yang 1b6263e08d fix: export schema in top level (#1112) 2024-08-09 10:10:12 -07:00
Alex Yang 089f1d49c0 refactor: migrate reader type into core (#1111) 2024-08-09 09:53:50 -07:00
Thuc Pham 01c184c608 feat: add is_empty operator for filtering vector store (#1107) 2024-08-09 14:50:57 +07:00
github-actions[bot] 1752463ee6 Release 0.5.14 (#1103)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-08-07 17:03:00 -07:00
Peter Goldstein c825a2f743 Add gpt-4o-mini to Azure. Add 2024-06-01 API version for Azure (#1102) 2024-08-06 14:23:28 +07:00
59 changed files with 524 additions and 202 deletions
+22
View File
@@ -1,5 +1,27 @@
# docs
## 0.0.57
### Patch Changes
- Updated dependencies [58abc57]
- llamaindex@0.5.16
## 0.0.56
### Patch Changes
- Updated dependencies [01c184c]
- Updated dependencies [07a275f]
- llamaindex@0.5.15
## 0.0.55
### Patch Changes
- Updated dependencies [c825a2f]
- llamaindex@0.5.14
## 0.0.54
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "docs",
"version": "0.0.54",
"version": "0.0.57",
"private": true,
"scripts": {
"docusaurus": "docusaurus",
+8
View File
@@ -1,5 +1,13 @@
# @llamaindex/autotool
## 2.0.1
### Patch Changes
- 58abc57: fix: align version
- Updated dependencies [58abc57]
- llamaindex@0.5.16
## 2.0.0
### Patch Changes
@@ -1,5 +1,30 @@
# @llamaindex/autotool-02-next-example
## 0.1.41
### Patch Changes
- Updated dependencies [58abc57]
- @llamaindex/autotool@2.0.1
- llamaindex@0.5.16
## 0.1.40
### Patch Changes
- Updated dependencies [01c184c]
- Updated dependencies [07a275f]
- llamaindex@0.5.15
- @llamaindex/autotool@2.0.0
## 0.1.39
### Patch Changes
- Updated dependencies [c825a2f]
- llamaindex@0.5.14
- @llamaindex/autotool@2.0.0
## 0.1.38
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/autotool-02-next-example",
"private": true,
"version": "0.1.38",
"version": "0.1.41",
"scripts": {
"dev": "next dev",
"build": "next build",
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/autotool",
"type": "module",
"version": "2.0.0",
"version": "2.0.1",
"description": "auto transpile your JS function to LLM Agent compatible",
"files": [
"dist",
@@ -51,7 +51,7 @@
"unplugin": "^1.10.1"
},
"peerDependencies": {
"llamaindex": "^0.5.13",
"llamaindex": "^0.5.16",
"openai": "^4",
"typescript": "^4"
},
+6
View File
@@ -1,5 +1,11 @@
# @llamaindex/cloud
## 0.2.2
### Patch Changes
- 58abc57: fix: align version
## 0.2.1
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloud",
"version": "0.2.1",
"version": "0.2.2",
"type": "module",
"license": "MIT",
"scripts": {
+9
View File
@@ -1,5 +1,14 @@
# @llamaindex/community
## 0.0.29
### Patch Changes
- 58abc57: fix: align version
- Updated dependencies [58abc57]
- @llamaindex/core@0.1.8
- @llamaindex/env@0.1.9
## 0.0.28
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/community",
"description": "Community package for LlamaIndexTS",
"version": "0.0.28",
"version": "0.0.29",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+8
View File
@@ -1,5 +1,13 @@
# @llamaindex/core
## 0.1.8
### Patch Changes
- 58abc57: fix: align version
- Updated dependencies [58abc57]
- @llamaindex/env@0.1.9
## 0.1.7
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/core",
"type": "module",
"version": "0.1.7",
"version": "0.1.8",
"description": "LlamaIndex Core Module",
"exports": {
"./node-parser": {
+1 -1
View File
@@ -1,4 +1,4 @@
export * from "./node";
export { TransformComponent } from "./type";
export { FileReader, TransformComponent, type BaseReader } from "./type";
export { EngineResponse } from "./type/engineresponse";
export * from "./zod";
+36 -2
View File
@@ -1,5 +1,5 @@
import { randomUUID } from "@llamaindex/env";
import type { BaseNode } from "./node";
import { fs, path, randomUUID } from "@llamaindex/env";
import type { BaseNode, Document } from "./node";
interface TransformComponentSignature {
<Options extends Record<string, unknown>>(
@@ -28,3 +28,37 @@ export class TransformComponent {
return transform;
}
}
/**
 * A reader imports data from some source into Document objects.
 */
export interface BaseReader {
// The argument list is deliberately open (`unknown[]`); implementations
// narrow it to whatever they need (e.g. a single file path).
loadData(...args: unknown[]): Promise<Document[]>;
}
/**
 * Base class for readers that turn a file into Document objects.
 *
 * Subclasses implement only `loadDataAsContent`; reading the file and
 * stamping ids and file metadata onto the resulting documents happens here.
 */
export abstract class FileReader implements BaseReader {
  /**
   * Converts the raw bytes of a file into documents.
   *
   * @param fileContent - bytes of the file to parse
   * @param fileName - optional base name of the file the bytes came from
   * @returns the documents extracted from the content
   */
  abstract loadDataAsContent(
    fileContent: Uint8Array,
    fileName?: string,
  ): Promise<Document[]>;

  /**
   * Reads the file at `filePath`, delegates parsing to `loadDataAsContent`,
   * then assigns an id and file metadata to every returned document.
   *
   * @param filePath - path of the file to load
   * @returns the parsed documents, mutated in place with id and metadata
   */
  async loadData(filePath: string): Promise<Document[]> {
    const bytes = await fs.readFile(filePath);
    const documents = await this.loadDataAsContent(
      bytes,
      path.basename(filePath),
    );
    const stamp = FileReader.addMetaData(filePath);
    documents.forEach((document, position) => stamp(document, position));
    return documents;
  }

  /**
   * Builds a callback that tags a document with an id and file metadata
   * derived from `filePath`.
   *
   * @param filePath - path of the file the documents were read from
   * @returns a function taking a document and its zero-based index
   */
  static addMetaData(filePath: string) {
    const stamp = (doc: Document, index: number): void => {
      // loadDataAsContent only produces content, so the id is generated here;
      // the numeric suffix is one-based.
      const ordinal = index + 1;
      doc.id_ = `${filePath}_${ordinal}`;
      doc.metadata["file_path"] = path.resolve(filePath);
      doc.metadata["file_name"] = path.basename(filePath);
    };
    return stamp;
  }
}
+6
View File
@@ -1,5 +1,11 @@
# @llamaindex/env
## 0.1.9
### Patch Changes
- 58abc57: fix: align version
## 0.1.8
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/env",
"description": "environment wrapper, supports all JS environment including node, deno, bun, edge runtime, and cloudflare worker",
"version": "0.1.8",
"version": "0.1.9",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+23
View File
@@ -1,5 +1,28 @@
# @llamaindex/experimental
## 0.0.66
### Patch Changes
- 58abc57: fix: align version
- Updated dependencies [58abc57]
- llamaindex@0.5.16
## 0.0.65
### Patch Changes
- Updated dependencies [01c184c]
- Updated dependencies [07a275f]
- llamaindex@0.5.15
## 0.0.64
### Patch Changes
- Updated dependencies [c825a2f]
- llamaindex@0.5.14
## 0.0.63
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/experimental",
"description": "Experimental package for LlamaIndexTS",
"version": "0.0.63",
"version": "0.0.66",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+23
View File
@@ -1,5 +1,28 @@
# llamaindex
## 0.5.16
### Patch Changes
- 58abc57: fix: align version
- Updated dependencies [58abc57]
- @llamaindex/cloud@0.2.2
- @llamaindex/core@0.1.8
- @llamaindex/env@0.1.9
## 0.5.15
### Patch Changes
- 01c184c: Add is_empty operator for filtering vector store
- 07a275f: chore: bump openai
## 0.5.14
### Patch Changes
- c825a2f: Add gpt-4o-mini to Azure. Add 2024-06-01 API version for Azure
## 0.5.13
### Patch Changes
@@ -1,5 +1,27 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.50
### Patch Changes
- Updated dependencies [58abc57]
- llamaindex@0.5.16
## 0.0.49
### Patch Changes
- Updated dependencies [01c184c]
- Updated dependencies [07a275f]
- llamaindex@0.5.15
## 0.0.48
### Patch Changes
- Updated dependencies [c825a2f]
- llamaindex@0.5.14
## 0.0.47
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.47",
"version": "0.0.50",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,27 @@
# @llamaindex/next-agent-test
## 0.1.50
### Patch Changes
- Updated dependencies [58abc57]
- llamaindex@0.5.16
## 0.1.49
### Patch Changes
- Updated dependencies [01c184c]
- Updated dependencies [07a275f]
- llamaindex@0.5.15
## 0.1.48
### Patch Changes
- Updated dependencies [c825a2f]
- llamaindex@0.5.14
## 0.1.47
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.47",
"version": "0.1.50",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,27 @@
# test-edge-runtime
## 0.1.49
### Patch Changes
- Updated dependencies [58abc57]
- llamaindex@0.5.16
## 0.1.48
### Patch Changes
- Updated dependencies [01c184c]
- Updated dependencies [07a275f]
- llamaindex@0.5.15
## 0.1.47
### Patch Changes
- Updated dependencies [c825a2f]
- llamaindex@0.5.14
## 0.1.46
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.46",
"version": "0.1.49",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,27 @@
# @llamaindex/next-node-runtime
## 0.0.31
### Patch Changes
- Updated dependencies [58abc57]
- llamaindex@0.5.16
## 0.0.30
### Patch Changes
- Updated dependencies [01c184c]
- Updated dependencies [07a275f]
- llamaindex@0.5.15
## 0.0.29
### Patch Changes
- Updated dependencies [c825a2f]
- llamaindex@0.5.14
## 0.0.28
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.0.28",
"version": "0.0.31",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,27 @@
# @llamaindex/waku-query-engine-test
## 0.0.50
### Patch Changes
- Updated dependencies [58abc57]
- llamaindex@0.5.16
## 0.0.49
### Patch Changes
- Updated dependencies [01c184c]
- Updated dependencies [07a275f]
- llamaindex@0.5.15
## 0.0.48
### Patch Changes
- Updated dependencies [c825a2f]
- llamaindex@0.5.14
## 0.0.47
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.47",
"version": "0.0.50",
"type": "module",
"private": true,
"scripts": {
+2 -2
View File
@@ -1,6 +1,6 @@
{
"name": "llamaindex",
"version": "0.5.13",
"version": "0.5.16",
"license": "MIT",
"type": "module",
"keywords": [
@@ -55,7 +55,7 @@
"md-utils-ts": "^2.0.0",
"mongodb": "^6.7.0",
"notion-md-crawler": "^1.0.0",
"openai": "^4.52.5",
"openai": "^4.55.3",
"papaparse": "^5.4.1",
"pathe": "^1.1.2",
"pg": "^8.12.0",
+2
View File
@@ -1,6 +1,8 @@
import type { AgentEndEvent, AgentStartEvent } from "./agent/types.js";
import type { RetrievalEndEvent, RetrievalStartEvent } from "./llm/types.js";
export * from "@llamaindex/core/schema";
declare module "@llamaindex/core/global" {
export interface LlamaIndexEventMaps {
"retrieve-start": RetrievalStartEvent;
@@ -1,4 +1,4 @@
import type { TransformComponent } from "@llamaindex/core/schema";
import type { BaseReader, TransformComponent } from "@llamaindex/core/schema";
import {
ModalityType,
splitNodesByType,
@@ -6,7 +6,6 @@ import {
type Document,
type Metadata,
} from "@llamaindex/core/schema";
import type { BaseReader } from "../readers/type.js";
import type { BaseDocumentStore } from "../storage/docStore/types.js";
import type {
VectorStore,
@@ -107,6 +106,7 @@ export class IngestionPipeline {
inputNodes.push(this.documents);
}
if (this.reader) {
// fixme: empty parameter might cause error
inputNodes.push(await this.reader.loadData());
}
return inputNodes.flat();
+6
View File
@@ -18,6 +18,7 @@ const ALL_AZURE_OPENAI_CHAT_MODELS = {
openAIModel: "gpt-3.5-turbo-16k",
},
"gpt-4o": { contextWindow: 128000, openAIModel: "gpt-4o" },
"gpt-4o-mini": { contextWindow: 128000, openAIModel: "gpt-4o-mini" },
"gpt-4": { contextWindow: 8192, openAIModel: "gpt-4" },
"gpt-4-32k": { contextWindow: 32768, openAIModel: "gpt-4-32k" },
"gpt-4-turbo": {
@@ -40,6 +41,10 @@ const ALL_AZURE_OPENAI_CHAT_MODELS = {
contextWindow: 128000,
openAIModel: "gpt-4o-2024-05-13",
},
"gpt-4o-mini-2024-07-18": {
contextWindow: 128000,
openAIModel: "gpt-4o-mini-2024-07-18",
},
};
const ALL_AZURE_OPENAI_EMBEDDING_MODELS = {
@@ -73,6 +78,7 @@ const ALL_AZURE_API_VERSIONS = [
"2024-03-01-preview",
"2024-04-01-preview",
"2024-05-01-preview",
"2024-06-01",
];
const DEFAULT_API_VERSION = "2023-05-15";
+19 -8
View File
@@ -6,6 +6,7 @@ import type {
ClientOptions as OpenAIClientOptions,
} from "openai";
import { AzureOpenAI, OpenAI as OrigOpenAI } from "openai";
import type { ChatModel } from "openai/resources/chat/chat";
import {
type BaseTool,
@@ -108,16 +109,24 @@ export const GPT4_MODELS = {
"gpt-4o-2024-05-13": { contextWindow: 128000 },
"gpt-4o-mini": { contextWindow: 128000 },
"gpt-4o-mini-2024-07-18": { contextWindow: 128000 },
"gpt-4o-2024-08-06": { contextWindow: 128000 },
"gpt-4o-2024-09-14": { contextWindow: 128000 },
"gpt-4o-2024-10-14": { contextWindow: 128000 },
"gpt-4-0613": { contextWindow: 128000 },
"gpt-4-turbo-2024-04-09": { contextWindow: 128000 },
"gpt-4-0314": { contextWindow: 128000 },
"gpt-4-32k-0314": { contextWindow: 32768 },
};
// NOTE we don't currently support gpt-3.5-turbo-instruct and don't plan to in the near future
export const GPT35_MODELS = {
"gpt-3.5-turbo": { contextWindow: 4096 },
"gpt-3.5-turbo": { contextWindow: 16385 },
"gpt-3.5-turbo-0613": { contextWindow: 4096 },
"gpt-3.5-turbo-16k": { contextWindow: 16384 },
"gpt-3.5-turbo-16k-0613": { contextWindow: 16384 },
"gpt-3.5-turbo-1106": { contextWindow: 16384 },
"gpt-3.5-turbo-0125": { contextWindow: 16384 },
"gpt-3.5-turbo-16k": { contextWindow: 16385 },
"gpt-3.5-turbo-16k-0613": { contextWindow: 16385 },
"gpt-3.5-turbo-1106": { contextWindow: 16385 },
"gpt-3.5-turbo-0125": { contextWindow: 16385 },
"gpt-3.5-turbo-0301": { contextWindow: 16385 },
};
/**
@@ -126,7 +135,7 @@ export const GPT35_MODELS = {
export const ALL_AVAILABLE_OPENAI_MODELS = {
...GPT4_MODELS,
...GPT35_MODELS,
};
} satisfies Record<ChatModel, { contextWindow: number }>;
export function isFunctionCallingModel(llm: LLM): llm is OpenAI {
let model: string;
@@ -157,8 +166,10 @@ export type OpenAIAdditionalChatOptions = Omit<
>;
export class OpenAI extends ToolCallLLM<OpenAIAdditionalChatOptions> {
// Per completion OpenAI params
model: keyof typeof ALL_AVAILABLE_OPENAI_MODELS | string;
model:
| ChatModel
// string & {} is a hack to allow any string, but still give autocomplete
| (string & {});
temperature: number;
topP: number;
maxTokens?: number;
@@ -1,4 +1,4 @@
import { Document } from "@llamaindex/core/schema";
import { type BaseReader, Document } from "@llamaindex/core/schema";
import { getEnv } from "@llamaindex/env";
import type {
BaseServiceParams,
@@ -8,7 +8,6 @@ import type {
TranscriptSentence,
} from "assemblyai";
import { AssemblyAI } from "assemblyai";
import type { BaseReader } from "./type.js";
type AssemblyAIOptions = Partial<BaseServiceParams>;
const defaultOptions = {
+1 -2
View File
@@ -1,7 +1,6 @@
import { Document } from "@llamaindex/core/schema";
import { type BaseReader, Document, FileReader } from "@llamaindex/core/schema";
import type { ParseConfig } from "papaparse";
import Papa from "papaparse";
import { FileReader } from "./type.js";
/**
* papaparse-based csv parser
@@ -1,5 +1,5 @@
import { REST, type RESTOptions } from "@discordjs/rest";
import { Document } from "@llamaindex/core/schema";
import { Document, type BaseReader } from "@llamaindex/core/schema";
import { getEnv } from "@llamaindex/env";
import { Routes, type APIEmbed, type APIMessage } from "discord-api-types/v10";
@@ -7,7 +7,7 @@ import { Routes, type APIEmbed, type APIMessage } from "discord-api-types/v10";
* Represents a reader for Discord messages using @discordjs/rest
* See https://github.com/discordjs/discord.js/tree/main/packages/rest
*/
export class DiscordReader {
export class DiscordReader implements BaseReader {
private client: REST;
constructor(
@@ -1,6 +1,5 @@
import { Document } from "@llamaindex/core/schema";
import { Document, FileReader } from "@llamaindex/core/schema";
import mammoth from "mammoth";
import { FileReader } from "./type.js";
export class DocxReader extends FileReader {
/** DocxParser */
@@ -1,6 +1,4 @@
import { Document } from "@llamaindex/core/schema";
import { FileReader } from "./type.js";
import { Document, FileReader } from "@llamaindex/core/schema";
/**
* Extract the significant text from an arbitrary HTML document.
* The contents of any head, script, style, and xml tags are removed completely.
@@ -1,6 +1,5 @@
import type { Document } from "@llamaindex/core/schema";
import { ImageDocument } from "@llamaindex/core/schema";
import { FileReader } from "./type.js";
import { FileReader, ImageDocument } from "@llamaindex/core/schema";
/**
* Reads the content of an image file into a Document object (which stores the image file as a Blob).
@@ -1,7 +1,5 @@
import type { JSONValue } from "@llamaindex/core/global";
import { Document } from "@llamaindex/core/schema";
import { FileReader } from "./type.js";
import { Document, FileReader } from "@llamaindex/core/schema";
export interface JSONReaderOptions {
/**
* Whether to ensure only ASCII characters.
@@ -1,7 +1,92 @@
import { Document } from "@llamaindex/core/schema";
import { Document, FileReader } from "@llamaindex/core/schema";
import { fs, getEnv } from "@llamaindex/env";
import { filetypeinfo } from "magic-bytes.js";
import { FileReader, type Language, type ResultType } from "./type.js";
/**
 * Format of a parse result: plain text, markdown, or JSON.
 * NOTE(review): presumably the result format requested from LlamaParse — confirm against the reader that uses it.
 */
export type ResultType = "text" | "markdown" | "json";
/**
 * Closed set of language codes (e.g. "en", "fr", "ch_sim").
 * NOTE(review): presumably the document languages accepted by the parsing
 * service — confirm against the upstream API before adding or removing codes.
 */
export type Language =
| "abq"
| "ady"
| "af"
| "ang"
| "ar"
| "as"
| "ava"
| "az"
| "be"
| "bg"
| "bh"
| "bho"
| "bn"
| "bs"
| "ch_sim"
| "ch_tra"
| "che"
| "cs"
| "cy"
| "da"
| "dar"
| "de"
| "en"
| "es"
| "et"
| "fa"
| "fr"
| "ga"
| "gom"
| "hi"
| "hr"
| "hu"
| "id"
| "inh"
| "is"
| "it"
| "ja"
| "kbd"
| "kn"
| "ko"
| "ku"
| "la"
| "lbe"
| "lez"
| "lt"
| "lv"
| "mah"
| "mai"
| "mi"
| "mn"
| "mr"
| "ms"
| "mt"
| "ne"
| "new"
| "nl"
| "no"
| "oc"
| "pi"
| "pl"
| "pt"
| "ro"
| "ru"
| "rs_cyrillic"
| "rs_latin"
| "sck"
| "sk"
| "sl"
| "sq"
| "sv"
| "sw"
| "ta"
| "tab"
| "te"
| "th"
| "tjk"
| "tl"
| "tr"
| "ug"
| "uk"
| "ur"
| "uz"
| "vi";
const SUPPORT_FILE_EXT: string[] = [
".pdf",
@@ -1,5 +1,4 @@
import { Document } from "@llamaindex/core/schema";
import { FileReader } from "./type.js";
import { Document, FileReader } from "@llamaindex/core/schema";
type MarkdownTuple = [string | null, string];
@@ -1,7 +1,7 @@
import type { BaseReader } from "@llamaindex/core/schema";
import { Document } from "@llamaindex/core/schema";
import type { Crawler, CrawlerOptions, Page } from "notion-md-crawler";
import { crawler, pageToString } from "notion-md-crawler";
import type { BaseReader } from "./type.js";
type NotionReaderOptions = Pick<CrawlerOptions, "client" | "serializers">;
+1 -2
View File
@@ -1,5 +1,4 @@
import { Document } from "@llamaindex/core/schema";
import { FileReader } from "./type.js";
import { Document, FileReader } from "@llamaindex/core/schema";
/**
* Read the text of a PDF
@@ -1,8 +1,8 @@
import type { BaseReader, FileReader } from "@llamaindex/core/schema";
import { Document } from "@llamaindex/core/schema";
import { path } from "@llamaindex/env";
import { walk } from "../storage/FileSystem.js";
import { TextFileReader } from "./TextFileReader.js";
import type { BaseReader, FileReader } from "./type.js";
import pLimit from "./utils.js";
type ReaderCallback = (
@@ -1,3 +1,4 @@
import type { FileReader } from "@llamaindex/core/schema";
import { Document } from "@llamaindex/core/schema";
import { PapaCSVReader } from "./CSVReader.js";
import { DocxReader } from "./DocxReader.js";
@@ -10,7 +11,6 @@ import {
type SimpleDirectoryReaderLoadDataParams,
} from "./SimpleDirectoryReader.edge.js";
import { TextFileReader } from "./TextFileReader.js";
import type { FileReader } from "./type.js";
export const FILE_EXT_TO_READER: Record<string, FileReader> = {
txt: new TextFileReader(),
@@ -1,7 +1,6 @@
import type { Metadata } from "@llamaindex/core/schema";
import { Document } from "@llamaindex/core/schema";
import { type BaseReader, Document } from "@llamaindex/core/schema";
import type { MongoClient } from "mongodb";
import type { BaseReader } from "./type.js";
/**
* Read in from MongoDB
@@ -1,6 +1,4 @@
import { Document } from "@llamaindex/core/schema";
import { FileReader } from "./type.js";
import { Document, FileReader } from "@llamaindex/core/schema";
/**
* Read a .txt file
*/
-124
View File
@@ -1,124 +0,0 @@
import type { Document } from "@llamaindex/core/schema";
import { fs, path } from "@llamaindex/env";
/**
* A reader takes imports data into Document objects.
*/
export interface BaseReader {
loadData(...args: unknown[]): Promise<Document[]>;
}
/**
* A FileReader takes file paths and imports data into Document objects.
*/
export abstract class FileReader implements BaseReader {
abstract loadDataAsContent(
fileContent: Uint8Array,
fileName?: string,
): Promise<Document[]>;
async loadData(filePath: string): Promise<Document[]> {
const fileContent = await fs.readFile(filePath);
const fileName = path.basename(filePath);
const docs = await this.loadDataAsContent(fileContent, fileName);
docs.forEach(FileReader.addMetaData(filePath));
return docs;
}
static addMetaData(filePath: string) {
return (doc: Document, index: number) => {
// generate id as loadDataAsContent is only responsible for the content
doc.id_ = `${filePath}_${index + 1}`;
doc.metadata["file_path"] = path.resolve(filePath);
doc.metadata["file_name"] = path.basename(filePath);
};
}
}
// For LlamaParseReader.ts
export type ResultType = "text" | "markdown" | "json";
export type Language =
| "abq"
| "ady"
| "af"
| "ang"
| "ar"
| "as"
| "ava"
| "az"
| "be"
| "bg"
| "bh"
| "bho"
| "bn"
| "bs"
| "ch_sim"
| "ch_tra"
| "che"
| "cs"
| "cy"
| "da"
| "dar"
| "de"
| "en"
| "es"
| "et"
| "fa"
| "fr"
| "ga"
| "gom"
| "hi"
| "hr"
| "hu"
| "id"
| "inh"
| "is"
| "it"
| "ja"
| "kbd"
| "kn"
| "ko"
| "ku"
| "la"
| "lbe"
| "lez"
| "lt"
| "lv"
| "mah"
| "mai"
| "mi"
| "mn"
| "mr"
| "ms"
| "mt"
| "ne"
| "new"
| "nl"
| "no"
| "oc"
| "pi"
| "pl"
| "pt"
| "ro"
| "ru"
| "rs_cyrillic"
| "rs_latin"
| "sck"
| "sk"
| "sl"
| "sq"
| "sv"
| "sw"
| "ta"
| "tab"
| "te"
| "th"
| "tjk"
| "tl"
| "tr"
| "ug"
| "uk"
| "ur"
| "uz"
| "vi";
@@ -273,7 +273,7 @@ export class PGVectorStore
const paramIndex = params.length + 1;
whereClauses.push(`metadata->>'${filter.key}' = $${paramIndex}`);
// TODO: support filter with other operators
if (!Array.isArray(filter.value)) {
if (!Array.isArray(filter.value) && filter.value) {
params.push(filter.value);
}
});
@@ -36,7 +36,7 @@ type MetadataValue = Record<string, any>;
// Mapping of filter operators to metadata filter functions
const OPERATOR_TO_FILTER: {
[key in FilterOperator]: (
[key in FilterOperator]?: (
{ key, value }: MetadataFilter,
metadata: MetadataValue,
) => boolean;
@@ -94,7 +94,20 @@ const buildFilterFn = (
const queryCondition = condition || "and"; // default to and
const itemFilterFn = (filter: MetadataFilter): boolean => {
if (metadata[filter.key] === undefined) return false; // always return false if the metadata key is not present
if (filter.operator === FilterOperator.IS_EMPTY) {
// for `is_empty` operator, return true if the metadata key is not present or the value is empty
const value = metadata[filter.key];
return (
value === undefined ||
value === null ||
value === "" ||
(Array.isArray(value) && value.length === 0)
);
}
if (metadata[filter.key] === undefined) {
// for other operators, always return false if the metadata key is not present
return false;
}
const metadataLookupFn = OPERATOR_TO_FILTER[filter.operator];
if (!metadataLookupFn)
throw new Error(`Unsupported operator: ${filter.operator}`);
@@ -33,6 +33,7 @@ export enum FilterOperator {
ALL = "all", // Contains all (array of strings)
TEXT_MATCH = "text_match", // full text match (allows you to search for a specific substring, token or phrase within the text field)
CONTAINS = "contains", // metadata array contains value (string or number)
IS_EMPTY = "is_empty", // the field does not exist or is empty (null or empty array)
}
export enum FilterCondition {
@@ -44,7 +45,7 @@ export type MetadataFilterValue = string | number | string[] | number[];
export interface MetadataFilter {
key: string;
value: MetadataFilterValue;
value?: MetadataFilterValue;
operator: `${FilterOperator}`; // ==, any, all,...
}
@@ -80,7 +80,7 @@ export function metadataDictToNode(
}
export const parsePrimitiveValue = (
value: MetadataFilterValue,
value?: MetadataFilterValue,
): string | number => {
if (typeof value !== "number" && typeof value !== "string") {
throw new Error("Value must be a string or number");
@@ -89,7 +89,7 @@ export const parsePrimitiveValue = (
};
export const parseArrayValue = (
value: MetadataFilterValue,
value?: MetadataFilterValue,
): string[] | number[] => {
const isPrimitiveArray =
Array.isArray(value) &&
+6
View File
@@ -1,5 +1,11 @@
# @llamaindex/core-test
## 0.0.7
### Patch Changes
- 01c184c: Add is_empty operator for filtering vector store
## 0.0.6
### Patch Changes
+11
View File
@@ -0,0 +1,11 @@
import { expect, test } from "vitest";
// Guards the public API surface: the node classes must be reachable from the
// package's top-level entry point.
test("Node classes should be included in the top level", async () => {
  const llamaindex = await import("llamaindex");
  expect(llamaindex.Document).toBeDefined();
  expect(llamaindex.IndexNode).toBeDefined();
  expect(llamaindex.TextNode).toBeDefined();
  expect(llamaindex.BaseNode).toBeDefined();
});
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/llamaindex-test",
"private": true,
"version": "0.0.6",
"version": "0.0.7",
"type": "module",
"scripts": {
"test": "vitest run"
@@ -256,6 +256,18 @@ describe("SimpleVectorStore", () => {
},
expected: 1,
},
{
title: "Filter IS_EMPTY",
filters: {
filters: [
{
key: "not-exist-key",
operator: "is_empty",
},
],
},
expected: 3,
},
{
title: "Filter OR",
filters: {
+50 -12
View File
@@ -148,7 +148,7 @@ importers:
version: 2.4.4
chromadb:
specifier: ^1.8.1
version: 1.8.1(@google/generative-ai@0.12.0)(cohere-ai@7.10.6(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))(encoding@0.1.13))(encoding@0.1.13)(openai@4.52.5(encoding@0.1.13))
version: 1.8.1(@google/generative-ai@0.12.0)(cohere-ai@7.10.6(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))(encoding@0.1.13))(encoding@0.1.13)(openai@4.55.3(encoding@0.1.13)(zod@3.23.8))
commander:
specifier: ^12.1.0
version: 12.1.0
@@ -272,7 +272,7 @@ importers:
version: 1.1.0(@types/react@18.3.3)(react@18.3.1)
ai:
specifier: ^3.2.1
version: 3.2.19(openai@4.52.5)(react@18.3.1)(svelte@4.2.18)(vue@3.4.31(typescript@5.5.3))(zod@3.23.8)
version: 3.2.19(openai@4.55.3(zod@3.23.8))(react@18.3.1)(svelte@4.2.18)(vue@3.4.31(typescript@5.5.3))(zod@3.23.8)
class-variance-authority:
specifier: ^0.7.0
version: 0.7.0
@@ -541,7 +541,7 @@ importers:
version: 4.6.0
chromadb:
specifier: 1.8.1
version: 1.8.1(@google/generative-ai@0.12.0)(cohere-ai@7.10.6(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))(encoding@0.1.13))(encoding@0.1.13)(openai@4.52.5(encoding@0.1.13))
version: 1.8.1(@google/generative-ai@0.12.0)(cohere-ai@7.10.6(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))(encoding@0.1.13))(encoding@0.1.13)(openai@4.55.3(encoding@0.1.13)(zod@3.23.8))
cohere-ai:
specifier: 7.10.6
version: 7.10.6(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))(encoding@0.1.13)
@@ -573,8 +573,8 @@ importers:
specifier: ^1.0.0
version: 1.0.0(encoding@0.1.13)
openai:
specifier: ^4.52.5
version: 4.52.5(encoding@0.1.13)
specifier: ^4.55.3
version: 4.55.3(encoding@0.1.13)(zod@3.23.8)
papaparse:
specifier: ^5.4.1
version: 5.4.1
@@ -684,7 +684,7 @@ importers:
dependencies:
ai:
specifier: ^3.2.1
version: 3.2.19(openai@4.52.5)(react@18.3.1)(svelte@4.2.18)(vue@3.4.31(typescript@5.5.3))(zod@3.23.8)
version: 3.2.19(openai@4.55.3(zod@3.23.8))(react@18.3.1)(svelte@4.2.18)(vue@3.4.31(typescript@5.5.3))(zod@3.23.8)
llamaindex:
specifier: workspace:*
version: link:../../..
@@ -8330,6 +8330,15 @@ packages:
resolution: {integrity: sha512-qqH8GsyPE3z06took/2uWOGqRcrZNlRoPAsihpg4jsl0+2Dfelnw6HDDMep0EI2Cfzw75nn3vHRZehep/IZzxg==}
hasBin: true
openai@4.55.3:
resolution: {integrity: sha512-/IUDdK5w3aB1Kd88Ml7w5F+EkVM5aXlrY+lSpWXdIPL18CmGkC7lV9HFJ7beR0OUSFLFT0qmWvMynqtbMF06/Q==}
hasBin: true
peerDependencies:
zod: ^3.23.8
peerDependenciesMeta:
zod:
optional: true
opener@1.5.2:
resolution: {integrity: sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==}
hasBin: true
@@ -15720,7 +15729,7 @@ snapshots:
'@vue/shared': 3.4.31
estree-walker: 2.0.2
magic-string: 0.30.11
postcss: 8.4.40
postcss: 8.4.39
source-map-js: 1.2.0
'@vue/compiler-ssr@3.4.31':
@@ -15911,7 +15920,7 @@ snapshots:
clean-stack: 2.2.0
indent-string: 4.0.0
ai@3.2.19(openai@4.52.5)(react@18.3.1)(svelte@4.2.18)(vue@3.4.31(typescript@5.5.3))(zod@3.23.8):
ai@3.2.19(openai@4.55.3(zod@3.23.8))(react@18.3.1)(svelte@4.2.18)(vue@3.4.31(typescript@5.5.3))(zod@3.23.8):
dependencies:
'@ai-sdk/provider': 0.0.12
'@ai-sdk/provider-utils': 1.0.2(zod@3.23.8)
@@ -15928,7 +15937,7 @@ snapshots:
sswr: 2.1.0(svelte@4.2.18)
zod-to-json-schema: 3.22.5(zod@3.23.8)
optionalDependencies:
openai: 4.52.5(encoding@0.1.13)
openai: 4.55.3(zod@3.23.8)
react: 18.3.1
svelte: 4.2.18
zod: 3.23.8
@@ -16623,14 +16632,14 @@ snapshots:
chownr@2.0.0: {}
chromadb@1.8.1(@google/generative-ai@0.12.0)(cohere-ai@7.10.6(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))(encoding@0.1.13))(encoding@0.1.13)(openai@4.52.5(encoding@0.1.13)):
chromadb@1.8.1(@google/generative-ai@0.12.0)(cohere-ai@7.10.6(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))(encoding@0.1.13))(encoding@0.1.13)(openai@4.55.3(encoding@0.1.13)(zod@3.23.8)):
dependencies:
cliui: 8.0.1
isomorphic-fetch: 3.0.0(encoding@0.1.13)
optionalDependencies:
'@google/generative-ai': 0.12.0
cohere-ai: 7.10.6(@aws-sdk/client-sso-oidc@3.613.0(@aws-sdk/client-sts@3.613.0))(encoding@0.1.13)
openai: 4.52.5(encoding@0.1.13)
openai: 4.55.3(encoding@0.1.13)(zod@3.23.8)
transitivePeerDependencies:
- encoding
@@ -20693,7 +20702,7 @@ snapshots:
execa: 8.0.1
pathe: 1.1.2
pkg-types: 1.1.3
ufo: 1.5.3
ufo: 1.5.4
object-assign@4.1.1: {}
@@ -20824,6 +20833,35 @@ snapshots:
transitivePeerDependencies:
- encoding
openai@4.55.3(encoding@0.1.13)(zod@3.23.8):
dependencies:
'@types/node': 18.19.39
'@types/node-fetch': 2.6.11
abort-controller: 3.0.0
agentkeepalive: 4.5.0
form-data-encoder: 1.7.2
formdata-node: 4.4.1
node-fetch: 2.7.0(encoding@0.1.13)
optionalDependencies:
zod: 3.23.8
transitivePeerDependencies:
- encoding
openai@4.55.3(zod@3.23.8):
dependencies:
'@types/node': 18.19.39
'@types/node-fetch': 2.6.11
abort-controller: 3.0.0
agentkeepalive: 4.5.0
form-data-encoder: 1.7.2
formdata-node: 4.4.1
node-fetch: 2.7.0(encoding@0.1.13)
optionalDependencies:
zod: 3.23.8
transitivePeerDependencies:
- encoding
optional: true
opener@1.5.2: {}
option@0.2.4: {}