Compare commits

..

5 Commits

Author SHA1 Message Date
github-actions[bot] bf8cbeb6c5 Release 0.5.18 (#1124)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-08-19 12:53:28 +09:00
Alex Yang e27e7dd054 chore: bump natural to 8.0.1 (#1126) 2024-08-17 07:15:08 -07:00
Thuc Pham 8b66cf4341 feat: support organization id in llamacloud index (#1123)
Co-authored-by: Marcus Schiesser <mail@marcusschiesser.de>
2024-08-15 13:51:48 +07:00
github-actions[bot] 6f4549bdea Release 0.5.17 (#1117)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-08-12 17:45:29 +07:00
Thuc Pham c654398f75 feat: implement Weaviate Vector Store in TS (#1109) 2024-08-12 17:41:05 +07:00
38 changed files with 974 additions and 1606 deletions
+14
View File
@@ -1,5 +1,19 @@
# docs
## 0.0.59
### Patch Changes
- Updated dependencies [8b66cf4]
- llamaindex@0.5.18
## 0.0.58
### Patch Changes
- Updated dependencies [c654398]
- llamaindex@0.5.17
## 0.0.57
### Patch Changes
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "docs",
"version": "0.0.57",
"version": "0.0.59",
"private": true,
"scripts": {
"docusaurus": "docusaurus",
+31
View File
@@ -0,0 +1,31 @@
# Weaviate Vector Store
Here are two sample scripts that load data into, and query data from, a Weaviate Vector Store.
## Prerequisites
- A Weaviate Vector Database
- Hosted https://weaviate.io/
- Self Hosted https://weaviate.io/developers/weaviate/installation/docker-compose#starter-docker-compose-file
- An OpenAI API Key
## Setup
1. Set your env variables:
- `WEAVIATE_CLUSTER_URL`: Address of your Weaviate Vector Store (like localhost:8080)
- `WEAVIATE_API_KEY`: Your Weaviate API key
- `OPENAI_API_KEY`: Your OpenAI key
2. `cd` into the `examples` directory
3. run `npm i`
## Load the data
This sample loads the sample dataset of movie reviews (`./data/movie_reviews.csv`) into Weaviate.
run `npx tsx weaviate/load`
## Use RAG to Query the data
run `npx tsx weaviate/query`
+23
View File
@@ -0,0 +1,23 @@
import {
PapaCSVReader,
storageContextFromDefaults,
VectorStoreIndex,
WeaviateVectorStore,
} from "llamaindex";
const indexName = "MovieReviews";
/**
 * Loads the movie review CSV and indexes its documents into the Weaviate
 * collection named by `indexName`.
 *
 * Any failure is reported on stderr; the promise itself never rejects.
 */
async function main() {
  const csvPath = "./data/movie_reviews.csv";

  try {
    const documents = await new PapaCSVReader(false).loadData(csvPath);

    // The storage context routes the index's vectors into Weaviate.
    const storageContext = await storageContextFromDefaults({
      vectorStore: new WeaviateVectorStore({ indexName }),
    });

    await VectorStoreIndex.fromDocuments(documents, { storageContext });
    console.log("Successfully loaded data into Weaviate");
  } catch (error) {
    console.error(error);
  }
}

void main();
+46
View File
@@ -0,0 +1,46 @@
import { VectorStoreIndex, WeaviateVectorStore } from "llamaindex";
const indexName = "MovieReviews";
/**
 * Queries the existing Weaviate collection named by `indexName` twice with
 * the same question: once unfiltered, then restricted (via `preFilters`) to
 * two specific `document_id` values combined with "or".
 *
 * Any failure is reported on stderr; the promise itself never rejects.
 */
async function main() {
  const query = "Get all movie titles.";

  try {
    const index = await VectorStoreIndex.fromVectorStore(
      new WeaviateVectorStore({ indexName }),
    );
    // Both query engines share one retriever returning up to 20 nodes.
    const retriever = index.asRetriever({ similarityTopK: 20 });

    const plainEngine = index.asQueryEngine({ retriever });
    const plainAnswer = await plainEngine.query({ query });
    console.log(`Query from ${plainAnswer.sourceNodes?.length} nodes`);
    console.log(plainAnswer.response);

    console.log("\n=====\nQuerying the index with filters");

    const filteredEngine = index.asQueryEngine({
      retriever,
      preFilters: {
        condition: "or",
        filters: [
          {
            key: "document_id",
            operator: "==",
            value: "./data/movie_reviews.csv_37",
          },
          {
            key: "document_id",
            operator: "==",
            value: "./data/movie_reviews.csv_21",
          },
        ],
      },
    });
    const filteredAnswer = await filteredEngine.query({ query });
    console.log(`Query from ${filteredAnswer.sourceNodes?.length} nodes`);
    console.log(filteredAnswer.response);
  } catch (error) {
    console.error(error);
  }
}

void main();
@@ -1,5 +1,21 @@
# @llamaindex/autotool-02-next-example
## 0.1.43
### Patch Changes
- Updated dependencies [8b66cf4]
- llamaindex@0.5.18
- @llamaindex/autotool@2.0.1
## 0.1.42
### Patch Changes
- Updated dependencies [c654398]
- llamaindex@0.5.17
- @llamaindex/autotool@2.0.1
## 0.1.41
### Patch Changes
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/autotool-02-next-example",
"private": true,
"version": "0.1.41",
"version": "0.1.43",
"scripts": {
"dev": "next dev",
"build": "next build",
+1 -1
View File
@@ -51,7 +51,7 @@
"unplugin": "^1.10.1"
},
"peerDependencies": {
"llamaindex": "^0.5.16",
"llamaindex": "^0.5.18",
"openai": "^4",
"typescript": "^4"
},
+7
View File
@@ -1,5 +1,12 @@
# @llamaindex/community
## 0.0.30
### Patch Changes
- Updated dependencies [e27e7dd]
- @llamaindex/core@0.1.9
## 0.0.29
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/community",
"description": "Community package for LlamaIndexTS",
"version": "0.0.29",
"version": "0.0.30",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+6
View File
@@ -1,5 +1,11 @@
# @llamaindex/core
## 0.1.9
### Patch Changes
- e27e7dd: chore: bump `natural` to 8.0.1
## 0.1.8
### Patch Changes
+2 -2
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/core",
"type": "module",
"version": "0.1.8",
"version": "0.1.9",
"description": "LlamaIndex Core Module",
"exports": {
"./node-parser": {
@@ -132,7 +132,7 @@
"devDependencies": {
"ajv": "^8.16.0",
"bunchee": "5.3.1",
"natural": "^7.1.0"
"natural": "^8.0.1"
},
"dependencies": {
"@llamaindex/env": "workspace:*",
File diff suppressed because it is too large Load Diff
@@ -1,4 +1,5 @@
/**
 * Ambient type declaration for the sentence tokenizer class.
 * NOTE(review): presumably describes the bundled JavaScript sentence
 * tokenizer shipped alongside this declaration — confirm the pairing.
 */
declare class SentenceTokenizer {
/**
 * @param abbreviations Optional list of abbreviation strings handed to the
 * tokenizer (for example "etc." or "i.e.").
 */
constructor(abbreviations?: string[]);
/**
 * Splits `text` into an array of strings.
 * @param text The input text.
 * @returns The resulting pieces, in order.
 */
tokenize(text: string): string[];
}
@@ -0,0 +1,222 @@
var __getOwnPropNames = Object.getOwnPropertyNames;
var __commonJS = (cb, mod) =>
function __require() {
return (
mod ||
(0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod),
mod.exports
);
};
// lib/natural/tokenizers/tokenizer.js
var require_tokenizer = __commonJS({
  "lib/natural/tokenizers/tokenizer.js"(exports, module) {
    "use strict";
    /**
     * Base class for tokenizers; it only contributes the `trim` helper.
     */
    class Tokenizer {
      /**
       * Removes empty-string entries from the end and then from the start of
       * `array`. The array is modified in place and also returned; empty
       * strings in the middle are left alone.
       */
      trim(array) {
        const isEmptyAt = (index) => array[index] === "";
        while (isEmptyAt(array.length - 1)) {
          array.pop();
        }
        while (isEmptyAt(0)) {
          array.shift();
        }
        return array;
      }
    }
    module.exports = Tokenizer;
  },
});
// lib/natural/tokenizers/sentence_tokenizer.js
var require_sentence_tokenizer = __commonJS({
  "lib/natural/tokenizers/sentence_tokenizer.js"(exports, module) {
    var Tokenizer = require_tokenizer();
    // Base names of the placeholder codes substituted into the text.
    var NUM = "NUMBER";
    var DELIM = "DELIM";
    var URI = "URI";
    var ABBREV = "ABBREV";
    // Flip to true to trace every tokenization phase on the console.
    var DEBUG = false;

    /** Builds a placeholder such as `{{URI_3}}` from a base name and a counter. */
    function generateUniqueCode(base, index) {
      return `{{${base}_${index}}}`;
    }

    /** Escapes regular-expression metacharacters so `string` matches literally. */
    function escapeRegExp(string) {
      return string.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    }

    /**
     * Sentence tokenizer that protects abbreviations, URIs/e-mail addresses
     * and numbers behind placeholders, splits the text after sentence
     * delimiters, and then restores the protected text in each sentence.
     */
    var SentenceTokenizer = class extends Tokenizer {
      /**
       * @param abbreviations Optional array of abbreviation strings whose
       *   occurrences (matched case-insensitively) must not end a sentence.
       */
      constructor(abbreviations) {
        super();
        if (abbreviations) {
          this.abbreviations = abbreviations;
        } else {
          this.abbreviations = [];
        }
        // Both maps are (re)created at the start of every tokenize() call.
        this.replacementMap = null;
        this.delimiterMap = null;
        this.replacementCounter = 0;
      }

      /** Replaces URLs and e-mail addresses with `{{URI_n}}` placeholders. */
      replaceUrisWithPlaceholders(text) {
        const urlPattern =
          /(https?:\/\/\S+|www\.\S+|ftp:\/\/\S+|(mailto:)?[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}|file:\/\/\S+)/gi;
        const modifiedText = text.replace(urlPattern, (match) => {
          const placeholder = generateUniqueCode(
            URI,
            this.replacementCounter++,
          );
          this.replacementMap.set(placeholder, match);
          return placeholder;
        });
        return modifiedText;
      }

      /**
       * Replaces every occurrence of a configured abbreviation with an
       * `{{ABBREV_n}}` placeholder. Returns `text` unchanged when no
       * abbreviations are configured.
       */
      replaceAbbreviations(text) {
        if (this.abbreviations.length === 0) {
          return text;
        }
        const pattern = new RegExp(
          `(${this.abbreviations.map((abbrev) => escapeRegExp(abbrev)).join("|")})`,
          "gi",
        );
        const replacedText = text.replace(pattern, (match) => {
          const code = generateUniqueCode(ABBREV, this.replacementCounter++);
          this.replacementMap.set(code, match);
          return code;
        });
        return replacedText;
      }

      /**
       * Replaces each run of sentence delimiters (plus an optional closing
       * quote or bracket) with a `{{DELIM_n}}` placeholder and records the
       * original characters in `delimiterMap`.
       */
      replaceDelimitersWithPlaceholders(text) {
        const delimiterPattern = /([.?!… ]*)([.?!…])(["'”’)}\]]?)/g;
        const modifiedText = text.replace(
          delimiterPattern,
          (match, p1, p2, p3) => {
            const placeholder = generateUniqueCode(
              DELIM,
              this.replacementCounter++,
            );
            this.delimiterMap.set(placeholder, p1 + p2 + p3);
            return placeholder;
          },
        );
        return modifiedText;
      }

      /**
       * Splits `text` after every delimiter placeholder, keeping each
       * placeholder attached to the sentence it terminates. The
       * `placeholders` parameter is unused; it is kept for compatibility.
       */
      splitOnPlaceholders(text, placeholders) {
        if (this.delimiterMap.size === 0) {
          return [text];
        }
        const keys = Array.from(this.delimiterMap.keys());
        const pattern = new RegExp(`(${keys.map(escapeRegExp).join("|")})`);
        // Splitting on a capturing group yields [sentence, placeholder, sentence, ...].
        const parts = text.split(pattern);
        const sentences = [];
        for (let i = 0; i < parts.length; i += 2) {
          const sentence = parts[i];
          const placeholder = parts[i + 1] || "";
          sentences.push(sentence + placeholder);
        }
        return sentences;
      }

      /** Replaces numbers (e.g. `1,000.5`) with `{{NUMBER_n}}` placeholders. */
      replaceNumbersWithCode(text) {
        const numberPattern = /\b\d{1,3}(?:,\d{3})*(?:\.\d+)?\b/g;
        const replacedText = text.replace(numberPattern, (match) => {
          const code = generateUniqueCode(NUM, this.replacementCounter++);
          this.replacementMap.set(code, match);
          return code;
        });
        return replacedText;
      }

      /**
       * Restores abbreviation, URI and number placeholders in `text`.
       *
       * Placeholders are restored newest-first. Abbreviations are replaced
       * before URIs, so a stored URI may itself contain an abbreviation
       * placeholder (e.g. `http://example.com/etc.`); undoing the later
       * substitution first exposes the earlier placeholder so it is restored
       * too. Restoring is done with split/join so that `$` sequences in the
       * original text (such as `$$` or `$&` inside a URL) are inserted
       * literally instead of being interpreted as replacement patterns.
       */
      revertReplacements(text) {
        let originalText = text;
        const newestFirst = Array.from(this.replacementMap.entries()).reverse();
        for (const [placeholder, replacement] of newestFirst) {
          originalText = originalText.split(placeholder).join(replacement);
        }
        return originalText;
      }

      /** Restores the original delimiter characters for every `{{DELIM_n}}`. */
      revertDelimiters(text) {
        let originalText = text;
        for (const [placeholder, replacement] of this.delimiterMap.entries()) {
          // Literal split/join: no regex or `$` pattern interpretation.
          originalText = originalText.split(placeholder).join(replacement);
        }
        return originalText;
      }

      /**
       * Splits `text` into sentences.
       *
       * @param text The text to tokenize.
       * @returns The sentences, each trimmed of surrounding whitespace, with
       *   empty entries removed from the start and end of the list.
       */
      tokenize(text) {
        // Fresh state per call so placeholders never leak between inputs.
        this.replacementCounter = 0;
        this.replacementMap = /* @__PURE__ */ new Map();
        this.delimiterMap = /* @__PURE__ */ new Map();
        DEBUG &&
          console.log(
            "---Start of sentence tokenization-----------------------",
          );
        DEBUG && console.log("Original input: >>>" + text + "<<<");
        const result1 = this.replaceAbbreviations(text);
        DEBUG &&
          console.log(
            "Phase 1: replacing abbreviations: " + JSON.stringify(result1),
          );
        const result2 = this.replaceUrisWithPlaceholders(result1);
        DEBUG &&
          console.log("Phase 2: replacing URIs: " + JSON.stringify(result2));
        const result3 = this.replaceNumbersWithCode(result2);
        DEBUG &&
          console.log(
            "Phase 3: replacing numbers with placeholders: " +
              JSON.stringify(result3),
          );
        const result4 = this.replaceDelimitersWithPlaceholders(result3);
        DEBUG &&
          console.log(
            "Phase 4: replacing delimiters with placeholders: " +
              JSON.stringify(result4),
          );
        const sentences = this.splitOnPlaceholders(result4);
        DEBUG &&
          console.log(
            "Phase 5: splitting into sentences on placeholders: " +
              JSON.stringify(sentences),
          );
        const newSentences = sentences.map((s) => {
          const s1 = this.revertReplacements(s);
          return this.revertDelimiters(s1);
        });
        DEBUG &&
          console.log(
            "Phase 6: replacing back abbreviations, URIs, numbers and delimiters: " +
              JSON.stringify(newSentences),
          );
        const trimmedSentences = this.trim(newSentences);
        DEBUG &&
          console.log(
            "Phase 7: trimming array of empty sentences: " +
              JSON.stringify(trimmedSentences),
          );
        const trimmedSentences2 = trimmedSentences.map((sent) => sent.trim());
        DEBUG &&
          console.log(
            "Phase 8: trimming sentences from surrounding whitespace: " +
              JSON.stringify(trimmedSentences2),
          );
        DEBUG &&
          console.log(
            "---End of sentence tokenization--------------------------",
          );
        DEBUG &&
          console.log(
            "---Replacement map---------------------------------------",
          );
        DEBUG && console.log([...this.replacementMap.entries()]);
        DEBUG &&
          console.log(
            "---Delimiter map-----------------------------------------",
          );
        DEBUG && console.log([...this.delimiterMap.entries()]);
        DEBUG &&
          console.log(
            "---------------------------------------------------------",
          );
        return trimmedSentences2;
      }
    };
    module.exports = SentenceTokenizer;
  },
});
export default require_sentence_tokenizer();
+9 -3
View File
@@ -1,5 +1,5 @@
import type { TextSplitter } from "./base";
import SentenceTokenizerNew from "./sentence-tokenizer-parser.js";
import SentenceTokenizer from "./sentence_tokenizer";
export type TextSplitterFn = (text: string) => string[];
@@ -31,11 +31,17 @@ export const splitByChar = (): TextSplitterFn => {
return (text: string) => text.split("");
};
let sentenceTokenizer: SentenceTokenizerNew | null = null;
let sentenceTokenizer: SentenceTokenizer | null = null;
export const splitBySentenceTokenizer = (): TextSplitterFn => {
if (!sentenceTokenizer) {
sentenceTokenizer = new SentenceTokenizerNew();
sentenceTokenizer = new SentenceTokenizer([
"i.e.",
"etc.",
"vs.",
"Inc.",
"A.S.A.P.",
]);
}
const tokenizer = sentenceTokenizer;
return (text: string) => {
+14
View File
@@ -1,5 +1,19 @@
# @llamaindex/experimental
## 0.0.68
### Patch Changes
- Updated dependencies [8b66cf4]
- llamaindex@0.5.18
## 0.0.67
### Patch Changes
- Updated dependencies [c654398]
- llamaindex@0.5.17
## 0.0.66
### Patch Changes
+1 -1
View File
@@ -1,7 +1,7 @@
{
"name": "@llamaindex/experimental",
"description": "Experimental package for LlamaIndexTS",
"version": "0.0.66",
"version": "0.0.68",
"type": "module",
"types": "dist/type/index.d.ts",
"main": "dist/cjs/index.js",
+14
View File
@@ -1,5 +1,19 @@
# llamaindex
## 0.5.18
### Patch Changes
- 8b66cf4: feat: support organization id in llamacloud index
- Updated dependencies [e27e7dd]
- @llamaindex/core@0.1.9
## 0.5.17
### Patch Changes
- c654398: Implement Weaviate Vector Store in TS
## 0.5.16
### Patch Changes
@@ -1,5 +1,19 @@
# @llamaindex/cloudflare-worker-agent-test
## 0.0.52
### Patch Changes
- Updated dependencies [8b66cf4]
- llamaindex@0.5.18
## 0.0.51
### Patch Changes
- Updated dependencies [c654398]
- llamaindex@0.5.17
## 0.0.50
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/cloudflare-worker-agent-test",
"version": "0.0.50",
"version": "0.0.52",
"type": "module",
"private": true,
"scripts": {
@@ -1,5 +1,19 @@
# @llamaindex/next-agent-test
## 0.1.52
### Patch Changes
- Updated dependencies [8b66cf4]
- llamaindex@0.5.18
## 0.1.51
### Patch Changes
- Updated dependencies [c654398]
- llamaindex@0.5.17
## 0.1.50
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-agent-test",
"version": "0.1.50",
"version": "0.1.52",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,19 @@
# test-edge-runtime
## 0.1.51
### Patch Changes
- Updated dependencies [8b66cf4]
- llamaindex@0.5.18
## 0.1.50
### Patch Changes
- Updated dependencies [c654398]
- llamaindex@0.5.17
## 0.1.49
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/nextjs-edge-runtime-test",
"version": "0.1.49",
"version": "0.1.51",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,19 @@
# @llamaindex/next-node-runtime
## 0.0.33
### Patch Changes
- Updated dependencies [8b66cf4]
- llamaindex@0.5.18
## 0.0.32
### Patch Changes
- Updated dependencies [c654398]
- llamaindex@0.5.17
## 0.0.31
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/next-node-runtime-test",
"version": "0.0.31",
"version": "0.0.33",
"private": true,
"scripts": {
"dev": "next dev",
@@ -1,5 +1,19 @@
# @llamaindex/waku-query-engine-test
## 0.0.52
### Patch Changes
- Updated dependencies [8b66cf4]
- llamaindex@0.5.18
## 0.0.51
### Patch Changes
- Updated dependencies [c654398]
- llamaindex@0.5.17
## 0.0.50
### Patch Changes
@@ -1,6 +1,6 @@
{
"name": "@llamaindex/waku-query-engine-test",
"version": "0.0.50",
"version": "0.0.52",
"type": "module",
"private": true,
"scripts": {
+2 -1
View File
@@ -1,6 +1,6 @@
{
"name": "llamaindex",
"version": "0.5.16",
"version": "0.5.18",
"license": "MIT",
"type": "module",
"keywords": [
@@ -65,6 +65,7 @@
"string-strip-html": "^13.4.8",
"tiktoken": "^1.0.15",
"unpdf": "^0.11.0",
"weaviate-client": "^3.1.4",
"wikipedia": "^2.1.2",
"wink-nlp": "^2.3.0",
"zod": "^3.23.8"
@@ -8,7 +8,7 @@ import type { CloudRetrieveParams } from "./LlamaCloudRetriever.js";
import { LlamaCloudRetriever } from "./LlamaCloudRetriever.js";
import { getPipelineCreate } from "./config.js";
import type { CloudConstructorParams } from "./constants.js";
import { getAppBaseUrl, initService } from "./utils.js";
import { getAppBaseUrl, getProjectId, initService } from "./utils.js";
import { PipelinesService, ProjectsService } from "@llamaindex/cloud/api";
import { SentenceSplitter } from "@llamaindex/core/node-parser";
@@ -132,18 +132,28 @@ export class LlamaCloudIndex {
await this.waitForPipelineIngestion(verbose, raiseOnError);
}
private async getPipelineId(
name: string,
projectName: string,
public async getPipelineId(
name?: string,
projectName?: string,
): Promise<string> {
const pipelines = await PipelinesService.searchPipelinesApiV1PipelinesGet({
projectName,
pipelineName: name,
projectId: await this.getProjectId(projectName),
pipelineName: name ?? this.params.name,
});
return pipelines[0].id;
}
/**
 * Resolves a project id through the shared `getProjectId` helper.
 *
 * @param projectName Project name; falls back to `this.params.projectName`.
 * @param organizationId Organization id; falls back to
 *   `this.params.organizationId`.
 * @returns The id returned by the helper.
 */
public async getProjectId(
  projectName?: string,
  organizationId?: string,
): Promise<string> {
  const effectiveProjectName = projectName ?? this.params.projectName;
  const effectiveOrganizationId =
    organizationId ?? this.params.organizationId;
  const projectId = await getProjectId(
    effectiveProjectName,
    effectiveOrganizationId,
  );
  return projectId;
}
static async fromDocuments(
params: {
documents: Document[];
@@ -168,6 +178,7 @@ export class LlamaCloudIndex {
});
const project = await ProjectsService.upsertProjectApiV1ProjectsPut({
organizationId: params.organizationId,
requestBody: {
name: params.projectName ?? "default",
},
@@ -11,7 +11,7 @@ import { extractText, wrapEventCaller } from "@llamaindex/core/utils";
import type { BaseRetriever, RetrieveParams } from "../Retriever.js";
import type { ClientParams, CloudConstructorParams } from "./constants.js";
import { DEFAULT_PROJECT_NAME } from "./constants.js";
import { initService } from "./utils.js";
import { getProjectId, initService } from "./utils.js";
export type CloudRetrieveParams = Omit<
RetrievalParams,
@@ -21,6 +21,7 @@ export type CloudRetrieveParams = Omit<
export class LlamaCloudRetriever implements BaseRetriever {
clientParams: ClientParams;
retrieveParams: CloudRetrieveParams;
organizationId?: string;
projectName: string = DEFAULT_PROJECT_NAME;
pipelineName: string;
@@ -49,6 +50,9 @@ export class LlamaCloudRetriever implements BaseRetriever {
if (params.projectName) {
this.projectName = params.projectName;
}
if (params.organizationId) {
this.organizationId = params.organizationId;
}
}
@wrapEventCaller
@@ -57,7 +61,7 @@ export class LlamaCloudRetriever implements BaseRetriever {
preFilters,
}: RetrieveParams): Promise<NodeWithScore[]> {
const pipelines = await PipelinesService.searchPipelinesApiV1PipelinesGet({
projectName: this.projectName,
projectId: await getProjectId(this.projectName, this.organizationId),
pipelineName: this.pipelineName,
});
@@ -8,5 +8,6 @@ export type ClientParams = { apiKey?: string; baseUrl?: string };
/**
 * Constructor parameters for the LlamaCloud classes, combined with the
 * connection settings from `ClientParams` (`apiKey`, `baseUrl`).
 */
export type CloudConstructorParams = {
// NOTE(review): presumably the pipeline name — confirm against callers.
name: string;
// Name of the project to look up.
projectName: string;
// Optional organization id, passed along with the project name when looking
// up projects.
organizationId?: string;
serviceContext?: ServiceContext;
} & ClientParams;
+29 -1
View File
@@ -1,4 +1,4 @@
import { OpenAPI } from "@llamaindex/cloud/api";
import { OpenAPI, ProjectsService } from "@llamaindex/cloud/api";
import { getEnv } from "@llamaindex/env";
import type { ClientParams } from "./constants.js";
import { DEFAULT_BASE_URL } from "./constants.js";
@@ -20,3 +20,31 @@ export function initService({ apiKey, baseUrl }: ClientParams = {}) {
);
}
}
/**
 * Looks up the id of the project called `projectName`, optionally scoped to
 * an organization.
 *
 * @param projectName Name of the project to find.
 * @param organizationId Optional organization id passed to the project
 *   listing call.
 * @returns The id of the single matching project.
 * @throws Error when no project matches, when more than one project matches,
 *   or when the matching project has no id.
 */
export async function getProjectId(
  projectName: string,
  organizationId?: string,
): Promise<string> {
  const matches = await ProjectsService.listProjectsApiV1ProjectsGet({
    projectName,
    organizationId,
  });

  const matchCount = matches.length;
  if (matchCount === 0) {
    throw new Error(
      `Unknown project name ${projectName}. Please confirm a managed project with this name exists.`,
    );
  }
  if (matchCount > 1) {
    throw new Error(
      `Multiple projects found with name ${projectName}. Please specify organization_id.`,
    );
  }

  // Exactly one match from here on.
  const projectId = matches[0].id;
  if (!projectId) {
    throw new Error(`No project found with name ${projectName}`);
  }
  return projectId;
}
+1
View File
@@ -18,3 +18,4 @@ export { PineconeVectorStore } from "./vectorStore/PineconeVectorStore.js";
export { QdrantVectorStore } from "./vectorStore/QdrantVectorStore.js";
export { SimpleVectorStore } from "./vectorStore/SimpleVectorStore.js";
export * from "./vectorStore/types.js";
export { WeaviateVectorStore } from "./vectorStore/WeaviateVectorStore.js";
@@ -0,0 +1,339 @@
/* eslint-disable turbo/no-undeclared-env-vars */
import { BaseNode, MetadataMode, type Metadata } from "@llamaindex/core/schema";
import weaviate, {
Filters,
type Collection,
type DeleteManyOptions,
type FilterValue,
type WeaviateClient,
type WeaviateNonGenericObject,
} from "weaviate-client";
import {
VectorStoreBase,
VectorStoreQueryMode,
type IEmbedModel,
type MetadataFilter,
type MetadataFilters,
type VectorStoreNoEmbedModel,
type VectorStoreQuery,
type VectorStoreQueryResult,
} from "./types.js";
import {
metadataDictToNode,
nodeToMetadata,
parseArrayValue,
parseNumberValue,
} from "./utils.js";
// Property definitions used as `properties` when the store creates its
// Weaviate collection from a schema. All four properties are declared as text.
const NODE_SCHEMA = [
{
dataType: ["text"],
description: "Text property",
name: "text",
},
{
dataType: ["text"],
description: "The ref_doc_id of the Node",
name: "ref_doc_id",
},
{
dataType: ["text"],
description: "node_info (in JSON)",
name: "node_info",
},
{
dataType: ["text"],
description: "The relationships of the node (in JSON)",
name: "relationships",
},
];
// Maps a query mode to the result-metadata field that the store reads to
// compute a node's similarity. The values are also the metadata fields
// requested from Weaviate on every query.
const SIMILARITY_KEYS: {
[key: string]: "distance" | "score";
} = {
[VectorStoreQueryMode.DEFAULT]: "distance",
[VectorStoreQueryMode.HYBRID]: "score",
};
/**
 * Translates one metadata filter into the equivalent Weaviate filter on the
 * property named by `filter.key`.
 *
 * Comparison operators (`>`, `<`, `>=`, `<=`) require a numeric value; `any`
 * and `all` take an array value whose items are converted to strings.
 *
 * @throws Error when the operator is not one of the supported ones, or when
 *   the value cannot be parsed for the chosen operator.
 */
const buildFilterItem = (
  collection: Collection,
  filter: MetadataFilter,
): FilterValue => {
  const { key, operator, value } = filter;
  // Built lazily so that an unsupported operator never touches the collection.
  const property = () => collection.filter.byProperty(key);

  switch (operator) {
    case "==":
      return property().equal(value);
    case "!=":
      return property().notEqual(value);
    case ">":
      return property().greaterThan(parseNumberValue(value));
    case "<":
      return property().lessThan(parseNumberValue(value));
    case ">=":
      return property().greaterOrEqual(parseNumberValue(value));
    case "<=":
      return property().lessOrEqual(parseNumberValue(value));
    case "any":
      return property().containsAny(parseArrayValue(value).map(String));
    case "all":
      return property().containsAll(parseArrayValue(value).map(String));
    default:
      throw new Error(`Operator ${operator} is not supported.`);
  }
};
/**
 * Converts a set of metadata filters into a single Weaviate filter.
 *
 * @returns `undefined` when there are no filters, the lone filter when there
 *   is exactly one, and otherwise all filters joined by the set's condition
 *   (defaulting to "and").
 */
const toWeaviateFilter = (
  collection: Collection,
  standardFilters?: MetadataFilters,
): FilterValue | undefined => {
  if (standardFilters == null || standardFilters.filters.length === 0) {
    return undefined;
  }

  const weaviateFilters = standardFilters.filters.map((filter) =>
    buildFilterItem(collection, filter),
  );
  if (weaviateFilters.length === 1) {
    return weaviateFilters[0];
  }

  const joinWith = standardFilters.condition ?? "and";
  return Filters[joinWith](...weaviateFilters);
};
/**
 * Vector store that keeps nodes in a Weaviate collection.
 *
 * The store either uses a caller-supplied Weaviate client or connects to
 * Weaviate Cloud lazily (on first use) with a cluster URL and API key taken
 * from the constructor options or from the `WEAVIATE_CLUSTER_URL` and
 * `WEAVIATE_API_KEY` environment variables.
 */
export class WeaviateVectorStore
  extends VectorStoreBase
  implements VectorStoreNoEmbedModel
{
  public storesText: boolean = true;
  private flatMetadata: boolean = true;

  private weaviateClient?: WeaviateClient;
  // Only assigned when no client is supplied; read when connecting lazily.
  private clusterURL!: string;
  private apiKey!: string;
  private indexName: string;

  // Names of the fields used when writing objects and reading results.
  private idKey: string;
  private contentKey: string;
  private embeddingKey: string;
  private metadataKey: string;

  /**
   * @param init Optional settings:
   *   - `weaviateClient`: an existing client; when given, no credentials are
   *     required.
   *   - `cloudOptions`: cluster URL and API key, each falling back to the
   *     corresponding environment variable.
   *   - `indexName`: collection name (default "LlamaIndex"); must start with
   *     a capital letter.
   *   - `idKey`, `contentKey`, `embeddingKey`, `metadataKey`: field names
   *     (defaults "id", "text", "vectors", "node_info").
   * @throws Error when no client is given and the cluster URL or API key is
   *   missing, or when `indexName` does not start with a capital letter.
   */
  constructor(
    init?: Partial<IEmbedModel> & {
      weaviateClient?: WeaviateClient;
      cloudOptions?: {
        clusterURL?: string;
        apiKey?: string;
      };
      indexName?: string;
      idKey?: string;
      contentKey?: string;
      metadataKey?: string;
      embeddingKey?: string;
    },
  ) {
    super(init?.embedModel);

    if (init?.weaviateClient) {
      // Use the provided client
      this.weaviateClient = init.weaviateClient;
    } else {
      // Load client cloud options from config or env
      const clusterURL =
        init?.cloudOptions?.clusterURL ?? process.env.WEAVIATE_CLUSTER_URL;
      const apiKey = init?.cloudOptions?.apiKey ?? process.env.WEAVIATE_API_KEY;
      if (!clusterURL || !apiKey) {
        throw new Error(
          "Must specify WEAVIATE_CLUSTER_URL and WEAVIATE_API_KEY via env variable.",
        );
      }
      this.clusterURL = clusterURL;
      this.apiKey = apiKey;
    }

    this.checkIndexName(init?.indexName);
    this.indexName = init?.indexName ?? "LlamaIndex";
    this.idKey = init?.idKey ?? "id";
    this.contentKey = init?.contentKey ?? "text";
    this.embeddingKey = init?.embeddingKey ?? "vectors";
    this.metadataKey = init?.metadataKey ?? "node_info";
  }

  /** Returns (a promise of) the underlying Weaviate client, connecting if needed. */
  public client() {
    return this.getClient();
  }

  /**
   * Inserts the given nodes into the collection, creating the collection
   * first when it does not exist yet.
   *
   * @returns The UUIDs reported by Weaviate for the inserted objects.
   */
  public async add(nodes: BaseNode<Metadata>[]): Promise<string[]> {
    const collection = await this.ensureCollection({ createIfNotExists: true });

    const result = await collection.data.insertMany(
      nodes.map((node) => {
        const metadata = nodeToMetadata(
          node,
          true,
          this.contentKey,
          this.flatMetadata,
        );
        const body = {
          [this.idKey]: node.id_,
          [this.embeddingKey]: node.getEmbedding(),
          properties: {
            // Metadata is stored both flattened (for filtering) and as JSON.
            ...metadata,
            [this.contentKey]: node.getContent(MetadataMode.NONE),
            [this.metadataKey]: JSON.stringify(metadata),
            relationships: JSON.stringify({ ref_doc_id: metadata.ref_doc_id }),
          },
        };
        return body;
      }),
    );

    return Object.values(result.uuids);
  }

  /**
   * Deletes every object whose `ref_doc_id` property matches `refDocId`.
   *
   * @throws Error when the collection does not exist.
   */
  public async delete(
    refDocId: string,
    deleteOptions?: DeleteManyOptions<boolean>,
  ): Promise<void> {
    const collection = await this.ensureCollection();
    // NOTE(review): `like` is a pattern-matching operator in Weaviate; confirm
    // whether an exact match (`equal`) is what is intended here.
    await collection.data.deleteMany(
      collection.filter.byProperty("ref_doc_id").like(refDocId),
      deleteOptions,
    );
  }

  /**
   * Runs a hybrid query against the collection and converts the matching
   * objects back into nodes.
   *
   * When both `query.docIds` and `query.filters` are given, the two
   * restrictions are combined with AND.
   *
   * @returns The matching nodes with their similarities and Weaviate UUIDs.
   * @throws Error when the collection does not exist.
   */
  public async query(query: VectorStoreQuery): Promise<VectorStoreQueryResult> {
    const collection = await this.ensureCollection();
    // Reuse the collection fetched above instead of checking existence again.
    const allProperties = await this.getAllProperties(collection);

    // Collect every restriction, then AND them together so that a metadata
    // filter never silently discards the docIds restriction (or vice versa).
    const filterParts: FilterValue[] = [];
    if (query.docIds) {
      filterParts.push(
        collection.filter.byProperty("doc_id").containsAny(query.docIds),
      );
    }
    const metadataFilter = toWeaviateFilter(collection, query.filters);
    if (metadataFilter) {
      filterParts.push(metadataFilter);
    }
    const filters: FilterValue | undefined =
      filterParts.length > 1 ? Filters.and(...filterParts) : filterParts[0];

    const queryResult = await collection.query.hybrid(query.queryStr!, {
      vector: query.queryEmbedding,
      alpha: this.getQueryAlpha(query),
      limit: query.similarityTopK,
      returnMetadata: Object.values(SIMILARITY_KEYS),
      returnProperties: allProperties,
      includeVector: true,
      filters,
    });

    const entries = queryResult.objects;

    const similarityKey = SIMILARITY_KEYS[query.mode];
    const nodes: BaseNode<Metadata>[] = [];
    const similarities: number[] = [];
    const ids: string[] = [];

    entries.forEach((entry, index) => {
      // Entries without result metadata are skipped.
      if (index < query.similarityTopK && entry.metadata) {
        const node = metadataDictToNode(entry.properties);
        node.setContent(entry.properties[this.contentKey]);
        nodes.push(node);
        ids.push(entry.uuid);
        similarities.push(this.getNodeSimilarity(entry, similarityKey));
      }
    });

    return {
      nodes,
      similarities,
      ids,
    };
  }

  /** Returns the cached client, connecting to Weaviate Cloud on first use. */
  private async getClient(): Promise<WeaviateClient> {
    if (this.weaviateClient) return this.weaviateClient;
    const client = await weaviate.connectToWeaviateCloud(this.clusterURL, {
      authCredentials: new weaviate.ApiKey(this.apiKey),
    });
    this.weaviateClient = client;
    return client;
  }

  /**
   * Returns the collection named `indexName`.
   *
   * @param createIfNotExists When true, a missing collection is created;
   *   otherwise a missing collection is an error.
   * @throws Error when the collection is missing and must not be created.
   */
  private async ensureCollection({ createIfNotExists = false } = {}) {
    const client = await this.getClient();
    const exists = await this.doesCollectionExist();
    if (!exists) {
      if (createIfNotExists) {
        await this.createCollection();
      } else {
        throw new Error(`Collection ${this.indexName} does not exist.`);
      }
    }
    return client.collections.get(this.indexName);
  }

  /** Reports whether the collection named `indexName` exists. */
  private async doesCollectionExist() {
    const client = await this.getClient();
    return client.collections.exists(this.indexName);
  }

  /** Creates the collection named `indexName` with the `NODE_SCHEMA` properties. */
  private async createCollection() {
    const client = await this.getClient();
    return await client.collections.createFromSchema({
      class: this.indexName,
      description: `Collection for ${this.indexName}`,
      properties: NODE_SCHEMA,
    });
  }

  /**
   * Picks the hybrid-search `alpha` for a query: `undefined` without a query
   * embedding, 1 in default mode, and `query.alpha` in hybrid mode when a
   * query string is present.
   */
  private getQueryAlpha(query: VectorStoreQuery): number | undefined {
    if (!query.queryEmbedding) return undefined;
    if (query.mode === VectorStoreQueryMode.DEFAULT) return 1;
    if (query.mode === VectorStoreQueryMode.HYBRID && query.queryStr)
      return query.alpha;
    return undefined;
  }

  /**
   * Lists the names of all properties configured on the collection.
   *
   * @param collection An already-resolved collection to read from; when
   *   omitted, the collection is looked up via `ensureCollection()`.
   */
  private async getAllProperties(collection?: Collection): Promise<string[]> {
    const target = collection ?? (await this.ensureCollection());
    const properties = (await target.config.get()).properties;
    return properties.map((p) => p.name);
  }

  /** Rejects index names whose first character is not upper case. */
  private checkIndexName(indexName?: string) {
    if (indexName && indexName[0] !== indexName[0].toUpperCase()) {
      throw new Error(
        "Index name must start with a capital letter, e.g. 'LlamaIndex'",
      );
    }
  }

  /**
   * Derives a similarity from an entry's result metadata as
   * `1 - metadata[similarityKey]`, or 1 when that value is missing.
   */
  private getNodeSimilarity(
    entry: WeaviateNonGenericObject,
    similarityKey: "distance" | "score" = "distance",
  ): number {
    const distance = entry.metadata?.[similarityKey];
    if (distance === undefined) return 1;
    // convert distance https://forum.weaviate.io/t/distance-vs-certainty-scores/258
    return 1 - distance;
  }
}
@@ -99,3 +99,8 @@ export const parseArrayValue = (
}
return value;
};
/**
 * Returns `value` unchanged when it is a number.
 *
 * @param value The filter value to check.
 * @returns The same value, typed as `number`.
 * @throws Error ("Value must be a number") for any non-number value,
 *   including `undefined`.
 */
export const parseNumberValue = (value?: MetadataFilterValue): number => {
  if (typeof value === "number") {
    return value;
  }
  throw new Error("Value must be a number");
};
+89 -10
View File
@@ -382,8 +382,8 @@ importers:
specifier: 5.3.1
version: 5.3.1(typescript@5.5.3)
natural:
specifier: ^7.1.0
version: 7.1.0(@aws-sdk/credential-providers@3.613.0)
specifier: ^8.0.1
version: 8.0.1(@aws-sdk/credential-providers@3.613.0)
packages/core/tests:
devDependencies:
@@ -602,6 +602,9 @@ importers:
unpdf:
specifier: ^0.11.0
version: 0.11.0(encoding@0.1.13)
weaviate-client:
specifier: ^3.1.4
version: 3.1.4(encoding@0.1.13)
wikipedia:
specifier: ^2.1.2
version: 2.1.2
@@ -2666,6 +2669,11 @@ packages:
resolution: {integrity: sha512-krWjurjEUHSFhCX4lGHMOhbnpBfYZGU31mpHpPBQwcfWm0T+/+wxC4UCAJfkxxc3/HvGJVG8r4AqrffaeDHDlA==}
engines: {node: '>=18.0.0'}
'@graphql-typed-document-node/core@3.2.0':
resolution: {integrity: sha512-mB9oAsNCm9aM3/SOv4YtBMqZbYj10R7dkq8byBqxGY/ncFwhf2oQzMV+LCRlWoDSEBJ3COiR1yeDvMtsoOsuFQ==}
peerDependencies:
graphql: ^0.8.0 || ^0.9.0 || ^0.10.0 || ^0.11.0 || ^0.12.0 || ^0.13.0 || ^14.0.0 || ^15.0.0 || ^16.0.0 || ^17.0.0
'@grpc/grpc-js@1.10.11':
resolution: {integrity: sha512-3RaoxOqkHHN2c05bwtBNVJmOf/UwMam0rZYtdl7dsRpsvDwcNpv6LkGgzltQ7xVf822LzBoKEPRvf4D7+xeIDw==}
engines: {node: '>=12.10.0'}
@@ -4375,6 +4383,9 @@ packages:
abbrev@1.1.1:
resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==}
abort-controller-x@0.4.3:
resolution: {integrity: sha512-VtUwTNU8fpMwvWGn4xE93ywbogTYsuT+AUxAXOeelbXuQVIwNmC5YLeho9sH4vZ4ITW8414TTAOG1nW6uIVHCA==}
abort-controller@3.0.0:
resolution: {integrity: sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==}
engines: {node: '>=6.5'}
@@ -6568,6 +6579,15 @@ packages:
graphemer@1.4.0:
resolution: {integrity: sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==}
graphql-request@6.1.0:
resolution: {integrity: sha512-p+XPfS4q7aIpKVcgmnZKhMNqhltk20hfXtkaIkTfjjmiKMJ5xrt5c743cL03y/K7y1rg3WrIC49xGiEQ4mxdNw==}
peerDependencies:
graphql: 14 - 16
graphql@16.9.0:
resolution: {integrity: sha512-GGTKBX4SD7Wdb8mqeDLni2oaRGYQWjWHGKPQ24ZMnUtKfcsVoiv4uX8+LJr1K6U5VW2Lu1BwJnj7uiori0YtRw==}
engines: {node: ^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0}
gray-matter@4.0.3:
resolution: {integrity: sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==}
engines: {node: '>=6.0'}
@@ -8079,8 +8099,8 @@ packages:
natural-compare@1.4.0:
resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==}
natural@7.1.0:
resolution: {integrity: sha512-GBhiRgF0VUX+zPWahBVir1ajARQDZF1Fe6UpQORNzyQT57JQ2KLKYvubecvjIYh/uDaociusmySeRh+WL5OdxQ==}
natural@8.0.1:
resolution: {integrity: sha512-VVw8O5KrfvwqAFeNZEgBbdgA+AQaBlHcXEootWU7TWDaFWFI0VLfzyKMsRjnfdS3cVCpWmI04xLJonCvEv11VQ==}
engines: {node: '>=0.4.10'}
negotiator@0.6.3:
@@ -8126,6 +8146,15 @@ packages:
sass:
optional: true
nice-grpc-client-middleware-deadline@2.0.12:
resolution: {integrity: sha512-drKxQJzTbh+Qkd6v+BcRhTmY2mw9zR8Qigu/jk0vIkDi90K6NOOJGgvBdbTxKXtv6QY1g07T1LvwaqW3Mlwdvw==}
nice-grpc-common@2.0.2:
resolution: {integrity: sha512-7RNWbls5kAL1QVUOXvBsv1uO0wPQK3lHv+cY1gwkTzirnG1Nop4cBJZubpgziNbaVc/bl9QJcyvsf/NQxa3rjQ==}
nice-grpc@2.1.9:
resolution: {integrity: sha512-shJlg1t4Wn3qTVE31gxofbTrgCX/p4tS1xRnk4bNskCYKvXNEUpJQZpjModsVk1aau69YZDViyC18K9nC7QHYA==}
nice-napi@1.0.2:
resolution: {integrity: sha512-px/KnJAJZf5RuBGcfD+Sp2pAKq0ytz8j+1NehvgIGFkvtvFrDM3T8E4x/JJODXK9WZow8RRGrbA9QQ3hs+pDhA==}
os: ['!win32']
@@ -10313,6 +10342,9 @@ packages:
peerDependencies:
typescript: '>=4.2.0'
ts-error@1.0.6:
resolution: {integrity: sha512-tLJxacIQUM82IR7JO1UUkKlYuUTmoY9HBJAmNWFzheSlDS5SPMcNIepejHJa4BpPQLAcbRhRf3GDJzyj6rbKvA==}
ts-graphviz@1.8.2:
resolution: {integrity: sha512-5YhbFoHmjxa7pgQLkB07MtGnGJ/yhvjmc9uhsnDBEICME6gkPf83SBwLDQqGDoCa3XzUMWLk1AU2Wn1u1naDtA==}
engines: {node: '>=14.16'}
@@ -10788,6 +10820,10 @@ packages:
wcwidth@1.0.1:
resolution: {integrity: sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg==}
weaviate-client@3.1.4:
resolution: {integrity: sha512-Bw9KV0wtFd4TdifhPAkmc2Lv7bKIX0L2oqObUNG8K8Mv0zoVixGcqlAS3xJdfQ2jSqz0vH3mfetsOBdlvogxfg==}
engines: {node: '>=18.0.0'}
web-namespaces@2.0.1:
resolution: {integrity: sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==}
@@ -11967,7 +12003,7 @@ snapshots:
'@babel/core': 7.24.7
'@babel/helper-compilation-targets': 7.24.7
'@babel/helper-plugin-utils': 7.24.7
debug: 4.3.5
debug: 4.3.6
lodash.debounce: 4.0.8
resolve: 1.22.8
transitivePeerDependencies:
@@ -13987,6 +14023,10 @@ snapshots:
'@google/generative-ai@0.12.0': {}
'@graphql-typed-document-node/core@3.2.0(graphql@16.9.0)':
dependencies:
graphql: 16.9.0
'@grpc/grpc-js@1.10.11':
dependencies:
'@grpc/proto-loader': 0.7.13
@@ -15564,7 +15604,7 @@ snapshots:
dependencies:
'@typescript-eslint/types': 5.62.0
'@typescript-eslint/visitor-keys': 5.62.0
debug: 4.3.5
debug: 4.3.6
globby: 11.1.0
is-glob: 4.0.3
semver: 7.6.2
@@ -15865,6 +15905,8 @@ snapshots:
abbrev@1.1.1:
optional: true
abort-controller-x@0.4.3: {}
abort-controller@3.0.0:
dependencies:
event-target-shim: 5.0.1
@@ -15900,7 +15942,7 @@ snapshots:
agent-base@6.0.2:
dependencies:
debug: 4.3.5
debug: 4.3.6
transitivePeerDependencies:
- supports-color
optional: true
@@ -18625,6 +18667,16 @@ snapshots:
graphemer@1.4.0: {}
graphql-request@6.1.0(encoding@0.1.13)(graphql@16.9.0):
dependencies:
'@graphql-typed-document-node/core': 3.2.0(graphql@16.9.0)
cross-fetch: 3.1.8(encoding@0.1.13)
graphql: 16.9.0
transitivePeerDependencies:
- encoding
graphql@16.9.0: {}
gray-matter@4.0.3:
dependencies:
js-yaml: 3.14.1
@@ -20452,7 +20504,7 @@ snapshots:
mquery@5.0.0:
dependencies:
debug: 4.3.5
debug: 4.3.6
transitivePeerDependencies:
- supports-color
@@ -20490,7 +20542,7 @@ snapshots:
natural-compare@1.4.0: {}
natural@7.1.0(@aws-sdk/credential-providers@3.613.0):
natural@8.0.1(@aws-sdk/credential-providers@3.613.0):
dependencies:
afinn-165: 1.0.4
afinn-165-financialmarketnews: 3.0.0
@@ -20599,6 +20651,20 @@ snapshots:
- '@babel/core'
- babel-plugin-macros
nice-grpc-client-middleware-deadline@2.0.12:
dependencies:
nice-grpc-common: 2.0.2
nice-grpc-common@2.0.2:
dependencies:
ts-error: 1.0.6
nice-grpc@2.1.9:
dependencies:
'@grpc/grpc-js': 1.10.11
abort-controller-x: 0.4.3
nice-grpc-common: 2.0.2
nice-napi@1.0.2:
dependencies:
node-addon-api: 3.2.1
@@ -22589,7 +22655,7 @@ snapshots:
spdy-transport@3.0.0:
dependencies:
debug: 4.3.5
debug: 4.3.6
detect-node: 2.1.0
hpack.js: 2.1.6
obuf: 1.1.2
@@ -23091,6 +23157,8 @@ snapshots:
dependencies:
typescript: 5.5.3
ts-error@1.0.6: {}
ts-graphviz@1.8.2: {}
ts-interface-checker@0.1.13: {}
@@ -23603,6 +23671,17 @@ snapshots:
dependencies:
defaults: 1.0.4
weaviate-client@3.1.4(encoding@0.1.13):
dependencies:
graphql: 16.9.0
graphql-request: 6.1.0(encoding@0.1.13)(graphql@16.9.0)
long: 5.2.3
nice-grpc: 2.1.9
nice-grpc-client-middleware-deadline: 2.0.12
uuid: 9.0.1
transitivePeerDependencies:
- encoding
web-namespaces@2.0.1: {}
web-streams-polyfill@3.3.3: {}