Compare commits

...

5 Commits

Author SHA1 Message Date
Emanuel Ferreira e03bf70359 tests and improvements 2024-02-28 09:39:55 -03:00
Emanuel Ferreira a60e49d334 fixes 2024-02-28 08:51:17 -03:00
Emanuel Ferreira 0bec39b3a8 fix: results with key 2024-02-27 14:37:25 -03:00
Emanuel Ferreira f9f9845b56 build 2024-02-27 11:18:09 -03:00
Emanuel Ferreira 045cacf89f feat: JSON Query Engine 2024-02-27 11:16:29 -03:00
7 changed files with 543 additions and 4 deletions
+121
View File
@@ -0,0 +1,121 @@
import {
JSONQueryEngine,
OpenAI,
serviceContextFromDefaults,
} from "llamaindex";
// Sample document queried by the example: two blog posts and three comments,
// where each comment points at its post through `blogPostId`.
const jsonValue = {
  blogPosts: [
    { id: 1, title: "First blog post", content: "This is my first blog post" },
    { id: 2, title: "Second blog post", content: "This is my second blog post" },
  ],
  comments: [
    { id: 1, content: "Nice post!", username: "jerry", blogPostId: 1 },
    { id: 2, content: "Interesting thoughts", username: "simon", blogPostId: 2 },
    { id: 3, content: "Loved reading this!", username: "simon", blogPostId: 2 },
  ],
};
// JSON Schema describing `jsonValue`. Key order is kept stable because the
// schema is serialised verbatim into the prompt sent to the LLM.
const jsonSchema = {
  type: "object",
  properties: {
    blogPosts: {
      type: "array",
      items: {
        type: "object",
        properties: {
          id: { type: "number" },
          title: { type: "string" },
          content: { type: "string" },
        },
        required: ["id", "title", "content"],
      },
    },
    comments: {
      type: "array",
      items: {
        type: "object",
        properties: {
          id: { type: "number" },
          content: { type: "string" },
          username: { type: "string" },
          blogPostId: { type: "number" },
        },
        required: ["id", "content", "username", "blogPostId"],
      },
    },
  },
  required: ["blogPosts", "comments"],
};
/**
 * Runs two example queries against the sample document: one through an
 * engine that synthesizes a natural-language answer, and one through an
 * engine configured with `synthesizeResponse: false`.
 */
async function main() {
  const serviceContext = serviceContextFromDefaults({
    llm: new OpenAI({ model: "gpt-4" }),
  });

  // Both engines share the same data, schema and LLM; only synthesis differs.
  const sharedOptions = { jsonValue, jsonSchema, serviceContext };
  const jsonQueryEngine = new JSONQueryEngine(sharedOptions);
  const rawQueryEngine = new JSONQueryEngine({
    ...sharedOptions,
    synthesizeResponse: false,
  });

  const synthesizedQuestion = "give to me the comment with id 1";
  const response = await jsonQueryEngine.query({ query: synthesizedQuestion });

  const rawQuestion = "give me all simon comments";
  const rawResponse = await rawQueryEngine.query({ query: rawQuestion });

  console.log({ response });
  console.log({ rawResponse });
}

main();
+6 -4
View File
@@ -7,22 +7,24 @@
"@anthropic-ai/sdk": "^0.13.0",
"@aws-crypto/sha256-js": "^5.2.0",
"@datastax/astra-db-ts": "^0.1.4",
"@types/lodash": "^4.14.202",
"@types/node": "^18.19.14",
"@types/papaparse": "^5.3.14",
"@types/pg": "^8.11.0",
"@llamaindex/cloud": "^0.0.1",
"@llamaindex/env": "workspace:*",
"@mistralai/mistralai": "^0.0.10",
"@notionhq/client": "^2.2.14",
"@pinecone-database/pinecone": "^2.0.1",
"@qdrant/js-client-rest": "^1.7.0",
"@types/jsonpath": "^0.2.4",
"@types/lodash": "^4.14.202",
"@types/node": "^18.19.14",
"@types/papaparse": "^5.3.14",
"@types/pg": "^8.11.0",
"@xenova/transformers": "^2.15.0",
"assemblyai": "^4.2.2",
"chromadb": "~1.7.3",
"cohere-ai": "^7.7.5",
"file-type": "^18.7.0",
"js-tiktoken": "^1.0.10",
"jsonpath": "^1.1.1",
"lodash": "^4.17.21",
"mammoth": "^1.6.0",
"md-utils-ts": "^2.0.0",
@@ -0,0 +1,208 @@
import jsonpath from "jsonpath";
import { Response } from "../../Response.js";
import {
serviceContextFromDefaults,
type ServiceContext,
} from "../../ServiceContext.js";
import type {
BaseQueryEngine,
QueryEngineParamsNonStreaming,
QueryEngineParamsStreaming,
} from "../../types.js";
import {
defaultJsonPathPrompt,
defaultResponseSynthesizePrompt,
type JSONPathPrompt,
type ResponseSynthesisPrompt,
} from "./prompts.js";
/**
 * Loose object shape with string keys and unknown values. In this module it
 * types both the JSON document being queried and the JSON schema describing it.
 */
export type JSONSchemaType = Record<string, unknown>;
/**
 * Strips any run of backticks and single quotes from both ends of `expr`.
 * Characters in the middle of the string are left untouched.
 *
 * @param expr - Expression text, possibly wrapped in backticks or single quotes.
 * @returns The expression without the surrounding quote characters.
 */
function removeExtraQuotes(expr: string) {
  // Leading run first, then the trailing run of whatever remains.
  const withoutLeading = expr.replace(/^[`']+/, "");
  return withoutLeading.replace(/[`']+$/, "");
}
// Path ends in a dot-notation member, e.g. `$..comments[0].content`.
const TRAILING_DOT_MEMBER = /\.([A-Za-z_$][\w$]*)$/;
// Path ends in a single quoted bracket member, e.g. `$..comments[0]['content']`.
const TRAILING_BRACKET_MEMBER = /\[(['"])([^'"\]]+)\1\]$/;

/**
 * Splits raw LLM output into individual JSONPath expressions.
 *
 * Only commas at the top level separate expressions. Commas inside brackets
 * or parentheses (unions such as `[0,1]`, or string literals in filters such
 * as `[?(@.content=='Hello, world')]`) belong to the expression itself.
 */
function splitJsonPathExpressions(llmOutput: string): string[] {
  const expressions: string[] = [];
  let current = "";
  let depth = 0;
  let openQuote: string | undefined;
  let escaped = false;

  for (const char of llmOutput) {
    if (openQuote !== undefined) {
      if (escaped) {
        escaped = false;
      } else if (char === "\\") {
        escaped = true;
      } else if (char === openQuote) {
        openQuote = undefined;
      }
    } else if (depth > 0 && (char === "'" || char === '"')) {
      // Quotes are only tracked inside brackets: quotes wrapping the whole
      // output are decoration that removeExtraQuotes strips afterwards.
      openQuote = char;
    } else if (char === "[" || char === "(") {
      depth++;
    } else if (char === "]" || char === ")") {
      depth = Math.max(0, depth - 1);
    } else if (char === "," && depth === 0) {
      expressions.push(current);
      current = "";
      continue;
    }
    current += char;
  }

  expressions.push(current);
  return expressions;
}

/**
 * Returns the member name a JSONPath expression ends with, or `undefined`
 * when the path ends in something else (filter, subscript, wildcard, root).
 */
function trailingPropertyName(expression: string): string | undefined {
  const dotMember = TRAILING_DOT_MEMBER.exec(expression);
  if (dotMember) {
    return dotMember[1];
  }
  return TRAILING_BRACKET_MEMBER.exec(expression)?.[2];
}

/**
 * Default processor that turns the LLM's JSONPath answer into query results.
 *
 * The output is split into expressions, surrounding backticks/single quotes
 * are stripped, and each expression is evaluated against `jsonValue`.
 * When the expression ends in a member name (e.g. `...].content`) each match
 * is returned as `{ [memberName]: value }`; otherwise (the path ends in a
 * filter, subscript or wildcard) each match is returned as-is.
 *
 * @param llmOutput - Raw text returned by the LLM, one or more JSONPath
 *   expressions separated by commas.
 * @param jsonValue - The JSON document to query.
 * @returns The collected matches of all expressions, in order.
 * @throws Error with message `Invalid JSON Path: <expression>` when an
 *   expression cannot be evaluated.
 */
export const defaultOutputProcessor = async ({
  llmOutput,
  jsonValue,
}: {
  llmOutput: string;
  jsonValue: JSONSchemaType;
}): Promise<Record<string, unknown>[]> => {
  const expressions = splitJsonPathExpressions(llmOutput).map((expr) =>
    removeExtraQuotes(expr.trim()),
  );

  const results: Record<string, unknown>[] = [];

  for (const expression of expressions) {
    // Only a trailing member name is a meaningful key; deriving the key from
    // the text after the last "." would pick up filter fragments.
    const key = trailingPropertyName(expression);

    try {
      const datums = jsonpath.query(jsonValue, expression);

      for (const datum of datums) {
        if (key === undefined) {
          results.push(datum);
          continue;
        }

        results.push({
          [key]: datum,
        });
      }
    } catch {
      throw new Error(`Invalid JSON Path: ${expression}`);
    }
  }

  return results;
};

type OutputProcessor = typeof defaultOutputProcessor;
/**
 * A JSON query engine that uses JSONPath to query a JSON object.
 *
 * A query runs in up to three steps: the LLM is asked for a JSONPath
 * expression given the schema and the question, the expression is evaluated
 * by the output processor against the JSON value, and (unless
 * `synthesizeResponse` is false) the LLM is asked a second time to phrase an
 * answer from the extracted values.
 */
export class JSONQueryEngine implements BaseQueryEngine {
  jsonValue: JSONSchemaType;
  jsonSchema: JSONSchemaType;
  serviceContext: ServiceContext;
  outputProcessor: OutputProcessor;
  verbose: boolean;
  jsonPathPrompt: JSONPathPrompt;
  synthesizeResponse: boolean;
  responseSynthesisPrompt: ResponseSynthesisPrompt;

  /**
   * @param init.jsonValue - The JSON document to query.
   * @param init.jsonSchema - Schema describing `jsonValue`; sent to the LLM.
   * @param init.serviceContext - Defaults to `serviceContextFromDefaults({})`.
   * @param init.jsonPathPrompt - Defaults to `defaultJsonPathPrompt`.
   * @param init.outputProcessor - Defaults to `defaultOutputProcessor`.
   * @param init.synthesizeResponse - Defaults to `true`.
   * @param init.responseSynthesisPrompt - Defaults to
   *   `defaultResponseSynthesizePrompt`.
   * @param init.verbose - Defaults to `false`; logs intermediate results.
   */
  constructor(init: {
    jsonValue: JSONSchemaType;
    jsonSchema: JSONSchemaType;
    serviceContext?: ServiceContext;
    jsonPathPrompt?: JSONPathPrompt;
    outputProcessor?: OutputProcessor;
    synthesizeResponse?: boolean;
    responseSynthesisPrompt?: ResponseSynthesisPrompt;
    verbose?: boolean;
  }) {
    this.jsonValue = init.jsonValue;
    this.jsonSchema = init.jsonSchema;
    this.serviceContext = init.serviceContext ?? serviceContextFromDefaults({});
    this.jsonPathPrompt = init.jsonPathPrompt ?? defaultJsonPathPrompt;
    this.outputProcessor = init.outputProcessor ?? defaultOutputProcessor;
    this.verbose = init.verbose ?? false;
    this.synthesizeResponse = init.synthesizeResponse ?? true;
    this.responseSynthesisPrompt =
      init.responseSynthesisPrompt ?? defaultResponseSynthesizePrompt;
  }

  /** Returns the prompt builders currently in use, keyed by name. */
  getPrompts(): Record<string, unknown> {
    return {
      jsonPathPrompt: this.jsonPathPrompt,
      responseSynthesisPrompt: this.responseSynthesisPrompt,
    };
  }

  /** Replaces whichever prompt builders are provided; omitted ones are kept. */
  updatePrompts(prompts: {
    jsonPathPrompt?: JSONPathPrompt;
    responseSynthesisPrompt?: ResponseSynthesisPrompt;
  }): void {
    if (prompts.jsonPathPrompt) {
      this.jsonPathPrompt = prompts.jsonPathPrompt;
    }

    if (prompts.responseSynthesisPrompt) {
      this.responseSynthesisPrompt = prompts.responseSynthesisPrompt;
    }
  }

  /** This engine has no prompt sub-modules; always returns an empty object. */
  getPromptModules(): Record<string, unknown> {
    return {};
  }

  /** Returns the JSON schema serialised with `JSON.stringify`. */
  getSchemaContext(): string {
    return JSON.stringify(this.jsonSchema);
  }

  query(params: QueryEngineParamsStreaming): Promise<AsyncIterable<Response>>;
  query(params: QueryEngineParamsNonStreaming): Promise<Response>;
  /**
   * Answers `params.query` from the JSON value.
   *
   * @returns A `Response` whose text is either the synthesized answer or,
   *   when `synthesizeResponse` is false, the JSON-serialised processor
   *   output. `metadata.jsonPathResponseStr` holds the LLM completion that
   *   produced the JSONPath.
   * @throws Error when `params.stream` is set; streaming is not supported.
   */
  async query(
    params: QueryEngineParamsStreaming | QueryEngineParamsNonStreaming,
  ): Promise<Response | AsyncIterable<Response>> {
    const { query, stream } = params;

    if (stream) {
      throw new Error("Streaming is not supported");
    }

    const schema = this.getSchemaContext();

    const jsonPathCompletion = await this.serviceContext.llm.complete({
      prompt: this.jsonPathPrompt({ query, schema }),
    });

    if (this.verbose) {
      // Log the completion text; interpolating the completion object itself
      // would print "[object Object]".
      console.log(
        `> JSONPath Instructions:\n\`\`\`\n${jsonPathCompletion.text}\n\`\`\`\n`,
      );
    }

    const jsonPathOutput = await this.outputProcessor({
      llmOutput: jsonPathCompletion.text,
      jsonValue: this.jsonValue,
    });
    const serializedOutput = JSON.stringify(jsonPathOutput);

    if (this.verbose) {
      // Serialise explicitly: an array of objects does not stringify usefully.
      console.log(`> JSONPath Output: ${serializedOutput}\n`);
    }

    let responseStr: string;

    if (this.synthesizeResponse) {
      const synthesis = await this.serviceContext.llm.complete({
        prompt: this.responseSynthesisPrompt({
          query,
          jsonSchema: schema,
          jsonPath: jsonPathCompletion.text,
          jsonPathValue: serializedOutput,
        }),
      });

      responseStr = synthesis.text;
    } else {
      responseStr = serializedOutput;
    }

    const response = new Response(responseStr, []);

    // Key name and value (the full completion object) are kept as-is so
    // existing readers of the metadata continue to work.
    response.metadata = {
      jsonPathResponseStr: jsonPathCompletion,
    };

    return response;
  }
}
+1
View File
@@ -1,3 +1,4 @@
export * from "./JSONQueryEngine.js";
export * from "./RetrieverQueryEngine.js";
export * from "./RouterQueryEngine.js";
export * from "./SubQuestionQueryEngine.js";
@@ -0,0 +1,36 @@
/**
 * Builds the prompt that asks the LLM for a JSONPath expression.
 *
 * @param query - The task or question to answer.
 * @param schema - The JSON schema, already serialised to a string.
 * @returns The prompt text; it begins and ends with a newline.
 */
export const defaultJsonPathPrompt = ({
  query,
  schema,
}: {
  query: string;
  schema: string;
}): string => {
  const promptLines = [
    "",
    "We have provided a JSON schema below:",
    `${schema}`,
    "Given a task, respond with a JSON Path query that can retrieve data from a JSON value that matches the schema.",
    `Task: ${query}`,
    "JSONPath:",
    "",
  ];

  return promptLines.join("\n");
};

export type JSONPathPrompt = typeof defaultJsonPathPrompt;
/**
 * Builds the prompt that asks the LLM to phrase an answer from the values
 * extracted by the JSONPath query.
 *
 * @param query - The original question.
 * @param jsonSchema - The JSON schema, already serialised to a string.
 * @param jsonPath - The JSONPath text that was evaluated.
 * @param jsonPathValue - The extracted values, already serialised to a string.
 * @returns The prompt text; it begins and ends with a newline.
 */
export const defaultResponseSynthesizePrompt = ({
  query,
  jsonSchema,
  jsonPath,
  jsonPathValue,
}: {
  query: string;
  jsonSchema: string;
  jsonPath: string;
  jsonPathValue: string;
}): string => {
  const instruction =
    "Given a query, synthesize a response to satisfy the query using the JSON results. Only include details that are relevant to the query. If you don't know the answer, then say that.";

  const promptLines = [
    "",
    instruction,
    `JSON Schema: ${jsonSchema}`,
    `JSON Path: ${jsonPath}`,
    `Value at path: ${jsonPathValue}`,
    `Query: ${query}`,
    "Response:",
    "",
  ];

  return promptLines.join("\n");
};

export type ResponseSynthesisPrompt = typeof defaultResponseSynthesizePrompt;
@@ -0,0 +1,87 @@
import { defaultOutputProcessor } from "llamaindex";
import { describe, expect, it } from "vitest";
describe("JSONQueryEngine", () => {
  // Fixture: two blog posts and three comments, two of them written by "simon".
  const blogPosts = [
    { id: 1, title: "First blog post", content: "This is my first blog post" },
    { id: 2, title: "Second blog post", content: "This is my second blog post" },
  ];
  const comments = [
    { id: 1, content: "Nice post!", username: "jerry", blogPostId: 1 },
    { id: 2, content: "Interesting thoughts", username: "simon", blogPostId: 2 },
    { id: 3, content: "Loved reading this!", username: "simon", blogPostId: 2 },
  ];
  const jsonValue = { blogPosts, comments };

  // Expected result of selecting `.content` from simon's comments.
  const simonCommentContents = [
    { content: "Interesting thoughts" },
    { content: "Loved reading this!" },
  ];

  it("should be able to output parse", async () => {
    const parsed = await defaultOutputProcessor({
      llmOutput: "$..comments[?(@.username=='simon')].content",
      jsonValue,
    });

    expect(parsed).toEqual(simonCommentContents);
  });

  it("should be able to output parse with extra strings", async () => {
    // The surrounding backticks must be stripped before evaluation.
    const parsed = await defaultOutputProcessor({
      llmOutput: "`$..comments[?(@.username=='simon')].content`",
      jsonValue,
    });

    expect(parsed).toEqual(simonCommentContents);
  });

  it("should be able to return a complete object", async () => {
    // A path ending in a filter yields the matched objects unwrapped.
    const matched = await defaultOutputProcessor({
      llmOutput: "`$..comments[?(@.id=='1')]`",
      jsonValue,
    });

    expect(matched).toEqual([
      { id: 1, content: "Nice post!", username: "jerry", blogPostId: 1 },
    ]);
  });
});
+84
View File
@@ -186,6 +186,9 @@ importers:
'@qdrant/js-client-rest':
specifier: ^1.7.0
version: 1.7.0(typescript@5.3.3)
'@types/jsonpath':
specifier: ^0.2.4
version: 0.2.4
'@types/lodash':
specifier: ^4.14.202
version: 4.14.202
@@ -216,6 +219,9 @@ importers:
js-tiktoken:
specifier: ^1.0.10
version: 1.0.10
jsonpath:
specifier: ^1.1.1
version: 1.1.1
lodash:
specifier: ^4.17.21
version: 4.17.21
@@ -4240,6 +4246,10 @@ packages:
/@types/json5@0.0.29:
resolution: {integrity: sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==}
/@types/jsonpath@0.2.4:
resolution: {integrity: sha512-K3hxB8Blw0qgW6ExKgMbXQv2UPZBoE2GqLpVY+yr7nMD2Pq86lsuIzyAaiQ7eMqFL5B6di6pxSkogLJEyEHoGA==}
dev: false
/@types/keyv@3.1.4:
resolution: {integrity: sha512-BQ5aZNSCpj7D6K2ksrRCTmKRLEpnPvWDiLPfoGyhZ++8YtiK9d/3DBKPJgry359X/P1PfruyYwvnvwFjuEiEIg==}
dependencies:
@@ -7194,6 +7204,19 @@ packages:
resolution: {integrity: sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==}
engines: {node: '>=12'}
/escodegen@1.14.3:
resolution: {integrity: sha512-qFcX0XJkdg+PB3xjZZG/wKSuT1PnQWx57+TVSjIMmILd2yC/6ByYElPwJnslDsuWuSAp4AwJGumarAAmJch5Kw==}
engines: {node: '>=4.0'}
hasBin: true
dependencies:
esprima: 4.0.1
estraverse: 4.3.0
esutils: 2.0.3
optionator: 0.8.3
optionalDependencies:
source-map: 0.6.1
dev: false
/escodegen@2.1.0:
resolution: {integrity: sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==}
engines: {node: '>=6.0'}
@@ -7515,6 +7538,12 @@ packages:
acorn-jsx: 5.3.2(acorn@8.11.3)
eslint-visitor-keys: 3.4.3
/esprima@1.2.2:
resolution: {integrity: sha512-+JpPZam9w5DuJ3Q67SqsMGtiHKENSMRVoxvArfJZK01/BfLEObtZ6orJa/MtoGNR/rfMgp5837T41PAmTwAv/A==}
engines: {node: '>=0.4.0'}
hasBin: true
dev: false
/esprima@4.0.1:
resolution: {integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==}
engines: {node: '>=4'}
@@ -9483,6 +9512,14 @@ packages:
optionalDependencies:
graceful-fs: 4.2.11
/jsonpath@1.1.1:
resolution: {integrity: sha512-l6Cg7jRpixfbgoWgkrl77dgEj8RPvND0wMH6TwQmi9Qs4TFfS9u5cUFnbeKTwj5ga5Y3BTGGNI28k117LJ009w==}
dependencies:
esprima: 1.2.2
static-eval: 2.0.2
underscore: 1.12.1
dev: false
/jsx-ast-utils@3.3.3:
resolution: {integrity: sha512-fYQHZTZ8jSfmWZ0iyzfwiU4WDX4HpHbMCZ3gPlWYiCl3BoeOTsqKBqnTVfH2rYT7eP5c3sVbeSPHnnJOaTrWiw==}
engines: {node: '>=4.0'}
@@ -9559,6 +9596,14 @@ packages:
resolution: {integrity: sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==}
engines: {node: '>=6'}
/levn@0.3.0:
resolution: {integrity: sha512-0OO4y2iOHix2W6ujICbKIaEQXvFQHue65vUG3pb5EUomzPI90z9hsA1VsO/dbIIpC53J8gxM9Q4Oho0jrCM/yA==}
engines: {node: '>= 0.8.0'}
dependencies:
prelude-ls: 1.1.2
type-check: 0.3.2
dev: false
/levn@0.4.1:
resolution: {integrity: sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==}
engines: {node: '>= 0.8.0'}
@@ -11159,6 +11204,18 @@ packages:
resolution: {integrity: sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==}
dev: false
/optionator@0.8.3:
resolution: {integrity: sha512-+IW9pACdk3XWmmTXG8m3upGUJst5XRGzxMRjXzAuJ1XnIFNvfhjjIuYkDvysnPQ7qzqVzLt78BCruntqRhWQbA==}
engines: {node: '>= 0.8.0'}
dependencies:
deep-is: 0.1.4
fast-levenshtein: 2.0.6
levn: 0.3.0
prelude-ls: 1.1.2
type-check: 0.3.2
word-wrap: 1.2.5
dev: false
/optionator@0.9.3:
resolution: {integrity: sha512-JjCoypp+jKn1ttEFExxhetCKeJt9zhAgAve5FXHixTvFDW/5aEktX9bufBKLRRMdU7bNtpLfcGu94B3cdEJgjg==}
engines: {node: '>= 0.8.0'}
@@ -12183,6 +12240,11 @@ packages:
which-pm: 2.0.0
dev: true
/prelude-ls@1.1.2:
resolution: {integrity: sha512-ESF23V4SKG6lVSGZgYNpbsiaAkdab6ZgOxe52p7+Kid3W3u3bxR4Vfd/o21dmN7jSt0IwgZ4v5MUd26FEtXE9w==}
engines: {node: '>= 0.8.0'}
dev: false
/prelude-ls@1.2.1:
resolution: {integrity: sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==}
engines: {node: '>= 0.8.0'}
@@ -13649,6 +13711,12 @@ packages:
resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==}
dev: true
/static-eval@2.0.2:
resolution: {integrity: sha512-N/D219Hcr2bPjLxPiV+TQE++Tsmrady7TqAJugLy7Xk1EumfDWS/f5dtBbkRCGE7wKKXuYockQoj8Rm2/pVKyg==}
dependencies:
escodegen: 1.14.3
dev: false
/statuses@1.5.0:
resolution: {integrity: sha512-OpZ3zP+jT1PI7I8nemJX4AKmAX070ZkYPVWV/AaKTJl+tXCTGyVdC1a4SL8RUQYEwk/f34ZX8UTykN68FwrqAA==}
engines: {node: '>= 0.6'}
@@ -14450,6 +14518,13 @@ packages:
turbo-windows-arm64: 1.12.3
dev: true
/type-check@0.3.2:
resolution: {integrity: sha512-ZCmOJdvOWDBYJlzAoFkC+Q0+bUyEOS1ltgp1MGU03fqHG+dbi9tBFU2Rd9QKiDZFAYrhPh2JUf7rZRIuHRKtOg==}
engines: {node: '>= 0.8.0'}
dependencies:
prelude-ls: 1.1.2
dev: false
/type-check@0.4.0:
resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==}
engines: {node: '>= 0.8.0'}
@@ -14604,6 +14679,10 @@ packages:
has-symbols: 1.0.3
which-boxed-primitive: 1.0.2
/underscore@1.12.1:
resolution: {integrity: sha512-hEQt0+ZLDVUMhebKxL4x1BTtDY7bavVofhZ9KZ4aI26X9SRaE+Y3m83XUL1UP2jn8ynjndwCCpEHdUG+9pP1Tw==}
dev: false
/underscore@1.13.6:
resolution: {integrity: sha512-+A5Sja4HP1M08MaXya7p5LvjuM7K6q/2EaC0+iovj/wOcMsTzMvDFbasi/oSapiwOlt252IqsKqPjCl7huKS0A==}
dev: false
@@ -15361,6 +15440,11 @@ packages:
winston-transport: 4.6.0
dev: false
/word-wrap@1.2.5:
resolution: {integrity: sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==}
engines: {node: '>=0.10.0'}
dev: false
/wordwrap@1.0.0:
resolution: {integrity: sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==}
dev: true