From 4e1e986e79248da53c697aa227570a0df4dc9fe8 Mon Sep 17 00:00:00 2001 From: Thuc Pham <51660321+thucpn@users.noreply.github.com> Date: Thu, 14 Nov 2024 12:25:48 +0700 Subject: [PATCH] feat: sync with latest CL (#2) * feat: sync with latest CL * declare use client --- .gitignore | 1 + app/api/chat/engine/chat.ts | 4 +- app/api/chat/engine/generate.ts | 7 +- app/api/chat/engine/index.ts | 7 +- app/api/chat/engine/loader.ts | 2 +- app/api/chat/engine/shared.ts | 1 - app/api/chat/engine/tools/form-filling.ts | 296 + app/api/chat/engine/tools/index.ts | 30 + app/api/chat/engine/tools/interpreter.ts | 15 +- app/api/chat/llamaindex/documents/helper.ts | 33 +- app/api/chat/llamaindex/documents/pipeline.ts | 14 +- app/api/chat/llamaindex/documents/upload.ts | 59 +- .../chat/llamaindex/streaming/annotations.ts | 91 +- app/api/chat/upload/route.ts | 16 +- app/api/files/[...slug]/route.ts | 6 +- app/components/chat-section.tsx | 54 +- app/components/ui/README.md | 1 - app/components/ui/chat/chat-actions.tsx | 28 - .../chat/{chat-message => }/chat-avatar.tsx | 6 +- app/components/ui/chat/chat-input.tsx | 167 +- .../ui/chat/chat-message-content.tsx | 38 + .../chat/chat-message/chat-agent-events.tsx | 153 - .../ui/chat/chat-message/chat-events.tsx | 50 - .../ui/chat/chat-message/chat-files.tsx | 16 - .../ui/chat/chat-message/chat-image.tsx | 17 - .../ui/chat/chat-message/chat-sources.tsx | 173 - .../chat-message/chat-suggestedQuestions.tsx | 31 - .../ui/chat/chat-message/chat-tools.tsx | 40 - .../ui/chat/chat-message/codeblock.tsx | 131 - app/components/ui/chat/chat-message/index.tsx | 184 - .../ui/chat/chat-message/markdown.tsx | 170 - app/components/ui/chat/chat-messages.tsx | 156 +- app/components/ui/chat/chat-starter.tsx | 26 + app/components/ui/chat/chat.interface.ts | 25 - .../llama-cloud-selector.tsx} | 8 +- app/components/ui/chat/custom/markdown.tsx | 29 + app/components/ui/chat/hooks/use-file.ts | 136 - app/components/ui/chat/index.ts | 136 - .../Artifact.tsx => tools/artifact.tsx} | 15 +- app/components/ui/chat/tools/chat-tools.tsx | 103 + .../weather-card.tsx} | 0 app/components/ui/chat/widgets/PdfDialog.tsx | 67 - app/components/ui/document-preview.tsx | 129 - app/components/ui/file-uploader.tsx | 105 - app/components/ui/progress.tsx | 27 + app/components/ui/upload-image-preview.tsx | 32 - app/layout.tsx | 1 - app/markdown.css | 79 - app/observability/index.ts | 1 + next.config.json | 31 +- package.json | 32 +- pnpm-lock.yaml | 11341 ++++++++++++++++ tailwind.config.ts | 6 +- tsconfig.json | 3 +- 54 files changed, 12193 insertions(+), 2136 deletions(-) delete mode 100644 app/api/chat/engine/shared.ts create mode 100644 app/api/chat/engine/tools/form-filling.ts delete mode 100644 app/components/ui/README.md delete mode 100644 app/components/ui/chat/chat-actions.tsx rename app/components/ui/chat/{chat-message => }/chat-avatar.tsx (78%) create mode 100644 app/components/ui/chat/chat-message-content.tsx delete mode 100644 app/components/ui/chat/chat-message/chat-agent-events.tsx delete mode 100644 app/components/ui/chat/chat-message/chat-events.tsx delete mode 100644 app/components/ui/chat/chat-message/chat-files.tsx delete mode 100644 app/components/ui/chat/chat-message/chat-image.tsx delete mode 100644 app/components/ui/chat/chat-message/chat-sources.tsx delete mode 100644 app/components/ui/chat/chat-message/chat-suggestedQuestions.tsx delete mode 100644 app/components/ui/chat/chat-message/chat-tools.tsx delete mode 100644 app/components/ui/chat/chat-message/codeblock.tsx delete mode 100644 app/components/ui/chat/chat-message/index.tsx delete mode 100644 app/components/ui/chat/chat-message/markdown.tsx create mode 100644 app/components/ui/chat/chat-starter.tsx delete mode 100644 app/components/ui/chat/chat.interface.ts rename app/components/ui/chat/{widgets/LlamaCloudSelector.tsx => custom/llama-cloud-selector.tsx} (96%) create mode 100644 app/components/ui/chat/custom/markdown.tsx delete mode 100644 app/components/ui/chat/hooks/use-file.ts delete mode 100644 app/components/ui/chat/index.ts rename app/components/ui/chat/{widgets/Artifact.tsx => tools/artifact.tsx} (98%) create mode 100644 app/components/ui/chat/tools/chat-tools.tsx rename app/components/ui/chat/{widgets/WeatherCard.tsx => tools/weather-card.tsx} (100%) delete mode 100644 app/components/ui/chat/widgets/PdfDialog.tsx delete mode 100644 app/components/ui/document-preview.tsx delete mode 100644 app/components/ui/file-uploader.tsx create mode 100644 app/components/ui/progress.tsx delete mode 100644 app/components/ui/upload-image-preview.tsx delete mode 100644 app/markdown.css create mode 100644 pnpm-lock.yaml diff --git a/.gitignore b/.gitignore index e3c55c0..ff1b11e 100644 --- a/.gitignore +++ b/.gitignore @@ -36,4 +36,5 @@ next-env.d.ts output/ cache/ +.cache/ .env \ No newline at end of file diff --git a/app/api/chat/engine/chat.ts b/app/api/chat/engine/chat.ts index bf2bb69..333ff64 100644 --- a/app/api/chat/engine/chat.ts +++ b/app/api/chat/engine/chat.ts @@ -1,7 +1,7 @@ import { BaseChatEngine, BaseToolWithCall, - OpenAIAgent, + LLMAgent, QueryEngineTool, } from "llamaindex"; import fs from "node:fs/promises"; @@ -42,7 +42,7 @@ export async function createChatEngine(documentIds?: string[], params?: any) { tools.push(...(await createTools(toolConfig))); } - const agent = new OpenAIAgent({ + const agent = new LLMAgent({ tools, systemPrompt: process.env.SYSTEM_PROMPT, }) as unknown as BaseChatEngine; diff --git a/app/api/chat/engine/generate.ts b/app/api/chat/engine/generate.ts index 595b27d..4647361 100644 --- a/app/api/chat/engine/generate.ts +++ b/app/api/chat/engine/generate.ts @@ -5,7 +5,6 @@ import * as dotenv from "dotenv"; import { getDocuments } from "./loader"; import { initSettings } from "./settings"; -import { STORAGE_CACHE_DIR } from "./shared"; // Load environment variables from local .env file dotenv.config(); @@ -20,9 +19,13 @@ async function getRuntime(func: any) { async function generateDatasource() { console.log(`Generating storage context...`); // Split documents, create embeddings and store them in the storage context + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } const ms = await getRuntime(async () => { const storageContext = await storageContextFromDefaults({ - persistDir: STORAGE_CACHE_DIR, + persistDir, }); const documents = await getDocuments(); diff --git a/app/api/chat/engine/index.ts b/app/api/chat/engine/index.ts index fecc76f..d38ea60 100644 --- a/app/api/chat/engine/index.ts +++ b/app/api/chat/engine/index.ts @@ -1,10 +1,13 @@ import { SimpleDocumentStore, VectorStoreIndex } from "llamaindex"; import { storageContextFromDefaults } from "llamaindex/storage/StorageContext"; -import { STORAGE_CACHE_DIR } from "./shared"; export async function getDataSource(params?: any) { + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } const storageContext = await storageContextFromDefaults({ - persistDir: `${STORAGE_CACHE_DIR}`, + persistDir, }); const numberOfDocs = Object.keys( diff --git a/app/api/chat/engine/loader.ts b/app/api/chat/engine/loader.ts index 1c01a47..85deb47 100644 --- a/app/api/chat/engine/loader.ts +++ b/app/api/chat/engine/loader.ts @@ -1,7 +1,7 @@ import { FILE_EXT_TO_READER, SimpleDirectoryReader, -} from "llamaindex/readers/SimpleDirectoryReader"; +} from "llamaindex/readers/index"; export const DATA_DIR = "./data"; diff --git a/app/api/chat/engine/shared.ts b/app/api/chat/engine/shared.ts deleted file mode 100644 index e7736e5..0000000 --- a/app/api/chat/engine/shared.ts +++ /dev/null @@ -1 +0,0 @@ -export const STORAGE_CACHE_DIR = "./cache"; diff --git a/app/api/chat/engine/tools/form-filling.ts b/app/api/chat/engine/tools/form-filling.ts new file mode 100644 index 0000000..6ac0a52 --- /dev/null +++ b/app/api/chat/engine/tools/form-filling.ts @@ -0,0 +1,296 @@ +import { JSONSchemaType } from "ajv"; +import fs from "fs"; +import { BaseTool, Settings, ToolMetadata } from "llamaindex"; +import Papa from "papaparse"; +import path from "path"; +import { saveDocument } from "../../llamaindex/documents/helper"; + +type ExtractMissingCellsParameter = { + filePath: string; +}; + +export type MissingCell = { + rowIndex: number; + columnIndex: number; + question: string; +}; + +const CSV_EXTRACTION_PROMPT = `You are a data analyst. You are given a table with missing cells. +Your task is to identify the missing cells and the questions needed to fill them. +IMPORTANT: Column indices should be 0-based + +# Instructions: +- Understand the entire content of the table and the topics of the table. +- Identify the missing cells and the meaning of the data in the cells. +- For each missing cell, provide the row index and the correct column index (remember: first data column is 1). +- For each missing cell, provide the question needed to fill the cell (it's important to provide the question that is relevant to the topic of the table). +- Since the cell's value should be concise, the question should request a numerical answer or a specific value. +- Finally, only return the answer in JSON format with the following schema: +{ + "missing_cells": [ + { + "rowIndex": number, + "columnIndex": number, + "question": string + } + ] +} +- If there are no missing cells, return an empty array. +- The answer is only the JSON object, nothing else and don't wrap it inside markdown code block. + +# Example: +# | | Name | Age | City | +# |----|------|-----|------| +# | 0 | John | | Paris| +# | 1 | Mary | | | +# | 2 | | 30 | | +# +# Your thoughts: +# - The table is about people's names, ages, and cities. +# - Row: 1, Column: 2 (Age column), Question: "How old is Mary? Please provide only the numerical answer." +# - Row: 1, Column: 3 (City column), Question: "In which city does Mary live? Please provide only the city name." +# Your answer: +# { +# "missing_cells": [ +# { +# "rowIndex": 1, +# "columnIndex": 2, +# "question": "How old is Mary? Please provide only the numerical answer." +# }, +# { +# "rowIndex": 1, +# "columnIndex": 3, +# "question": "In which city does Mary live? Please provide only the city name." +# } +# ] +# } + + +# Here is your task: + +- Table content: +{table_content} + +- Your answer: +`; + +const DEFAULT_METADATA: ToolMetadata< + JSONSchemaType +> = { + name: "extract_missing_cells", + description: `Use this tool to extract missing cells in a CSV file and generate questions to fill them. This tool only works with local file path.`, + parameters: { + type: "object", + properties: { + filePath: { + type: "string", + description: "The local file path to the CSV file.", + }, + }, + required: ["filePath"], + }, +}; + +export interface ExtractMissingCellsParams { + metadata?: ToolMetadata>; +} + +export class ExtractMissingCellsTool + implements BaseTool +{ + metadata: ToolMetadata>; + defaultExtractionPrompt: string; + + constructor(params: ExtractMissingCellsParams) { + this.metadata = params.metadata ?? DEFAULT_METADATA; + this.defaultExtractionPrompt = CSV_EXTRACTION_PROMPT; + } + + private readCsvFile(filePath: string): Promise { + return new Promise((resolve, reject) => { + fs.readFile(filePath, "utf8", (err, data) => { + if (err) { + reject(err); + return; + } + + const parsedData = Papa.parse(data, { + skipEmptyLines: false, + }); + + if (parsedData.errors.length) { + reject(parsedData.errors); + return; + } + + // Ensure all rows have the same number of columns as the header + const maxColumns = parsedData.data[0].length; + const paddedRows = parsedData.data.map((row) => { + return [...row, ...Array(maxColumns - row.length).fill("")]; + }); + + resolve(paddedRows); + }); + }); + } + + private formatToMarkdownTable(data: string[][]): string { + if (data.length === 0) return ""; + + const maxColumns = data[0].length; + + const headerRow = `| ${data[0].join(" | ")} |`; + const separatorRow = `| ${Array(maxColumns).fill("---").join(" | ")} |`; + + const dataRows = data.slice(1).map((row) => { + return `| ${row.join(" | ")} |`; + }); + + return [headerRow, separatorRow, ...dataRows].join("\n"); + } + + async call(input: ExtractMissingCellsParameter): Promise { + const { filePath } = input; + let tableContent: string[][]; + try { + tableContent = await this.readCsvFile(filePath); + } catch (error) { + throw new Error( + `Failed to read CSV file. Make sure that you are reading a local file path (not a sandbox path).`, + ); + } + + const prompt = this.defaultExtractionPrompt.replace( + "{table_content}", + this.formatToMarkdownTable(tableContent), + ); + + const llm = Settings.llm; + const response = await llm.complete({ + prompt, + }); + const rawAnswer = response.text; + const parsedResponse = JSON.parse(rawAnswer) as { + missing_cells: MissingCell[]; + }; + if (!parsedResponse.missing_cells) { + throw new Error( + "The answer is not in the correct format. There should be a missing_cells array.", + ); + } + const answer = parsedResponse.missing_cells; + + return answer; + } +} + +type FillMissingCellsParameter = { + filePath: string; + cells: { + rowIndex: number; + columnIndex: number; + answer: string; + }[]; +}; + +const FILL_CELLS_METADATA: ToolMetadata< + JSONSchemaType +> = { + name: "fill_missing_cells", + description: `Use this tool to fill missing cells in a CSV file with provided answers. This tool only works with local file path.`, + parameters: { + type: "object", + properties: { + filePath: { + type: "string", + description: "The local file path to the CSV file.", + }, + cells: { + type: "array", + items: { + type: "object", + properties: { + rowIndex: { type: "number" }, + columnIndex: { type: "number" }, + answer: { type: "string" }, + }, + required: ["rowIndex", "columnIndex", "answer"], + }, + description: "Array of cells to fill with their answers", + }, + }, + required: ["filePath", "cells"], + }, +}; + +export interface FillMissingCellsParams { + metadata?: ToolMetadata>; +} + +export class FillMissingCellsTool + implements BaseTool +{ + metadata: ToolMetadata>; + + constructor(params: FillMissingCellsParams = {}) { + this.metadata = params.metadata ?? FILL_CELLS_METADATA; + } + + async call(input: FillMissingCellsParameter): Promise { + const { filePath, cells } = input; + + // Read the CSV file + const fileContent = await new Promise((resolve, reject) => { + fs.readFile(filePath, "utf8", (err, data) => { + if (err) { + reject(err); + } else { + resolve(data); + } + }); + }); + + // Parse CSV with PapaParse + const parseResult = Papa.parse(fileContent, { + header: false, // Ensure the header is not treated as a separate object + skipEmptyLines: false, // Ensure empty lines are not skipped + }); + + if (parseResult.errors.length) { + throw new Error( + "Failed to parse CSV file: " + parseResult.errors[0].message, + ); + } + + const rows = parseResult.data; + + // Fill the cells with answers + for (const cell of cells) { + // Adjust rowIndex to start from 1 for data rows + const adjustedRowIndex = cell.rowIndex + 1; + if ( + adjustedRowIndex < rows.length && + cell.columnIndex < rows[adjustedRowIndex].length + ) { + rows[adjustedRowIndex][cell.columnIndex] = cell.answer; + } + } + + // Convert back to CSV format + const updatedContent = Papa.unparse(rows, { + delimiter: parseResult.meta.delimiter, + }); + + // Use the helper function to write the file + const parsedPath = path.parse(filePath); + const newFileName = `${parsedPath.name}-filled${parsedPath.ext}`; + const newFilePath = path.join("output/tools", newFileName); + + const newFileUrl = await saveDocument(newFilePath, updatedContent); + + return ( + "Successfully filled missing cells in the CSV file. File URL to show to the user: " + + newFileUrl + ); + } +} diff --git a/app/api/chat/engine/tools/index.ts b/app/api/chat/engine/tools/index.ts index 062e2eb..c49de77 100644 --- a/app/api/chat/engine/tools/index.ts +++ b/app/api/chat/engine/tools/index.ts @@ -1,11 +1,19 @@ import { BaseToolWithCall } from "llamaindex"; import { ToolsFactory } from "llamaindex/tools/ToolsFactory"; +import fs from "node:fs/promises"; +import path from "node:path"; import { CodeGeneratorTool, CodeGeneratorToolParams } from "./code-generator"; import { DocumentGenerator, DocumentGeneratorParams, } from "./document-generator"; import { DuckDuckGoSearchTool, DuckDuckGoToolParams } from "./duckduckgo"; +import { + ExtractMissingCellsParams, + ExtractMissingCellsTool, + FillMissingCellsParams, + FillMissingCellsTool, +} from "./form-filling"; import { ImgGeneratorTool, ImgGeneratorToolParams } from "./img-gen"; import { InterpreterTool, InterpreterToolParams } from "./interpreter"; import { OpenAPIActionTool } from "./openapi-action"; @@ -54,6 +62,12 @@ const toolFactory: Record = { document_generator: async (config: unknown) => { return [new DocumentGenerator(config as DocumentGeneratorParams)]; }, + form_filling: async (config: unknown) => { + return [ + new ExtractMissingCellsTool(config as ExtractMissingCellsParams), + new FillMissingCellsTool(config as FillMissingCellsParams), + ]; + }, }; async function createLocalTools( @@ -70,3 +84,19 @@ async function createLocalTools( return tools; } + +export async function getConfiguredTools( + configPath?: string, +): Promise { + const configFile = path.join(configPath ?? "config", "tools.json"); + const toolConfig = JSON.parse(await fs.readFile(configFile, "utf8")); + const tools = await createTools(toolConfig); + return tools; +} + +export async function getTool( + toolName: string, +): Promise { + const tools = await getConfiguredTools(); + return tools.find((tool) => tool.metadata.name === toolName); +} diff --git a/app/api/chat/engine/tools/interpreter.ts b/app/api/chat/engine/tools/interpreter.ts index ae386a1..44cc7cb 100644 --- a/app/api/chat/engine/tools/interpreter.ts +++ b/app/api/chat/engine/tools/interpreter.ts @@ -111,13 +111,16 @@ export class InterpreterTool implements BaseTool { // upload files to sandbox if (input.sandboxFiles) { console.log(`Uploading ${input.sandboxFiles.length} files to sandbox`); - for (const filePath of input.sandboxFiles) { - const fileName = path.basename(filePath); - const localFilePath = path.join(this.uploadedFilesDir, fileName); - const content = fs.readFileSync(localFilePath); - await this.codeInterpreter?.files.write(filePath, content); + try { + for (const filePath of input.sandboxFiles) { + const fileName = path.basename(filePath); + const localFilePath = path.join(this.uploadedFilesDir, fileName); + const content = fs.readFileSync(localFilePath); + await this.codeInterpreter?.files.write(filePath, content); + } + } catch (error) { + console.error("Got error when uploading files to sandbox", error); } - console.log(`Uploaded ${input.sandboxFiles.length} files to sandbox`); } return this.codeInterpreter; } diff --git a/app/api/chat/llamaindex/documents/helper.ts b/app/api/chat/llamaindex/documents/helper.ts index 52cc5d9..a6d18c9 100644 --- a/app/api/chat/llamaindex/documents/helper.ts +++ b/app/api/chat/llamaindex/documents/helper.ts @@ -3,6 +3,7 @@ import crypto from "node:crypto"; import fs from "node:fs"; import path from "node:path"; import { getExtractors } from "../../engine/loader"; +import { DocumentFile } from "../streaming/annotations"; const MIME_TYPE_TO_EXT: Record = { "application/pdf": "pdf", @@ -12,29 +13,22 @@ const MIME_TYPE_TO_EXT: Record = { "docx", }; -const UPLOADED_FOLDER = "output/uploaded"; - -export type FileMetadata = { - id: string; - name: string; - url: string; - refs: string[]; -}; +export const UPLOADED_FOLDER = "output/uploaded"; export async function storeAndParseFile( - filename: string, + name: string, fileBuffer: Buffer, mimeType: string, -): Promise { - const fileMetadata = await storeFile(filename, fileBuffer, mimeType); - const documents: Document[] = await parseFile(fileBuffer, filename, mimeType); +): Promise { + const file = await storeFile(name, fileBuffer, mimeType); + const documents: Document[] = await parseFile(fileBuffer, name, mimeType); // Update document IDs in the file metadata - fileMetadata.refs = documents.map((document) => document.id_ as string); - return fileMetadata; + file.refs = documents.map((document) => document.id_ as string); + return file; } export async function storeFile( - filename: string, + name: string, fileBuffer: Buffer, mimeType: string, ) { @@ -42,15 +36,17 @@ export async function storeFile( if (!fileExt) throw new Error(`Unsupported document type: ${mimeType}`); const fileId = crypto.randomUUID(); - const newFilename = `${fileId}_${sanitizeFileName(filename)}`; + const newFilename = `${sanitizeFileName(name)}_${fileId}.${fileExt}`; const filepath = path.join(UPLOADED_FOLDER, newFilename); const fileUrl = await saveDocument(filepath, fileBuffer); return { id: fileId, name: newFilename, + size: fileBuffer.length, + type: fileExt, url: fileUrl, refs: [] as string[], - } as FileMetadata; + } as DocumentFile; } export async function parseFile( @@ -104,5 +100,6 @@ export async function saveDocument(filepath: string, content: string | Buffer) { } function sanitizeFileName(fileName: string) { - return fileName.replace(/[^a-zA-Z0-9_.-]/g, "_"); + // Remove file extension and sanitize + return fileName.split(".")[0].replace(/[^a-zA-Z0-9_-]/g, "_"); } diff --git a/app/api/chat/llamaindex/documents/pipeline.ts b/app/api/chat/llamaindex/documents/pipeline.ts index 01b52fd..cd4d6d0 100644 --- a/app/api/chat/llamaindex/documents/pipeline.ts +++ b/app/api/chat/llamaindex/documents/pipeline.ts @@ -3,6 +3,7 @@ import { IngestionPipeline, Settings, SimpleNodeParser, + storageContextFromDefaults, VectorStoreIndex, } from "llamaindex"; @@ -28,11 +29,20 @@ export async function runPipeline( return documents.map((document) => document.id_); } else { // Initialize a new index with the documents - const newIndex = await VectorStoreIndex.fromDocuments(documents); - newIndex.storageContext.docStore.persist(); console.log( "Got empty index, created new index with the uploaded documents", ); + const persistDir = process.env.STORAGE_CACHE_DIR; + if (!persistDir) { + throw new Error("STORAGE_CACHE_DIR environment variable is required!"); + } + const storageContext = await storageContextFromDefaults({ + persistDir, + }); + const newIndex = await VectorStoreIndex.fromDocuments(documents, { + storageContext, + }); + await newIndex.storageContext.docStore.persist(); return documents.map((document) => document.id_); } } diff --git a/app/api/chat/llamaindex/documents/upload.ts b/app/api/chat/llamaindex/documents/upload.ts index 158b05a..b3786a3 100644 --- a/app/api/chat/llamaindex/documents/upload.ts +++ b/app/api/chat/llamaindex/documents/upload.ts @@ -1,41 +1,39 @@ import { Document, LLamaCloudFileService, VectorStoreIndex } from "llamaindex"; import { LlamaCloudIndex } from "llamaindex/cloud/LlamaCloudIndex"; -import fs from "node:fs/promises"; -import path from "node:path"; -import { FileMetadata, parseFile, storeFile } from "./helper"; +import { DocumentFile } from "../streaming/annotations"; +import { parseFile, storeFile } from "./helper"; import { runPipeline } from "./pipeline"; export async function uploadDocument( index: VectorStoreIndex | LlamaCloudIndex | null, - filename: string, + name: string, raw: string, -): Promise { +): Promise { const [header, content] = raw.split(","); const mimeType = header.replace("data:", "").replace(";base64", ""); const fileBuffer = Buffer.from(content, "base64"); // Store file - const fileMetadata = await storeFile(filename, fileBuffer, mimeType); + const fileMetadata = await storeFile(name, fileBuffer, mimeType); - // If the file is csv and has codeExecutorTool, we don't need to index the file. - if (mimeType === "text/csv" && (await hasCodeExecutorTool())) { + // Do not index csv files + if (mimeType === "text/csv") { return fileMetadata; } - + let documentIds: string[] = []; if (index instanceof LlamaCloudIndex) { // trigger LlamaCloudIndex API to upload the file and run the pipeline const projectId = await index.getProjectId(); const pipelineId = await index.getPipelineId(); try { - const documentId = await LLamaCloudFileService.addFileToPipeline( - projectId, - pipelineId, - new File([fileBuffer], filename, { type: mimeType }), - { private: "true" }, - ); - // Update file metadata with document IDs - fileMetadata.refs = [documentId]; - return fileMetadata; + documentIds = [ + await LLamaCloudFileService.addFileToPipeline( + projectId, + pipelineId, + new File([fileBuffer], name, { type: mimeType }), + { private: "true" }, + ), + ]; } catch (error) { if ( error instanceof ReferenceError && @@ -47,24 +45,17 @@ export async function uploadDocument( } throw error; } + } else { + // run the pipeline for other vector store indexes + const documents: Document[] = await parseFile( + fileBuffer, + fileMetadata.name, + mimeType, + ); + documentIds = await runPipeline(index, documents); } - // run the pipeline for other vector store indexes - const documents: Document[] = await parseFile(fileBuffer, filename, mimeType); // Update file metadata with document IDs - fileMetadata.refs = documents.map((document) => document.id_ as string); - // Run the pipeline - await runPipeline(index, documents); + fileMetadata.refs = documentIds; return fileMetadata; } - -const hasCodeExecutorTool = async () => { - const codeExecutorTools = ["interpreter", "artifact"]; - - const configFile = path.join("config", "tools.json"); - const toolConfig = JSON.parse(await fs.readFile(configFile, "utf8")); - - const localTools = toolConfig.local || {}; - // Check if local tools contains codeExecutorTools - return codeExecutorTools.some((tool) => localTools[tool] !== undefined); -}; diff --git a/app/api/chat/llamaindex/streaming/annotations.ts b/app/api/chat/llamaindex/streaming/annotations.ts index f8de88f..164ddca 100644 --- a/app/api/chat/llamaindex/streaming/annotations.ts +++ b/app/api/chat/llamaindex/streaming/annotations.ts @@ -1,19 +1,21 @@ import { JSONValue, Message } from "ai"; -import { MessageContent, MessageContentDetail } from "llamaindex"; +import { + ChatMessage, + MessageContent, + MessageContentDetail, + MessageType, +} from "llamaindex"; +import { UPLOADED_FOLDER } from "../documents/helper"; export type DocumentFileType = "csv" | "pdf" | "txt" | "docx"; -export type UploadedFileMeta = { +export type DocumentFile = { id: string; name: string; - url?: string; - refs?: string[]; -}; - -export type DocumentFile = { - type: DocumentFileType; + size: number; + type: string; url: string; - metadata: UploadedFileMeta; + refs?: string[]; }; type Annotation = { @@ -30,7 +32,7 @@ export function isValidMessages(messages: Message[]): boolean { export function retrieveDocumentIds(messages: Message[]): string[] { // retrieve document Ids from the annotations of all messages (if any) const documentFiles = retrieveDocumentFiles(messages); - return documentFiles.map((file) => file.metadata?.refs || []).flat(); + return documentFiles.map((file) => file.refs || []).flat(); } export function retrieveDocumentFiles(messages: Message[]): DocumentFile[] { @@ -62,17 +64,55 @@ export function retrieveMessageContent(messages: Message[]): MessageContent { ]; } +export function convertToChatHistory(messages: Message[]): ChatMessage[] { + if (!messages || !Array.isArray(messages)) { + return []; + } + const agentHistory = retrieveAgentHistoryMessage(messages); + if (agentHistory) { + const previousMessages = messages.slice(0, -1); + return [...previousMessages, agentHistory].map((msg) => ({ + role: msg.role as MessageType, + content: msg.content, + })); + } + return messages.map((msg) => ({ + role: msg.role as MessageType, + content: msg.content, + })); +} + +function retrieveAgentHistoryMessage( + messages: Message[], + maxAgentMessages = 10, +): ChatMessage | null { + const agentAnnotations = getAnnotations<{ agent: string; text: string }>( + messages, + { role: "assistant", type: "agent" }, + ).slice(-maxAgentMessages); + + if (agentAnnotations.length > 0) { + const messageContent = + "Here is the previous conversation of agents:\n" + + agentAnnotations.map((annotation) => annotation.data.text).join("\n"); + return { + role: "assistant", + content: messageContent, + }; + } + return null; +} + function getFileContent(file: DocumentFile): string { - const fileMetadata = file.metadata; - let defaultContent = `=====File: ${fileMetadata.name}=====\n`; + let defaultContent = `=====File: ${file.name}=====\n`; // Include file URL if it's available const urlPrefix = process.env.FILESERVER_URL_PREFIX; let urlContent = ""; if (urlPrefix) { - if (fileMetadata.url) { - urlContent = `File URL: ${fileMetadata.url}\n`; + if (file.url) { + urlContent = `File URL: ${file.url}\n`; } else { - urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${fileMetadata.name}\n`; + urlContent = `File URL (instruction: do not update this file URL yourself): ${urlPrefix}/output/uploaded/${file.name}\n`; } } else { console.warn( @@ -82,13 +122,17 @@ function getFileContent(file: DocumentFile): string { defaultContent += urlContent; // Include document IDs if it's available - if (fileMetadata.refs) { - defaultContent += `Document IDs: ${fileMetadata.refs}\n`; + if (file.refs) { + defaultContent += `Document IDs: ${file.refs}\n`; } // Include sandbox file paths - const sandboxFilePath = `/tmp/${fileMetadata.name}`; + const sandboxFilePath = `/tmp/${file.name}`; defaultContent += `Sandbox file path (instruction: only use sandbox path for artifact or code interpreter tool): ${sandboxFilePath}\n`; + // Include local file path + const localFilePath = `${UPLOADED_FOLDER}/${file.name}`; + defaultContent += `Local file path (instruction: use for local tool that requires a local path): ${localFilePath}\n`; + return defaultContent; } @@ -132,13 +176,10 @@ function retrieveLatestArtifact(messages: Message[]): MessageContentDetail[] { } function convertAnnotations(messages: Message[]): MessageContentDetail[] { - // annotations from the last user message that has annotations - const annotations: Annotation[] = - messages - .slice() - .reverse() - .find((message) => message.role === "user" && message.annotations) - ?.annotations?.map(getValidAnnotation) || []; + // get all annotations from user messages + const annotations: Annotation[] = messages + .filter((message) => message.role === "user" && message.annotations) + .flatMap((message) => message.annotations?.map(getValidAnnotation) || []); if (annotations.length === 0) return []; const content: MessageContentDetail[] = []; diff --git a/app/api/chat/upload/route.ts b/app/api/chat/upload/route.ts index 382a94c..05939e3 100644 --- a/app/api/chat/upload/route.ts +++ b/app/api/chat/upload/route.ts @@ -11,19 +11,23 @@ export const dynamic = "force-dynamic"; export async function POST(request: NextRequest) { try { const { - filename, + name, base64, params, - }: { filename: string; base64: string; params?: any } = - await request.json(); - if (!base64 || !filename) { + }: { + name: string; + base64: string; + params?: any; + } = await request.json(); + if (!base64 || !name) { return NextResponse.json( - { error: "base64 and filename is required in the request body" }, + { error: "base64 and name is required in the request body" }, { status: 400 }, ); } const index = await getDataSource(params); - return NextResponse.json(await uploadDocument(index, filename, base64)); + const documentFile = await uploadDocument(index, name, base64); + return NextResponse.json(documentFile); } catch (error) { console.error("[Upload API]", error); return NextResponse.json( diff --git a/app/api/files/[...slug]/route.ts b/app/api/files/[...slug]/route.ts index 5bb2e06..7ccaeda 100644 --- a/app/api/files/[...slug]/route.ts +++ b/app/api/files/[...slug]/route.ts @@ -9,9 +9,9 @@ import { DATA_DIR } from "../../chat/engine/loader"; */ export async function GET( _request: NextRequest, - { params }: { params: { slug: string[] } }, + { params }: { params: Promise<{ slug: string[] }> }, ) { - const slug = params.slug; + const slug = (await params).slug; if (!slug) { return NextResponse.json({ detail: "Missing file slug" }, { status: 400 }); @@ -21,7 +21,7 @@ export async function GET( return NextResponse.json({ detail: "Invalid file path" }, { status: 400 }); } - const [folder, ...pathTofile] = params.slug; // data, file.pdf + const [folder, ...pathTofile] = slug; // data, file.pdf const allowedFolders = ["data", "output"]; if (!allowedFolders.includes(folder)) { diff --git a/app/components/chat-section.tsx b/app/components/chat-section.tsx index e7e489b..46b0054 100644 --- a/app/components/chat-section.tsx +++ b/app/components/chat-section.tsx @@ -1,57 +1,27 @@ "use client"; +import { ChatSection as ChatSectionUI } from "@llamaindex/chat-ui"; +import "@llamaindex/chat-ui/styles/code.css"; +import "@llamaindex/chat-ui/styles/katex.css"; +import "@llamaindex/chat-ui/styles/pdf.css"; import { useChat } from "ai/react"; -import { useState } from "react"; -import { ChatInput, ChatMessages } from "./ui/chat"; +import CustomChatInput from "./ui/chat/chat-input"; +import CustomChatMessages from "./ui/chat/chat-messages"; import { useClientConfig } from "./ui/chat/hooks/use-config"; export default function ChatSection() { const { backend } = useClientConfig(); - const [requestData, setRequestData] = useState(); - const { - messages, - input, - isLoading, - handleSubmit, - handleInputChange, - reload, - stop, - append, - setInput, - } = useChat({ - body: { data: requestData }, + const handler = useChat({ api: `${backend}/api/chat`, - headers: { - "Content-Type": "application/json", // using JSON because of vercel/ai 2.2.26 - }, onError: (error: unknown) => { if (!(error instanceof Error)) throw error; - const message = JSON.parse(error.message); - alert(message.detail); + alert(JSON.parse(error.message).detail); }, - sendExtraMessageFields: true, }); - return ( -
- - -
+ + + + ); } diff --git a/app/components/ui/README.md b/app/components/ui/README.md deleted file mode 100644 index ebfcf48..0000000 --- a/app/components/ui/README.md +++ /dev/null @@ -1 +0,0 @@ -Using the chat component from https://github.com/marcusschiesser/ui (based on https://ui.shadcn.com/) diff --git a/app/components/ui/chat/chat-actions.tsx b/app/components/ui/chat/chat-actions.tsx deleted file mode 100644 index 151ef61..0000000 --- a/app/components/ui/chat/chat-actions.tsx +++ /dev/null @@ -1,28 +0,0 @@ -import { PauseCircle, RefreshCw } from "lucide-react"; - -import { Button } from "../button"; -import { ChatHandler } from "./chat.interface"; - -export default function ChatActions( - props: Pick & { - showReload?: boolean; - showStop?: boolean; - }, -) { - return ( -
- {props.showStop && ( - - )} - {props.showReload && ( - - )} -
- ); -} diff --git a/app/components/ui/chat/chat-message/chat-avatar.tsx b/app/components/ui/chat/chat-avatar.tsx similarity index 78% rename from app/components/ui/chat/chat-message/chat-avatar.tsx rename to app/components/ui/chat/chat-avatar.tsx index ce04e30..cfa307c 100644 --- a/app/components/ui/chat/chat-message/chat-avatar.tsx +++ b/app/components/ui/chat/chat-avatar.tsx @@ -1,8 +1,10 @@ +import { useChatMessage } from "@llamaindex/chat-ui"; import { User2 } from "lucide-react"; import Image from "next/image"; -export default function ChatAvatar({ role }: { role: string }) { - if (role === "user") { +export function ChatMessageAvatar() { + const { message } = useChatMessage(); + if (message.role === "user") { return (
diff --git a/app/components/ui/chat/chat-input.tsx b/app/components/ui/chat/chat-input.tsx index ce2b02d..7b11168 100644 --- a/app/components/ui/chat/chat-input.tsx +++ b/app/components/ui/chat/chat-input.tsx @@ -1,33 +1,13 @@ -import { JSONValue } from "ai"; -import React from "react"; -import { Button } from "../button"; -import { DocumentPreview } from "../document-preview"; -import FileUploader from "../file-uploader"; -import { Textarea } from "../textarea"; -import UploadImagePreview from "../upload-image-preview"; -import { ChatHandler } from "./chat.interface"; -import { useFile } from "./hooks/use-file"; -import { LlamaCloudSelector } from "./widgets/LlamaCloudSelector"; +"use client"; -const ALLOWED_EXTENSIONS = ["png", "jpg", "jpeg", "csv", "pdf", "txt", "docx"]; +import { ChatInput, useChatUI, useFile } from "@llamaindex/chat-ui"; +import { DocumentInfo, ImagePreview } from "@llamaindex/chat-ui/widgets"; +import { LlamaCloudSelector } from "./custom/llama-cloud-selector"; +import { useClientConfig } from "./hooks/use-config"; -export default function ChatInput( - props: Pick< - ChatHandler, - | "isLoading" - | "input" - | "onFileUpload" - | "onFileError" - | "handleSubmit" - | "handleInputChange" - | "messages" - | "setInput" - | "append" - > & { - requestParams?: any; - setRequestData?: React.Dispatch; - }, -) { +export default function CustomChatInput() { + const { requestData, isLoading, input } = useChatUI(); + const { backend } = useClientConfig(); const { imageUrl, setImageUrl, @@ -36,101 +16,66 @@ export default function ChatInput( removeDoc, reset, getAnnotations, - } = useFile(); - - // default submit function does not handle including annotations in the message - // so we need to use append function to submit new message with annotations - const handleSubmitWithAnnotations = ( - e: React.FormEvent, - annotations: JSONValue[] | undefined, - ) => { - e.preventDefault(); - props.append!({ - content: props.input, - role: "user", - createdAt: new Date(), - annotations, - }); - props.setInput!(""); - }; - - const onSubmit = (e: React.FormEvent) => { - e.preventDefault(); - const annotations = getAnnotations(); - if (annotations.length) { - handleSubmitWithAnnotations(e, annotations); - return reset(); - } - props.handleSubmit(e); - }; + } = useFile({ uploadAPI: `${backend}/api/chat/upload` }); + /** + * Handles file uploads. Overwrite to hook into the file upload behavior. + * @param file The file to upload + */ const handleUploadFile = async (file: File) => { - if (imageUrl || files.length > 0) { - alert("You can only upload one file at a time."); + // There's already an image uploaded, only allow one image at a time + if (imageUrl) { + alert("You can only upload one image at a time."); return; } + try { - await uploadFile(file, props.requestParams); - props.onFileUpload?.(file); + // Upload the file and send with it the current request data + await uploadFile(file, requestData); } catch (error: any) { - const onFileUploadError = props.onFileError || window.alert; - onFileUploadError(error.message); + // Show error message if upload fails + alert(error.message); } }; - const handleKeyDown = (e: React.KeyboardEvent) => { - if (e.key === "Enter" && !e.shiftKey) { - e.preventDefault(); - onSubmit(e as unknown as React.FormEvent); - } - }; + // Get references to the upload files in message annotations format, see https://github.com/run-llama/chat-ui/blob/main/packages/chat-ui/src/hook/use-file.tsx#L56 + const annotations = getAnnotations(); return ( -
- {imageUrl && ( - setImageUrl(null)} /> - )} - {files.length > 0 && ( -
- {files.map((file, index) => ( - removeDoc(file)} - /> - ))} -
- )} -
-