mirror of
https://github.com/Mintplex-Labs/node-llama-cpp.git
synced 2026-07-01 19:55:08 -04:00
feat(minor): save and load history to chat command (#71)
* feat: save and load history to `chat` command * build: support patch bump of minor features * fix: show correct cli command on Windows
This commit is contained in:
@@ -7,6 +7,7 @@
|
||||
["@semantic-release/commit-analyzer", {
|
||||
"preset": "angular",
|
||||
"releaseRules": [
|
||||
{"type": "feat", "scope": "minor", "release": "patch"},
|
||||
{"type": "docs", "scope": "README", "release": "patch"}
|
||||
]
|
||||
}],
|
||||
|
||||
+1
-1
@@ -11,7 +11,7 @@ npm install --save node-llama-cpp
|
||||
|
||||
> `node-llama-cpp` comes with pre-built binaries for macOS, Linux and Windows.
|
||||
>
|
||||
> If binaries are not available for your platform, it'll fallback to download the latest version of `llama.cpp` and build it from source with `cmake`.
|
||||
> If binaries are not available for your platform, it'll fall back to downloading a release of `llama.cpp` and building it from source with `cmake`.
|
||||
> To disable this behavior, set the environment variable `NODE_LLAMA_CPP_SKIP_DOWNLOAD` to `true`.
|
||||
|
||||
## CUDA and Metal support
|
||||
|
||||
@@ -6,6 +6,7 @@ import yargs from "yargs";
|
||||
// eslint-disable-next-line node/file-extension-in-import
|
||||
import {hideBin} from "yargs/helpers";
|
||||
import fs from "fs-extra";
|
||||
import {cliBinName} from "../config.js";
|
||||
import {DownloadCommand} from "./commands/DownloadCommand.js";
|
||||
import {BuildCommand} from "./commands/BuildCommand.js";
|
||||
import {OnPostInstallCommand} from "./commands/OnPostInstallCommand.js";
|
||||
@@ -19,6 +20,7 @@ const packageJson = fs.readJSONSync(path.join(__dirname, "..", "..", "package.js
|
||||
const yarg = yargs(hideBin(process.argv));
|
||||
|
||||
yarg
|
||||
.scriptName(cliBinName)
|
||||
.usage("Usage: $0 <command> [options]")
|
||||
.command(DownloadCommand)
|
||||
.command(BuildCommand)
|
||||
|
||||
@@ -5,7 +5,7 @@ import {CommandModule} from "yargs";
|
||||
import chalk from "chalk";
|
||||
import fs from "fs-extra";
|
||||
import withOra from "../../utils/withOra.js";
|
||||
import {defaultChatSystemPrompt} from "../../config.js";
|
||||
import {chatCommandHistoryFilePath, defaultChatSystemPrompt} from "../../config.js";
|
||||
import {LlamaChatPromptWrapper} from "../../chatWrappers/LlamaChatPromptWrapper.js";
|
||||
import {GeneralChatPromptWrapper} from "../../chatWrappers/GeneralChatPromptWrapper.js";
|
||||
import {ChatMLChatPromptWrapper} from "../../chatWrappers/ChatMLChatPromptWrapper.js";
|
||||
@@ -13,6 +13,7 @@ import {getChatWrapperByBos} from "../../chatWrappers/createChatWrapperByBos.js"
|
||||
import {ChatPromptWrapper} from "../../ChatPromptWrapper.js";
|
||||
import {FalconChatPromptWrapper} from "../../chatWrappers/FalconChatPromptWrapper.js";
|
||||
import {getIsInDocumentationMode} from "../../state.js";
|
||||
import {ReplHistory} from "../../utils/ReplHistory.js";
|
||||
import type {LlamaGrammar} from "../../llamaEvaluator/LlamaGrammar.js";
|
||||
|
||||
const modelWrappers = ["auto", "general", "llamaChat", "chatML", "falconChat"] as const;
|
||||
@@ -36,7 +37,8 @@ type ChatCommand = {
|
||||
penalizeRepeatingNewLine: boolean,
|
||||
repeatFrequencyPenalty?: number,
|
||||
repeatPresencePenalty?: number,
|
||||
maxTokens: number
|
||||
maxTokens: number,
|
||||
noHistory: boolean
|
||||
};
|
||||
|
||||
export const ChatCommand: CommandModule<object, ChatCommand> = {
|
||||
@@ -176,19 +178,26 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
|
||||
default: 0,
|
||||
description: "Maximum number of tokens to generate in responses. Set to `0` to disable. Set to `-1` to set to the context size",
|
||||
group: "Optional:"
|
||||
})
|
||||
.option("noHistory", {
|
||||
alias: "nh",
|
||||
type: "boolean",
|
||||
default: false,
|
||||
description: "Don't load or save chat history",
|
||||
group: "Optional:"
|
||||
});
|
||||
},
|
||||
async handler({
|
||||
model, systemInfo, systemPrompt, prompt, wrapper, contextSize,
|
||||
grammar, jsonSchemaGrammarFile, threads, temperature, topK, topP,
|
||||
gpuLayers, repeatPenalty, lastTokensRepeatPenalty, penalizeRepeatingNewLine,
|
||||
repeatFrequencyPenalty, repeatPresencePenalty, maxTokens
|
||||
repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory
|
||||
}) {
|
||||
try {
|
||||
await RunChat({
|
||||
model, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar, jsonSchemaGrammarFile, threads, temperature, topK,
|
||||
topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty, penalizeRepeatingNewLine, repeatFrequencyPenalty,
|
||||
repeatPresencePenalty, maxTokens
|
||||
repeatPresencePenalty, maxTokens, noHistory
|
||||
});
|
||||
} catch (err) {
|
||||
console.error(err);
|
||||
@@ -201,7 +210,7 @@ export const ChatCommand: CommandModule<object, ChatCommand> = {
|
||||
async function RunChat({
|
||||
model: modelArg, systemInfo, systemPrompt, prompt, wrapper, contextSize, grammar: grammarArg,
|
||||
jsonSchemaGrammarFile: jsonSchemaGrammarFilePath, threads, temperature, topK, topP, gpuLayers, lastTokensRepeatPenalty, repeatPenalty,
|
||||
penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens
|
||||
penalizeRepeatingNewLine, repeatFrequencyPenalty, repeatPresencePenalty, maxTokens, noHistory
|
||||
}: ChatCommand) {
|
||||
const {LlamaChatSession} = await import("../../llamaEvaluator/LlamaChatSession.js");
|
||||
const {LlamaModel} = await import("../../llamaEvaluator/LlamaModel.js");
|
||||
@@ -273,21 +282,32 @@ async function RunChat({
|
||||
// this is for ora to not interfere with readline
|
||||
await new Promise(resolve => setTimeout(resolve, 1));
|
||||
|
||||
const rl = readline.createInterface({
|
||||
input: process.stdin,
|
||||
output: process.stdout
|
||||
});
|
||||
const replHistory = await ReplHistory.load(chatCommandHistoryFilePath, !noHistory);
|
||||
|
||||
/**
 * Ask the user for the next chat prompt on the terminal.
 *
 * A new readline interface is created for every question and seeded with a copy of the
 * entries currently held by `replHistory`; the interface is closed as soon as an answer arrives.
 *
 * @returns the line the user entered
 */
async function getPrompt() {
    const promptInterface = readline.createInterface({
        input: process.stdin,
        output: process.stdout,
        // hand readline its own copy so it never mutates the stored history array
        history: replHistory.history.slice()
    });

    const answer = await new Promise<string>((resolve) => {
        promptInterface.question(chalk.yellow("> "), resolve);
    });
    promptInterface.close();

    return answer;
}
|
||||
|
||||
// eslint-disable-next-line no-constant-condition
|
||||
while (true) {
|
||||
const input: string = initialPrompt != null
|
||||
const input = initialPrompt != null
|
||||
? initialPrompt
|
||||
: await new Promise((accept) => rl.question(chalk.yellow("> "), accept));
|
||||
: await getPrompt();
|
||||
|
||||
if (initialPrompt != null) {
|
||||
console.log(chalk.green("> ") + initialPrompt);
|
||||
initialPrompt = null;
|
||||
}
|
||||
} else
|
||||
await replHistory.add(input);
|
||||
|
||||
if (input === ".exit")
|
||||
break;
|
||||
|
||||
@@ -18,6 +18,7 @@ export const llamaBinsGrammarsDirectory = path.join(__dirname, "..", "llama", "g
|
||||
export const llamaCppDirectory = path.join(llamaDirectory, "llama.cpp");
|
||||
export const llamaCppGrammarsDirectory = path.join(llamaDirectory, "llama.cpp", "grammars");
|
||||
export const tempDownloadDirectory = path.join(os.tmpdir(), "node-llama-cpp", uuid.v4());
|
||||
// Path of the history file, placed directly in the user's home directory, that the `chat` command passes to `ReplHistory.load`.
export const chatCommandHistoryFilePath = path.join(os.homedir(), ".node-llama-cpp.chat_repl_history");
|
||||
export const usedBinFlagJsonPath = path.join(llamaDirectory, "usedBin.json");
|
||||
export const binariesGithubReleasePath = path.join(llamaDirectory, "binariesGithubRelease.json");
|
||||
export const currentReleaseGitBundlePath = path.join(llamaDirectory, "gitRelease.bundle");
|
||||
|
||||
@@ -0,0 +1,85 @@
|
||||
import fs from "fs-extra";
|
||||
import {withLock} from "./withLock.js";
|
||||
|
||||
/** Shape of the JSON document stored in a REPL history file. */
type ReplyHistoryFile = {
    history: string[]
};

/** Content written to a history file that does not exist yet. */
const emptyHistory: ReplyHistoryFile = {
    history: []
};

/**
 * Most-recent-first list of lines entered in a REPL, optionally persisted to a JSON file.
 *
 * Instances are obtained through `ReplHistory.load`. When no file path is bound to the instance
 * (history disabled, or the file could not be used at load time), entries are kept in memory only.
 */
export class ReplHistory {
    private readonly _filePath: string | null;
    private _fileContent: ReplyHistoryFile;

    private constructor(filePath: string | null, fileContent: ReplyHistoryFile) {
        this._filePath = filePath;
        this._fileContent = fileContent;
    }

    /**
     * Record `line` as the most recent history entry, moving it to the front if it already exists.
     *
     * With a bound file, the file is re-read first so the entry is added on top of whatever the file
     * currently holds, and the result is written back. Persistence is best-effort: this method never
     * rejects because of a file error, and the in-memory history always receives the entry.
     *
     * @param line - the text to remember
     */
    public async add(line: string): Promise<void> {
        if (this._filePath == null) {
            this._fileContent = this._addItemToHistory(line, this._fileContent);
            return;
        }

        const filePath = this._filePath;

        await withLock(this, "file", async () => {
            let latestContent: ReplyHistoryFile;

            try {
                latestContent = parseReplJsonfile(await fs.readJSON(filePath));
            } catch (err) {
                // The file is missing, unreadable or invalid: keep the entry available for this
                // session instead of dropping it, and leave the file untouched.
                this._fileContent = this._addItemToHistory(line, this._fileContent);
                return;
            }

            this._fileContent = this._addItemToHistory(line, latestContent);

            try {
                await fs.writeJSON(filePath, this._fileContent, {
                    spaces: 4
                });
            } catch (err) {
                // Saving is best-effort; the in-memory history above is already up to date.
            }
        });
    }

    /** The recorded lines, most recent first. */
    public get history(): readonly string[] {
        return this._fileContent.history;
    }

    /**
     * Build a new file content object in which `item` is the first history entry.
     * Any previous occurrence of `item` is removed, and `fileContent` itself is not modified.
     */
    private _addItemToHistory(item: string, fileContent: ReplyHistoryFile): ReplyHistoryFile {
        const newHistory = fileContent.history.filter((existingItem) => existingItem !== item);
        newHistory.unshift(item);

        return {
            ...fileContent,
            history: newHistory
        };
    }

    /**
     * Create a history instance, reading previously saved entries from `filePath`.
     *
     * If the file does not exist it is created with an empty history. If anything goes wrong while
     * creating, reading or validating the file, an in-memory-only instance with an empty history is
     * returned instead.
     *
     * @param filePath - location of the JSON history file
     * @param saveAndLoadHistory - pass `false` to skip the file entirely and keep history in memory only
     */
    public static async load(filePath: string, saveAndLoadHistory: boolean = true): Promise<ReplHistory> {
        if (!saveAndLoadHistory)
            return new ReplHistory(null, {
                history: []
            });

        try {
            if (!(await fs.pathExists(filePath)))
                await fs.writeJSON(filePath, emptyHistory, {
                    spaces: 4
                });

            const json = parseReplJsonfile(await fs.readJSON(filePath));
            return new ReplHistory(filePath, json);
        } catch (err) {
            return new ReplHistory(null, {
                history: []
            });
        }
    }
}
|
||||
|
||||
/**
 * Validate that a value read from a history file has the expected shape:
 * a non-null object whose `history` property is an array containing only strings.
 *
 * @param file - the parsed JSON value to check
 * @returns the same value, typed as a history file
 * @throws Error with the message "Invalid ReplyHistory file" when the shape does not match
 */
function parseReplJsonfile(file: unknown): ReplyHistoryFile {
    if (typeof file !== "object" || file == null || !("history" in file))
        throw new Error("Invalid ReplyHistory file");

    // `Array.isArray` is used rather than `instanceof Array`, which depends on the realm the array was created in
    if (!Array.isArray(file.history) || file.history.some((item) => typeof item !== "string"))
        throw new Error("Invalid ReplyHistory file");

    return file as ReplyHistoryFile;
}
|
||||
Reference in New Issue
Block a user