This commit is contained in:
bracesproul
2025-04-14 17:18:01 -07:00
parent 2e5f392e98
commit c384784a7e
11 changed files with 98 additions and 209 deletions
+61 -152
View File
@@ -1,277 +1,186 @@
import { v4 as uuidv4 } from "uuid";
import * as ls from "langsmith/jest";
import { HumanMessage } from "@langchain/core/messages";
import { Client } from "@langchain/langgraph-sdk";
/**
 * Dataset of employee approval requests used to drive the E2E test.
 *
 * Each entry has the shape `{ inputs: { messages, query } }`, where `messages`
 * holds a single `HumanMessage` wrapping the request text and `query` is that
 * same text as a plain string. The test runner destructures `inputs` from each
 * entry and forwards it as the run's `input`.
 *
 * The request texts are listed once and expanded below so the message content
 * and the `query` string can never drift apart.
 */
const inputs = [
  "I'd like approval to explore integrating generative AI into our customer support chatbot. This could significantly improve response accuracy and customer satisfaction. I plan to start with a prototype to evaluate potential impacts.",
  "Requesting permission to initiate a small-scale pilot project using generative AI to enhance our code review system. This could automate routine checks and free up engineers to focus on complex issues. I'll provide regular updates on the pilots progress.",
  "I'm seeking approval to research and potentially implement generative AI for automatically generating documentation for our APIs. This initiative could greatly reduce manual documentation efforts and improve consistency across our products.",
  "I propose we explore using generative AI to create synthetic datasets for testing our software products. This method could improve our test coverage and help detect bugs earlier. I'd like authorization to proceed with initial experiments.",
  "Requesting approval to integrate generative AI into our existing software development pipeline for automatic code generation. This could improve developer productivity and reduce the time spent on repetitive coding tasks.",
  "I would like to request approval to attend the AWS re:Invent conference next month. This will help me stay updated with the latest cloud technologies, which are directly relevant to our ongoing projects.",
  "Could I have permission to access the company's AWS account to perform testing on the new deployment pipeline? This access is essential for integrating and verifying recent development tasks.",
  "I'm requesting approval to enroll in an advanced JavaScript training course. This training will significantly boost my frontend development skills, directly benefiting our current web projects.",
  "Can I get authorization to purchase a new software license for JetBrains IntelliJ IDEA? My current IDE license is about to expire, and continued access is vital for my daily development work.",
  "I would like to request approval for a two-week extension on the upcoming project deadline. Recent unforeseen technical challenges require extra time to ensure quality delivery.",
  "Please approve my request to work remotely for the next two weeks due to personal circumstances. My productivity and availability will remain unchanged during this period.",
  "Can I have approval to upgrade my current laptop to a more powerful model? My existing machine is struggling with resource-intensive tasks, reducing my productivity.",
  "I'm requesting permission to join the internal machine learning workshop next week. This knowledge aligns closely with my project responsibilities and will enhance my team's effectiveness.",
  "Could you approve my request for a day off next Friday? I have personal matters to attend to and will ensure all my duties are covered beforehand.",
  "Please approve my request to renew my professional certification in cybersecurity. Maintaining this certification is essential for compliance with our security policy.",
  "I would like permission to conduct a code refactoring session for our legacy software module. This initiative aims to improve the maintainability and performance of our system.",
  "Can I request approval to collaborate with an external consultant on our next software architecture review? Their expertise can provide valuable insights into improving our system's scalability.",
  "I'm requesting to attend a one-day seminar on Agile project management. It will help me implement more effective practices within our team.",
  "Please approve my request for access to the beta version of the new API framework we're planning to adopt. Early access will allow me to start evaluating its potential benefits and challenges.",
  "I'd like approval to take a half-day off next Wednesday for a healthcare appointment. I'll ensure my tasks are on schedule and will make up for the lost hours later.",
  "Can I get approval to purchase a subscription to an online learning platform such as Pluralsight? This will facilitate continuous learning and skill improvement relevant to my role.",
  "Please approve my request to shift my work hours temporarily from 10 am to 6 pm for the next month. This adjustment will help me better coordinate with our overseas team.",
  "I'm requesting permission to set up a new staging environment for our latest software update. This environment will enable thorough testing and reduce the risk of deployment issues.",
  "Could you approve my participation in a hackathon focused on emerging technologies? It will enhance my skills and provide creative solutions beneficial to our projects.",
  "I'd like to request approval to use the corporate credit card for purchasing a new ergonomic office chair. My current chair is causing discomfort, impacting my productivity.",
  "Please approve my request to take part in the company's mentorship program as a mentor. This will help me develop leadership skills and contribute positively to our team culture.",
  "I'm requesting approval to initiate a minor budget allocation for team-building activities. This will help boost morale and collaboration among our team members.",
  "Can I get authorization to submit a proposal for implementing an automated testing framework? This will enhance our software quality assurance and reduce manual testing time.",
  "Please approve my request to work from home on Fridays regularly. I have found that I can be more productive and focused on specific tasks when working remotely.",
  "I'd like approval to attend a training session on database optimization techniques. This knowledge is directly applicable to improving our current database performance.",
].map((request) => ({
  inputs: {
    // A fresh HumanMessage per entry, exactly as in the hand-written list.
    messages: [new HumanMessage(request)],
    query: request,
  },
}));
@@ -291,7 +200,7 @@ ls.describe("LLManager", () => {
console.log("Using assistant ID:", assistantId);
});
ls.test.each(inputs)("E2E Test", async ({ inputs }) => {
ls.test.each(inputs.slice(0, 4))("E2E Test", async ({ inputs }) => {
const threadId = uuidv4();
const result = await client.runs.wait(threadId, assistantId, {
input: inputs,
+6 -8
View File
@@ -1,11 +1,13 @@
import { z } from "zod";
import { AgentState, AgentUpdate } from "../types.js";
import { findQueryStringOrThrow } from "../../utils/query.js";
import { LangGraphRunnableConfig } from "@langchain/langgraph";
import { loadModelFromConfig } from "../../utils/model.js";
const FINAL_ANSWER_PROMPT = `You're a highly advanced AI manager, tasked with approving or rejecting one of your employees requests.
Here is the users request:
{REQUEST}
To assist with this task, you're provided with the following context:
- Examples of previous requests, along with their outcomes. These are previous requests, and the final outcome you came to, along with the reasoning behind that outcome.
- Reflections you've made on previous requests. This will contain your thoughts and insights into previous requests and their outcomes.
@@ -15,13 +17,11 @@ To assist with this task, you're provided with the following context:
{CONTEXT}
Finally, you are also given a detailed reasoning report into why the request should be approved or rejected.
{REASONING}
Use all of this context to ground your final decision.
Here is the users request:
Once again, here is the user's request:
{REQUEST}
Ensure your answer is accurate, and accounts for all of the context provided above.
@@ -31,13 +31,11 @@ export async function finalAnswer(
state: AgentState,
config: LangGraphRunnableConfig,
): Promise<AgentUpdate> {
const query = findQueryStringOrThrow(state.messages);
const finalAnswerSchema = z.object({
explanation: z
.string()
.describe(
"The explanation for your final decision. Ensure this is detailed, and clear. It should cover everything you considered when making your final decision. This is the explanation which will be sent back to the employee, along with the status of their request. Ensure it is formatted properly for this.",
"The explanation for your final decision. Ensure this is detailed, and clear, while still being concise and straightforward. It should provide enough information to the user to properly inform them as to why their request was accepted/rejected. This is the explanation which will be sent back to the employee, along with the status of their request. Ensure it is formatted properly for this type of communication.",
),
status: z
.enum(["approved", "rejected"])
@@ -51,7 +49,7 @@ export async function finalAnswer(
state.promptContext,
)
.replace("{REASONING}", state.generatedReasoning)
.replace("{REQUEST}", query);
.replaceAll("{REQUEST}", state.query);
const model = await loadModelFromConfig(config, {
temperature: 0,
+3 -6
View File
@@ -9,7 +9,6 @@ import {
Send,
} from "@langchain/langgraph";
import { ReflectionState } from "../../reflection/types.js";
import { findQueryStringOrThrow } from "../../utils/query.js";
import { putFewShotExamples } from "../../stores/few-shot.js";
/**
@@ -106,7 +105,7 @@ async function handleHumanResponse(
// Save the final answer & explanation in store for future use in few-shot examples
await putFewShotExamples(inputs.store, inputs.assistantId, {
input: findQueryStringOrThrow(state.messages),
input: state.query,
answer: updatedAnswer.status,
explanation: updatedAnswer.explanation,
});
@@ -120,7 +119,7 @@ async function handleHumanResponse(
}
const reflectionInput: ReflectionState = {
messages: state.messages,
query: state.query,
generatedReasoning: state.generatedReasoning,
originalAnswer: state.answer,
editedAnswer: updatedAnswer,
@@ -140,10 +139,8 @@ export async function humanNode(
state: AgentState,
config: LangGraphRunnableConfig,
): Promise<Command> {
const query = findQueryStringOrThrow(state.messages);
const description = constructDescription({
request: query,
request: state.query,
explanation: state.answer.explanation,
status: state.answer.status,
});
+6 -2
View File
@@ -1,9 +1,13 @@
import { Annotation, MessagesAnnotation } from "@langchain/langgraph";
import { Annotation } from "@langchain/langgraph";
import "@langchain/langgraph/zod";
import { z } from "zod";
/**
 * Annotation root declaring a single string channel, `query`.
 *
 * Its `spec` is spread into `AgentZodState`, so `query` is also part of that
 * state definition.
 */
export const AgentZodStateInput = Annotation.Root({
query: Annotation<string>(),
});
export const AgentZodState = Annotation.Root({
messages: MessagesAnnotation.spec["messages"],
...AgentZodStateInput.spec,
promptContext: Annotation<string>(),
generatedReasoning: Annotation<string>(),
answer: Annotation<{
+12 -8
View File
@@ -4,25 +4,31 @@ import {
buildContext,
formatContextPrompt,
} from "../../utils/build-context.js";
import { findQueryStringOrThrow } from "../../utils/query.js";
import { loadModelFromConfig } from "../../utils/model.js";
/**
 * Prompt template that asks the model to reason about an employee request
 * without making the final approve/reject decision, and to reply with the
 * reasoning only.
 *
 * The template contains a `{CONTEXT}` placeholder token and states that the
 * request itself arrives in the user's message.
 * NOTE(review): the code that substitutes `{CONTEXT}` is not visible in this
 * view — presumably the caller fills it in before sending; confirm.
 */
const INITIAL_REASONING_PROMPT = `You're an AI manager tasked with analyzing and reasoning about a request one of your employees has made.
Your task is to analyze the request from one of your employees, and reason about whether it should be approved or rejected.
<instructions>
Inspect the context, and the users request carefully. You should ONLY use the context provided to reason about the request.
You should never invent criteria or guidelines, unless explicitly stated in the context. Doing this ensures you won't reject or accept requests based on criteria that don't exist.
You are NOT to make a final decision, but rather to weigh the request against all of the below context, and reason about whether it should be approved or rejected.
Ensure your reasoning contains points from both sides of the argument.
</instructions>
<context-descriptions>
You should think through this carefully, accounting for all aspects of their request, and taking into account the following context:
- Examples of previous requests, along with their outcomes. These are previous requests, and the final outcome you came to, along with the reasoning behind that outcome.
- Reflections you've made on previous requests. This will contain your thoughts and insights into previous requests and their outcomes.
- Approval criteria on what types of requests should be approved.
- Rejection criteria on what types of requests should be rejected.
</context-descriptions>
Here is the context:
{CONTEXT}
You are NOT to make a final decision, but rather to weigh the request against all of the above context, and reason about whether it should be approved or rejected.
Ensure your reasoning contains points from both sides of the argument.
The user's message will contain their request. You should ONLY respond with your reasoning, and nothing else before or after it.
Ensure your reasoning is detailed, and clear.`;
@@ -31,9 +37,7 @@ export async function initialReasoning(
state: ReasoningState,
config: LangGraphRunnableConfig,
): Promise<ReasoningUpdate> {
const query = findQueryStringOrThrow(state.messages);
const { fewShotExamples, reflections } = await buildContext(query, {
const { fewShotExamples, reflections } = await buildContext(state.query, {
store: config.store,
assistantId: config.configurable?.assistant_id,
});
@@ -59,7 +63,7 @@ export async function initialReasoning(
},
{
role: "user",
content: query,
content: state.query,
},
]);
+2 -2
View File
@@ -1,9 +1,9 @@
import { Annotation, MessagesAnnotation } from "@langchain/langgraph";
import { Annotation } from "@langchain/langgraph";
import "@langchain/langgraph/zod";
import { z } from "zod";
/**
 * Annotation root declaring the channels `messages`, `query`, `promptContext`
 * and `generatedReasoning`.
 *
 * `messages` reuses the channel definition from `MessagesAnnotation`; the
 * other three are plain string channels.
 */
export const ReasoningZodState = Annotation.Root({
messages: MessagesAnnotation.spec["messages"],
query: Annotation<string>(),
promptContext: Annotation<string>(),
generatedReasoning: Annotation<string>(),
});
@@ -83,7 +83,6 @@ export async function explanationReflection(
});
const model = await loadModelFromConfig(config, {
temperature: 0,
thinking: {
type: "enabled",
budget_tokens: 3072,
-1
View File
@@ -87,7 +87,6 @@ export async function fullReflection(
});
const model = await loadModelFromConfig(config, {
temperature: 0,
thinking: {
type: "enabled",
budget_tokens: 3072,
+2 -2
View File
@@ -1,9 +1,9 @@
import { Annotation, MessagesAnnotation } from "@langchain/langgraph";
import { Annotation } from "@langchain/langgraph";
import "@langchain/langgraph/zod";
import { z } from "zod";
export const ReflectionZodState = Annotation.Root({
messages: MessagesAnnotation.spec["messages"],
query: Annotation<string>(),
generatedReasoning: Annotation<string>(),
originalAnswer: Annotation<{
explanation: string;
+6 -1
View File
@@ -33,7 +33,12 @@ export async function loadModelFromConfig(
const modelId =
config.configurable?.modelId ?? "anthropic/claude-3-7-sonnet-latest";
const model = await initChatModel(modelId, modelConfig);
const provider = modelId.split("/")[0];
const modelName = modelId.split("/").slice(1).join("/");
const model = await initChatModel(modelName, {
...modelConfig,
modelProvider: provider,
});
if (!model.bindTools) {
throw new Error("Model does not support binding tools");
}
-26
View File
@@ -1,26 +0,0 @@
import { BaseMessage } from "@langchain/core/messages";
/**
 * Finds the last message in the list of messages that is a human message and
 * returns its content as a string.
 *
 * String content is returned unchanged. For multi-part (array) content, only
 * parts of type `"text"` with a non-empty string `text` are kept, and they are
 * joined with newlines; every other part is ignored.
 *
 * @param messages - The list of messages to search through.
 * @returns The content of the last human message as a string.
 * @throws Error with message "No query found" when there is no human message,
 *   when its content is empty, or when its multi-part content contains no
 *   non-empty text part.
 */
export function findQueryStringOrThrow(messages: BaseMessage[]): string {
  const content = messages.findLast((m) => m.getType() === "human")?.content;
  if (!content) {
    throw new Error("No query found");
  }
  if (typeof content === "string") {
    return content;
  }

  const text = content
    .filter(
      // The predicate must evaluate to a real boolean, so test the text
      // explicitly instead of relying on the truthiness of `c.text`.
      (c): c is { type: "text"; text: string } =>
        c.type === "text" &&
        "text" in c &&
        typeof c.text === "string" &&
        c.text.length > 0,
    )
    .map((c) => c.text)
    .join("\n");

  // Content made only of non-text parts (e.g. images) yields no query at all;
  // treat it like empty string content rather than returning "".
  if (!text) {
    throw new Error("No query found");
  }
  return text;
}