mirror of
https://github.com/langchain-ai/tracing-claude-code.git
synced 2026-07-01 20:04:49 -04:00
957 lines
33 KiB
Bash
Executable File
957 lines
33 KiB
Bash
Executable File
#!/bin/bash
###
# Claude Code Stop Hook - LangSmith Tracing Integration
#
# Invoked after each Claude Code response; forwards the new conversation
# turn(s) to LangSmith as traces.
###

# Stop at the first unhandled command failure.
set -e

# Logging configuration. It lives at the very top because the logging
# helpers are used before the rest of the configuration is read.
LOG_FILE="${HOME}/.claude/state/hook.log"

# Lower-cased copy of CC_LANGSMITH_DEBUG, so "TRUE" and "True" also count.
DEBUG=$(tr '[:upper:]' '[:lower:]' <<< "$CC_LANGSMITH_DEBUG")
|
|
|
|
# Logging functions
|
|
#######################################
# Append one timestamped line to the hook log.
# Logging is best effort: the log directory is created on demand and any
# write failure is swallowed, so a logging problem can never abort the hook
# (the script runs under `set -e`, and the first debug message is emitted
# before any state directory has been created).
# Globals:   LOG_FILE (read)
# Arguments: $1 - level label (e.g. INFO, WARN, ERROR, DEBUG)
#            $2.. - message words, joined with spaces
# Returns:   always 0
#######################################
log() {
  local level="$1"
  shift

  local log_dir
  log_dir=$(dirname "$LOG_FILE")

  {
    [ -d "$log_dir" ] || mkdir -p "$log_dir"
    printf '%s [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$level" "$*" >> "$LOG_FILE"
  } 2> /dev/null || true
}
|
|
|
|
#######################################
# Write a DEBUG-level log line, but only while debug mode is enabled.
# Globals:   DEBUG (read) - must be exactly "true" to enable output
# Arguments: message words, forwarded unchanged to log
#######################################
debug() {
  case "$DEBUG" in
    true) log "DEBUG" "$@" ;;
  esac
}
|
|
|
|
# Note that the hook fired (only visible in debug mode)
debug "Hook started, TRACE_TO_LANGSMITH=$TRACE_TO_LANGSMITH"

# Tracing is opt-in: unless TRACE_TO_LANGSMITH is "true" (any letter case),
# leave quietly with a success status.
case "$(tr '[:upper:]' '[:lower:]' <<< "$TRACE_TO_LANGSMITH")" in
  true) ;;
  *)
    debug "Tracing disabled, exiting early"
    exit 0
    ;;
esac

# External tools the hook cannot work without. A missing tool is reported on
# stderr, but the exit status stays 0.
for cmd in jq curl uuidgen; do
  command -v "$cmd" > /dev/null 2>&1 && continue
  echo "Error: $cmd is required but not installed" >&2
  exit 0
done

# Remaining configuration
API_KEY="${CC_LANGSMITH_API_KEY:-$LANGSMITH_API_KEY}"   # hook-specific key wins over the generic one
PROJECT="${CC_LANGSMITH_PROJECT:-claude-code}"
API_BASE="https://api.smith.langchain.com"
STATE_FILE="${STATE_FILE:-$HOME/.claude/state/langsmith_state.json}"

# Id of the turn run currently being built; read by the EXIT cleanup handler
CURRENT_TURN_ID=""

# The state file's directory must exist before anything is saved
mkdir -p "$(dirname "$STATE_FILE")"

# Without an API key nothing can be sent: log it and stop (status 0)
[ -n "$API_KEY" ] || {
  log "ERROR" "CC_LANGSMITH_API_KEY not set"
  exit 0
}
|
|
|
|
#######################################
# Print the microsecond part of the current time as exactly six digits.
# Source preference: GNU `gdate` when installed, python3 on macOS (whose
# date has no %N), otherwise `date +%6N`. Whatever the chosen tool prints is
# validated; if it fails or prints anything other than six digits (python3
# missing, a date that does not understand %N, ...), "000000" is printed so
# callers always get a well-formed value to embed in a dotted_order.
# Globals:   OSTYPE (read)
# Outputs:   six digits on stdout
# Returns:   0
#######################################
get_microseconds() {
  local micros=""

  if command -v gdate &> /dev/null; then
    # Use GNU date if available (brew install coreutils)
    micros=$(gdate +%6N 2> /dev/null) || micros=""
  elif [[ "$OSTYPE" == "darwin"* ]]; then
    # macOS fallback: use Python for microseconds
    micros=$(python3 -c "import time; print(str(int(time.time() * 1000000) % 1000000).zfill(6))" 2> /dev/null) || micros=""
  else
    # Linux/GNU date
    micros=$(date +%6N 2> /dev/null) || micros=""
  fi

  local six_digits='^[0-9]{6}$'
  if [[ ! "$micros" =~ $six_digits ]]; then
    micros="000000"
  fi

  printf '%s\n' "$micros"
}
|
|
|
|
#######################################
# Print the size of a file in bytes.
# Uses `wc -c`, which behaves the same on macOS and Linux. Choosing between
# BSD and GNU `stat` flags by OSTYPE breaks when GNU coreutils' stat comes
# first on PATH on macOS.
# Arguments: $1 - path of the file
# Outputs:   byte count on stdout (no padding)
# Returns:   0 on success, 1 if the file cannot be read
#######################################
get_file_size() {
  local file="$1"
  local bytes

  bytes=$(wc -c < "$file") || return 1

  # Some wc implementations pad the number with blanks; arithmetic
  # expansion normalises it to a bare integer.
  printf '%s\n' "$((bytes))"
}
|
|
|
|
#######################################
# Perform one JSON request against the LangSmith API.
# The HTTP status is appended to curl's output via -w and split off again.
# A status that is missing or not numeric (curl produced no output at all)
# is treated as a failure; it used to slip through the numeric comparisons
# and be reported as success.
# Globals:   API_KEY, API_BASE (read)
# Arguments: $1 - HTTP method
#            $2 - endpoint path, appended to API_BASE
#            $3 - JSON request body
# Outputs:   response body on stdout when the status is 2xx
# Returns:   0 on a 2xx status, 1 otherwise (details go to the log)
#######################################
api_call() {
  local method="$1"
  local endpoint="$2"
  local data="$3"

  local raw
  local http_code
  local body
  local curl_status=0

  raw=$(curl -s --max-time 60 -w "\n%{http_code}" -X "$method" \
    -H "x-api-key: $API_KEY" \
    -H "Content-Type: application/json" \
    -d "$data" \
    "$API_BASE$endpoint" 2>&1) || curl_status=$?

  # Last line is the status code written by -w; everything before is the body
  http_code=$(printf '%s\n' "$raw" | tail -n1)
  body=$(printf '%s\n' "$raw" | sed '$d')

  local numeric='^[0-9]+$'
  if [[ ! "$http_code" =~ $numeric ]] || [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
    log "ERROR" "API call failed: $method $endpoint"
    log "ERROR" "HTTP $http_code: $body"
    log "ERROR" "Request data: ${data:0:500}"
    if [ "$curl_status" -ne 0 ]; then
      log "ERROR" "curl exited with status $curl_status"
    fi
    return 1
  fi

  echo "$body"
}
|
|
|
|
#######################################
# EXIT handler: if a turn run is still marked as in progress, try to close
# it in LangSmith with an error note so it is not left open.
# Globals:   CURRENT_TURN_ID (read)
# Returns:   0 when there is nothing to clean up
#######################################
cleanup_pending_turn() {
  # Nothing in flight -> nothing to close
  [ -n "$CURRENT_TURN_ID" ] || return 0

  debug "Cleanup: completing pending turn run $CURRENT_TURN_ID"

  local ended_at
  ended_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  local closing_patch
  closing_patch=$(jq -n --arg time "$ended_at" '{
    outputs: {messages: []},
    end_time: $time,
    error: "Incomplete: script exited early"
  }')

  # Best effort only: the shell is already on its way out, so a failed
  # request is deliberately ignored.
  api_call "PATCH" "/runs/$CURRENT_TURN_ID" "$closing_patch" > /dev/null 2>&1 || true
  log "WARN" "Completed pending turn run $CURRENT_TURN_ID due to early exit"
}
|
|
|
|
# Run cleanup_pending_turn whenever the shell exits, so a turn run that was
# started but not finished gets closed out.
trap 'cleanup_pending_turn' EXIT
|
|
|
|
#######################################
# Print the persisted hook state as a JSON object.
# A missing state file yields "{}". A file that is empty, is not valid JSON,
# or does not hold exactly one JSON object is treated the same way (with a
# warning in the log), so that a damaged state file cannot make every later
# hook run fail.
# Globals:   STATE_FILE (read)
# Outputs:   one compact JSON object on stdout
# Returns:   0
#######################################
load_state() {
  if [ ! -f "$STATE_FILE" ]; then
    echo "{}"
    return
  fi

  local contents
  # -s slurps the whole file so "empty" and "several documents" can be told
  # apart from the single object that is expected.
  if contents=$(jq -c -s 'if length == 1 and (.[0] | type) == "object" then .[0] else error("unexpected state") end' "$STATE_FILE" 2> /dev/null) \
    && [ -n "$contents" ]; then
    printf '%s\n' "$contents"
    return
  fi

  log "WARN" "State file $STATE_FILE is empty or invalid, starting from empty state"
  echo "{}"
}
|
|
|
|
#######################################
# Persist the hook state.
# The data is written to a temporary file next to STATE_FILE and then moved
# into place, so a crash or an overlapping hook run cannot leave a
# half-written state file behind (a direct `>` redirect truncates the file
# before the new content is complete).
# Globals:   STATE_FILE (read)
# Arguments: $1 - state as a JSON string
# Returns:   0 on success, 1 if the state could not be written
#######################################
save_state() {
  local state="$1"

  local tmp_file
  tmp_file=$(mktemp "${STATE_FILE}.XXXXXX") || return 1

  if printf '%s\n' "$state" > "$tmp_file" && mv -f "$tmp_file" "$STATE_FILE"; then
    return 0
  fi

  rm -f "$tmp_file"
  return 1
}
|
|
|
|
#######################################
# Extract the content of a transcript entry.
# Entries that wrap the message ({"message": {...}}) yield .message.content,
# other objects yield .content, and non-objects yield null.
# Arguments: $1 - transcript entry as JSON
# Outputs:   the content as compact JSON on stdout
#######################################
get_content() {
  local msg="$1"
  jq -c '
    if type == "object" and has("message") then .message.content
    elif type == "object" then .content
    else null
    end
  ' <<< "$msg"
}
|
|
|
|
#######################################
# Tell whether a transcript entry carries at least one tool_result block.
# Arguments: $1 - transcript entry as JSON
# Outputs:   "true" or "false" on stdout
#######################################
is_tool_result() {
  local msg="$1"

  local blocks
  blocks=$(get_content "$msg")

  local verdict="false"
  # Only array content can hold tool_result blocks; jq -e turns the boolean
  # result into the exit status.
  if jq -e 'if type == "array" then any(.[]; type == "object" and .type == "tool_result") else false end' \
    <<< "$blocks" > /dev/null 2>&1; then
    verdict="true"
  fi

  echo "$verdict"
}
|
|
|
|
#######################################
# Convert a transcript entry's content into the block list sent to LangSmith.
#   - string content        -> a single text block
#   - array content         -> text/thinking blocks reduced to their core
#                              fields, tool_use mapped to tool_call, bare
#                              strings wrapped as text, anything else kept;
#                              an empty array becomes one empty text block
#   - anything else         -> one empty text block
# Arguments: $1 - transcript entry as JSON
# Outputs:   JSON array of content blocks on stdout
#######################################
format_content() {
  local msg="$1"

  local payload
  payload=$(get_content "$msg")

  # Classify once; an unparsable payload falls through to the default branch
  local kind
  kind=$(jq -r 'type' <<< "$payload" 2> /dev/null) || kind=""

  case "$kind" in
    string)
      jq '[{"type": "text", "text": .}]' <<< "$payload"
      ;;
    array)
      jq '[
        .[] |
        if type == "object" then
          if .type == "text" then
            {"type": "text", "text": .text}
          elif .type == "thinking" then
            {"type": "thinking", "thinking": .thinking}
          elif .type == "tool_use" then
            {"type": "tool_call", "name": .name, "args": .input, "id": .id}
          else
            .
          end
        elif type == "string" then
          {"type": "text", "text": .}
        else
          .
        end
      ] | if length == 0 then [{"type": "text", "text": ""}] else . end' <<< "$payload"
      ;;
    *)
      echo '[{"type": "text", "text": ""}]'
      ;;
  esac
}
|
|
|
|
#######################################
# List the tool_use blocks contained in a transcript entry.
# Arguments: $1 - transcript entry as JSON
# Outputs:   compact JSON array of tool_use blocks ("[]" when the content is
#            not an array)
#######################################
get_tool_uses() {
  local msg="$1"

  local blocks
  blocks=$(get_content "$msg")

  if jq -e 'type == "array"' <<< "$blocks" > /dev/null 2>&1; then
    jq -c '[.[] | select(type == "object" and .type == "tool_use")]' <<< "$blocks"
  else
    # String, null or unparsable content cannot contain tool calls
    echo "[]"
  fi
}
|
|
|
|
#######################################
# Pick the usage record from a list of assistant message parts.
# When several parts carry .message.usage the last one is returned (the
# file treats usage counts as cumulative across parts).
# Arguments: $1 - JSON array of assistant transcript entries
# Outputs:   the usage object as compact JSON, or null if no part has one
#######################################
get_usage_from_parts() {
  local parts="$1"
  jq -c '[.[] | .message.usage // null | select(. != null)] | last // null' <<< "$parts"
}
|
|
|
|
#######################################
# Look up the result of one tool call among the collected tool-result entries.
# The first tool_result block whose tool_use_id matches wins. Its content is
# flattened to a string: text blocks of an array joined with spaces, a
# string as is, anything else via tostring.
# Arguments: $1 - tool_use id to search for
#            $2 - JSON array of transcript entries holding tool_result blocks
# Outputs:   {"result": "...", "timestamp": "..."} as compact JSON, where
#            timestamp is taken from the enclosing transcript entry;
#            {"result": "No result", "timestamp": null} when nothing matches
#######################################
find_tool_result_with_timestamp() {
  local tool_id="$1"
  local tool_results="$2"

  local match
  match=$(jq -c --arg id "$tool_id" '
    first(
      .[] |
      . as $msg |
      (if type == "object" and has("message") then .message.content
       elif type == "object" then .content
       else null
       end) as $content |
      if $content | type == "array" then
        $content[] |
        select(type == "object" and .type == "tool_result" and .tool_use_id == $id) |
        {
          result: (
            if .content | type == "array" then
              [.content[] | select(type == "object" and .type == "text") | .text] | join(" ")
            elif .content | type == "string" then
              .content
            else
              .content | tostring
            end
          ),
          timestamp: $msg.timestamp
        }
      else
        empty
      end
    ) // {result: "No result", timestamp: null}
  ' <<< "$tool_results")

  printf '%s\n' "$match"
}
|
|
|
|
#######################################
# Merge the transcript entries that make up one assistant message.
# The first part is used as the base entry. The content of all parts is
# concatenated, adjacent text blocks are joined into one, and the usage
# record chosen by get_usage_from_parts is stored as `_usage` next to the
# content.
# Bare strings inside content arrays are turned into text blocks first (the
# same way format_content treats them) so that they merge like any other
# text instead of making jq fail on `.type`.
# Arguments: $1 - JSON array of assistant transcript entries (same message)
# Outputs:   the merged entry as JSON on stdout
#######################################
merge_assistant_parts() {
  local current_assistant_parts="$1"

  # Extract usage from parts (last one for SSE cumulative)
  local usage
  usage=$(get_usage_from_parts "$current_assistant_parts") || usage=""
  # --argjson rejects an empty string; fall back to JSON null
  [ -n "$usage" ] || usage="null"

  printf '%s\n' "$current_assistant_parts" | jq -s \
    --argjson usage "$usage" \
    '
    .[0][0] as $base |
    (.[0]
      | map(if type == "object" and has("message") then .message.content
            elif type == "object" then .content
            else null
            end)
      | map(select(. != null))) as $contents |
    ($contents | map(
      if type == "string" then [{"type":"text","text":.}]
      elif type == "array" then map(if type == "string" then {"type":"text","text":.} else . end)
      else [.]
      end
    ) | add // []) as $merged_content |
    ($merged_content | reduce .[] as $item (
      {result: [], buffer: null};
      if ($item | type) == "object" and $item.type == "text" then
        if .buffer then .buffer.text += $item.text
        else .buffer = $item
        end
      else
        (if .buffer then .result += [.buffer] else . end) |
        .buffer = null | .result += [$item]
      end
    ) | if .buffer then .result + [.buffer] else .result end) as $final_content |
    $base |
    if type == "object" and has("message") then
      .message.content = $final_content |
      (if $usage != null then .message._usage = $usage else . end)
    elif type == "object" then
      .content = $final_content |
      (if $usage != null then ._usage = $usage else . end)
    else
      .
    end
    '
}
|
|
|
|
#######################################
# Helper: write one multipart part to disk and print its curl arguments.
# Arguments: $1 - form field name
#            $2 - file to write the payload to
#            $3 - JSON payload
# Outputs:   "-F" and the field spec, one per line
# Returns:   0 on success, 1 if the file cannot be written or measured
#######################################
_emit_multipart_part() {
  local part_name="$1"
  local part_file="$2"
  local payload="$3"

  echo "$payload" > "$part_file" || return 1

  # Declared separately from the assignment so a failing get_file_size is
  # not masked by `local`'s own exit status.
  local part_size
  part_size=$(get_file_size "$part_file") || return 1

  echo "-F"
  echo "${part_name}=<${part_file};type=application/json;headers=Content-Length:${part_size}"
}

#######################################
# Serialize one run for the multipart ingest endpoint.
# The run is split into up to three parts - the run itself without
# inputs/outputs, its inputs, and its outputs - each written to a file in
# the batch's temp directory and announced with a Content-Length header.
# Arguments: $1 - operation, "post" or "patch"
#            $2 - full run JSON
#            $3 - temp directory for this batch
# Outputs:   curl arguments on stdout, one per line ("-F", then the spec)
# Returns:   0 on success, 1 if a part could not be produced
#######################################
serialize_for_multipart() {
  local operation="$1" # "post" or "patch"
  local run_json="$2"  # Full run JSON
  local temp_dir="$3"  # Temp directory for this batch

  local run_id
  local inputs
  local outputs
  local main_data
  run_id=$(echo "$run_json" | jq -r '.id') || return 1
  # `// empty` leaves the variable blank when the field is absent or null
  inputs=$(echo "$run_json" | jq -c '.inputs // empty') || return 1
  outputs=$(echo "$run_json" | jq -c '.outputs // empty') || return 1
  main_data=$(echo "$run_json" | jq -c 'del(.inputs, .outputs)') || return 1

  local field="${operation}.${run_id}"
  local file_base="${temp_dir}/${operation}_${run_id}"

  # Part 1: main run data
  _emit_multipart_part "$field" "${file_base}_main.json" "$main_data" || return 1

  # Part 2: inputs (if present)
  if [ "$inputs" != "null" ] && [ -n "$inputs" ]; then
    _emit_multipart_part "${field}.inputs" "${file_base}_inputs.json" "$inputs" || return 1
  fi

  # Part 3: outputs (if present)
  if [ "$outputs" != "null" ] && [ -n "$outputs" ]; then
    _emit_multipart_part "${field}.outputs" "${file_base}_outputs.json" "$outputs" || return 1
  fi

  return 0
}
|
|
|
|
#######################################
# Send a batch of runs to the multipart ingest endpoint in one request.
# Each run is serialized by serialize_for_multipart into files inside a
# fresh temp directory, which is removed again once curl has returned.
# A response whose status code is missing or not numeric (curl failed
# without producing output) is reported as a failure; it used to fall
# through the numeric comparisons and be logged as success.
# Globals:   API_KEY, API_BASE (read)
# Arguments: $1 - operation, "post" or "patch"
#            $2 - JSON array of runs
# Returns:   0 on success or when the batch is empty, 1 on failure
#######################################
send_multipart_batch() {
  local operation="$1"  # "post" or "patch"
  local batch_json="$2" # JSON array of runs

  local numeric='^[0-9]+$'

  # Parse batch size
  local batch_size
  batch_size=$(echo "$batch_json" | jq 'length' 2> /dev/null) || batch_size=""
  if [[ ! "$batch_size" =~ $numeric ]]; then
    log "ERROR" "Batch $operation failed: batch is not valid JSON"
    return 1
  fi

  if [ "$batch_size" -eq 0 ]; then
    debug "No $operation runs to send"
    return 0
  fi

  # Create temp directory for this batch
  local temp_dir
  if ! temp_dir=$(mktemp -d); then
    log "ERROR" "Batch $operation failed: could not create temp directory"
    return 1
  fi

  # Build the curl command as an array so no argument is word-split
  local curl_args=()
  curl_args+=("-s" "--max-time" "60" "-w" "\n%{http_code}" "-X" "POST")
  curl_args+=("-H" "x-api-key: $API_KEY")

  # Serialize each run and collect its curl -F arguments, one per line
  local run
  local arg
  while IFS= read -r run; do
    while IFS= read -r arg; do
      curl_args+=("$arg")
    done < <(serialize_for_multipart "$operation" "$run" "$temp_dir")
  done < <(echo "$batch_json" | jq -c '.[]')

  curl_args+=("$API_BASE/runs/multipart")

  # Execute curl
  local raw
  local http_code
  local body
  local curl_status=0
  raw=$(curl "${curl_args[@]}" 2>&1) || curl_status=$?

  # Last line is the status code written by -w; the rest is the body
  http_code=$(printf '%s\n' "$raw" | tail -n1)
  body=$(printf '%s\n' "$raw" | sed '$d')

  # Cleanup temp directory
  rm -rf -- "$temp_dir"

  if [[ ! "$http_code" =~ $numeric ]] || [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
    log "ERROR" "Batch $operation failed: HTTP $http_code"
    log "ERROR" "Response: $body"
    if [ "$curl_status" -ne 0 ]; then
      log "ERROR" "curl exited with status $curl_status"
    fi
    return 1
  fi

  log "INFO" "Batch $operation succeeded: $batch_size runs"
  return 0
}
|
|
|
|
#######################################
# Helper: convert an ISO-8601 UTC timestamp to dotted_order form.
#   2025-12-16T17:44:04.397Z -> 20251216T174404397000Z
# The fractional part may be absent or have any number of digits; it is
# padded or truncated to exactly six.
# Arguments: $1 - timestamp such as 2025-12-16T17:44:04.397Z
# Outputs:   converted timestamp on stdout
# Returns:   0 on success, 1 (no output) if the input has another shape
#######################################
_iso_to_dotted_timestamp() {
  local iso="$1"
  local iso_re='^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})(\.([0-9]+))?Z$'

  [[ "$iso" =~ $iso_re ]] || return 1

  local fraction="${BASH_REMATCH[8]}000000"
  printf '%s%s%sT%s%s%s%sZ\n' \
    "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}" "${BASH_REMATCH[3]}" \
    "${BASH_REMATCH[4]}" "${BASH_REMATCH[5]}" "${BASH_REMATCH[6]}" \
    "${fraction:0:6}"
}

# Helper: current UTC time in dotted_order form (YYYYMMDDTHHMMSSffffffZ).
_now_dotted_timestamp() {
  local stamp
  stamp=$(date -u +"%Y%m%dT%H%M%S")
  printf '%s%sZ\n' "$stamp" "$(get_microseconds)"
}

# Helper: dotted_order timestamp for a transcript timestamp, falling back to
# the current time when the timestamp is empty or cannot be converted.
_dotted_timestamp_or_now() {
  local iso="$1"
  if [ -n "$iso" ] && _iso_to_dotted_timestamp "$iso"; then
    return 0
  fi
  _now_dotted_timestamp
}

#######################################
# Build and send the LangSmith runs for one conversation turn.
# Creates a root "chain" run for the turn, one "llm" run per assistant
# message and one "tool" run per tool call (tool runs are siblings of the
# llm runs, all children of the turn). Run creations and completions are
# collected into two batches and sent with send_multipart_batch.
#
# Differences from the earlier implementation:
#   - transcript timestamps are converted with _iso_to_dotted_timestamp, so
#     timestamps without (or with more than three) fractional digits still
#     produce a well-formed dotted_order
#   - every tool result is looked up once and reused for the tool run and
#     for the context passed to the next LLM call
#   - tool output no longer gets a trailing newline appended
#
# Globals:   PROJECT (read), CURRENT_TURN_ID (written: set while the turn is
#            being built, cleared once both batches have been handed off)
# Arguments: $1 - session id (stored as thread_id metadata)
#            $2 - turn number (used for the "turn-N" tag)
#            $3 - user transcript entry (JSON)
#            $4 - JSON array of merged assistant messages
#            $5 - JSON array of transcript entries holding tool results
#######################################
create_trace() {
  local session_id="$1"
  local turn_num="$2"
  local user_msg="$3"
  local assistant_messages="$4" # JSON array of assistant messages
  local tool_results="$5"

  # Batch collectors: runs to create (POST) and runs to complete (PATCH)
  local posts_batch="[]"
  local patches_batch="[]"

  local turn_id
  turn_id=$(uuidgen | tr '[:upper:]' '[:lower:]')

  local user_content
  user_content=$(format_content "$user_msg")

  local now
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  # The turn is the top-level run, so its id is also the trace id
  local trace_id="$turn_id"
  local turn_dotted_order
  turn_dotted_order="$(_now_dotted_timestamp)${turn_id}"

  local turn_data
  turn_data=$(jq -n \
    --arg id "$turn_id" \
    --arg trace_id "$trace_id" \
    --arg name "Claude Code" \
    --arg project "$PROJECT" \
    --arg session "$session_id" \
    --arg time "$now" \
    --argjson content "$user_content" \
    --arg turn "$turn_num" \
    --arg dotted_order "$turn_dotted_order" \
    '{
      id: $id,
      trace_id: $trace_id,
      name: $name,
      run_type: "chain",
      inputs: {messages: [{role: "user", content: $content}]},
      start_time: $time,
      dotted_order: $dotted_order,
      session_name: $project,
      extra: {metadata: {thread_id: $session}},
      tags: ["claude-code", ("turn-" + $turn)]
    }')

  posts_batch=$(echo "$posts_batch" | jq --argjson data "$turn_data" '. += [$data]')

  # Track this turn for cleanup on early exit
  CURRENT_TURN_ID="$turn_id"

  # Conversation so far; grows with every LLM response and tool result and
  # becomes the inputs of the next LLM run
  local all_outputs
  all_outputs=$(jq -n --argjson content "$user_content" '[{role: "user", content: $content}]')

  local llm_num=0
  local last_llm_end="$now"

  local assistant_msg msg_timestamp llm_start assistant_id tool_uses tool_count
  local assistant_content model_name msg_usage usage_metadata llm_inputs
  local assistant_dotted_order assistant_data llm_outputs assistant_end assistant_update
  local tool tool_id tool_name tool_input tool_use_id result_data result
  local tool_result_timestamp tool_dotted_order tool_start tool_end
  local tool_data tool_update tool_context

  # Process each assistant message (each represents one LLM call)
  while IFS= read -r assistant_msg; do
    llm_num=$((llm_num + 1))

    msg_timestamp=$(echo "$assistant_msg" | jq -r '.timestamp // ""')

    # LLM start: the message's own timestamp when present, otherwise chain
    # on from the turn start / the previous LLM's end
    if [ -n "$msg_timestamp" ]; then
      llm_start="$msg_timestamp"
    elif [ "$llm_num" -eq 1 ]; then
      llm_start="$now"
    else
      llm_start="$last_llm_end"
    fi

    assistant_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
    tool_uses=$(get_tool_uses "$assistant_msg")
    tool_count=$(echo "$tool_uses" | jq 'length')
    assistant_content=$(format_content "$assistant_msg")

    # Model name with the date suffix stripped,
    # e.g. "claude-sonnet-4-5-20250929" -> "claude-sonnet-4-5"
    model_name=$(echo "$assistant_msg" \
      | jq -r 'if type == "object" and has("message") then .message.model else empty end' \
      | sed 's/-[0-9]\{8\}$//')

    # Usage data preserved by merge_assistant_parts
    msg_usage=$(echo "$assistant_msg" | jq 'if type == "object" and has("message") then .message._usage // null elif type == "object" then ._usage // null else null end')

    # usage_metadata for LangSmith: input_tokens includes the cache tokens
    if [ "$msg_usage" != "null" ] && [ -n "$msg_usage" ]; then
      usage_metadata=$(echo "$msg_usage" | jq '{
        input_tokens: ((.input_tokens // 0) + (.cache_creation_input_tokens // 0) + (.cache_read_input_tokens // 0)),
        output_tokens: (.output_tokens // 0),
        input_token_details: {
          cache_read: (.cache_read_input_tokens // 0),
          cache_creation: (.cache_creation_input_tokens // 0)
        }
      }')
    else
      usage_metadata="null"
    fi

    # Inputs for this LLM call (includes accumulated context)
    llm_inputs=$(jq -n --argjson outputs "$all_outputs" '{messages: $outputs}')

    # dotted_order of a child: parent's dotted_order + "." + own timestamp + id
    assistant_dotted_order="${turn_dotted_order}.$(_dotted_timestamp_or_now "$msg_timestamp")${assistant_id}"

    assistant_data=$(jq -n \
      --arg id "$assistant_id" \
      --arg trace_id "$trace_id" \
      --arg parent "$turn_id" \
      --arg name "Claude" \
      --arg project "$PROJECT" \
      --arg time "$llm_start" \
      --argjson inputs "$llm_inputs" \
      --arg dotted_order "$assistant_dotted_order" \
      --arg model "$model_name" \
      '{
        id: $id,
        trace_id: $trace_id,
        parent_run_id: $parent,
        name: $name,
        run_type: "llm",
        inputs: $inputs,
        start_time: $time,
        dotted_order: $dotted_order,
        session_name: $project,
        extra: {metadata: {ls_provider: "anthropic", ls_model_name: $model}},
        tags: [$model]
      }')

    posts_batch=$(echo "$posts_batch" | jq --argjson data "$assistant_data" '. += [$data]')

    # Outputs for this LLM call
    llm_outputs=$(jq -n --argjson content "$assistant_content" '[{role: "assistant", content: $content}]')

    # Tool messages produced by this LLM call, appended to the context below
    tool_context="[]"

    if [ "$tool_count" -gt 0 ]; then
      # There is no separate LLM end timestamp, so the first tool is taken
      # to start at the LLM start time
      tool_start="$llm_start"

      while IFS= read -r tool; do
        tool_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
        tool_name=$(echo "$tool" | jq -r '.name // "tool"')
        tool_input=$(echo "$tool" | jq '.input // {}')
        tool_use_id=$(echo "$tool" | jq -r '.id // ""')

        # Single lookup; reused for the tool run and for the context
        result_data=$(find_tool_result_with_timestamp "$tool_use_id" "$tool_results")
        result=$(echo "$result_data" | jq -r '.result')
        tool_result_timestamp=$(echo "$result_data" | jq -r '.timestamp // ""')

        # Order the tool run by the timestamp of its result in the transcript
        tool_dotted_order="${turn_dotted_order}.$(_dotted_timestamp_or_now "$tool_result_timestamp")${tool_id}"

        # The result timestamp also serves as the tool's end time
        if [ -n "$tool_result_timestamp" ]; then
          tool_end="$tool_result_timestamp"
        else
          tool_end=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
        fi

        tool_data=$(jq -n \
          --arg id "$tool_id" \
          --arg trace_id "$trace_id" \
          --arg parent "$turn_id" \
          --arg name "$tool_name" \
          --arg project "$PROJECT" \
          --arg time "$tool_start" \
          --argjson input "$tool_input" \
          --arg dotted_order "$tool_dotted_order" \
          '{
            id: $id,
            trace_id: $trace_id,
            parent_run_id: $parent,
            name: $name,
            run_type: "tool",
            inputs: {input: $input},
            start_time: $time,
            dotted_order: $dotted_order,
            session_name: $project,
            tags: ["tool"]
          }')

        posts_batch=$(echo "$posts_batch" | jq --argjson data "$tool_data" '. += [$data]')

        # The result is fed through stdin as raw text; printf '%s' avoids
        # the trailing newline that echo would add to the stored output
        tool_update=$(printf '%s' "$result" | jq -Rs \
          --arg time "$tool_end" \
          --arg id "$tool_id" \
          --arg trace_id "$trace_id" \
          --arg parent "$turn_id" \
          --arg dotted_order "$tool_dotted_order" \
          '{
            id: $id,
            trace_id: $trace_id,
            parent_run_id: $parent,
            dotted_order: $dotted_order,
            outputs: {output: .},
            end_time: $time
          }')

        patches_batch=$(echo "$patches_batch" | jq --argjson data "$tool_update" '. += [$data]')

        tool_context=$(echo "$tool_context" | jq \
          --arg id "$tool_use_id" \
          --arg result "$result" \
          '. += [{role: "tool", tool_call_id: $id, content: [{type: "text", text: $result}]}]')

        # Next tool starts after this one ends
        tool_start="$tool_end"
      done < <(echo "$tool_uses" | jq -c '.[]')

      # Assistant completes after all tools finish
      assistant_end="$tool_start"
    else
      # No tools, assistant completes immediately
      assistant_end=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
    fi

    # Complete the assistant run
    assistant_update=$(jq -n \
      --arg time "$assistant_end" \
      --arg id "$assistant_id" \
      --arg trace_id "$trace_id" \
      --arg parent "$turn_id" \
      --arg dotted_order "$assistant_dotted_order" \
      --argjson outputs "$llm_outputs" \
      --argjson usage_metadata "$usage_metadata" \
      '{
        id: $id,
        trace_id: $trace_id,
        parent_run_id: $parent,
        dotted_order: $dotted_order,
        outputs: ({messages: $outputs} + (if $usage_metadata != null then {usage_metadata: $usage_metadata} else {} end)),
        end_time: $time
      }')

    patches_batch=$(echo "$patches_batch" | jq --argjson data "$assistant_update" '. += [$data]')

    # Save end time for next LLM start
    last_llm_end="$assistant_end"

    # Extend the context: this LLM's response first, then its tool results
    all_outputs=$(echo "$all_outputs" | jq \
      --argjson new "$llm_outputs" \
      --argjson tools "$tool_context" \
      '. + $new + $tools')
  done < <(echo "$assistant_messages" | jq -c '.[]')

  # Turn outputs: everything accumulated except the user message
  local turn_outputs
  turn_outputs=$(echo "$all_outputs" | jq '[.[] | select(.role != "user")]')

  # Use the last LLM's end time as the turn end time
  local turn_end="$last_llm_end"

  local turn_update
  turn_update=$(jq -n \
    --arg time "$turn_end" \
    --arg id "$turn_id" \
    --arg trace_id "$trace_id" \
    --arg dotted_order "$turn_dotted_order" \
    --argjson outputs "$turn_outputs" \
    '{
      id: $id,
      trace_id: $trace_id,
      dotted_order: $dotted_order,
      outputs: {messages: $outputs},
      end_time: $time
    }')

  patches_batch=$(echo "$patches_batch" | jq --argjson data "$turn_update" '. += [$data]')

  # Send both batches; a failed batch is logged by send_multipart_batch and
  # must not abort the hook
  send_multipart_batch "post" "$posts_batch" || true
  send_multipart_batch "patch" "$patches_batch" || true

  # Clear the tracked turn since it's now complete
  CURRENT_TURN_ID=""

  log "INFO" "Created turn $turn_num: $turn_id with $llm_num LLM call(s)"
}
|
|
|
|
#######################################
# Helper for main: merge the buffered assistant parts into one message,
# append it to current_assistants, and reset the buffer.
# Works on main's local variables through bash dynamic scoping:
#   current_msg_id, current_assistant_parts (read, reset)
#   current_assistants (appended to)
#######################################
_flush_assistant_parts() {
  if [ -n "$current_msg_id" ] && [ "$(echo "$current_assistant_parts" | jq 'length')" -gt 0 ]; then
    local merged
    merged=$(merge_assistant_parts "$current_assistant_parts")
    current_assistants=$(echo "$current_assistants" | jq --argjson msg "$merged" '. += [$msg]')
  fi
  current_assistant_parts="[]"
  current_msg_id=""
}

#######################################
# Helper for main: send a trace for the turn collected so far, provided it
# has a user message and at least one assistant message.
# Works on main's local variables through bash dynamic scoping:
#   session_id, turn_count, current_user, current_assistants,
#   current_tool_results (read); turns (incremented)
#######################################
_emit_pending_turn() {
  if [ -n "$current_user" ] && [ "$(echo "$current_assistants" | jq 'length')" -gt 0 ]; then
    turns=$((turns + 1))
    create_trace "$session_id" "$((turn_count + turns))" "$current_user" "$current_assistants" "$current_tool_results" || true
  fi
}

#######################################
# Main function: read the hook payload from stdin, collect the transcript
# lines added since the previous run, group them into turns (user message,
# assistant messages, tool results), send one trace per turn, and remember
# how far the transcript has been processed.
#
# Differences from the earlier implementation:
#   - the position stored in the state is derived from real transcript line
#     numbers, so blank lines no longer make it drift (which caused already
#     traced messages to be processed again)
#   - a transcript line that is not valid JSON is skipped with a warning
#     instead of aborting the hook at the same line on every run
#   - an unparsable hook payload ends up in the "Invalid input" path
#   - a leading "~" in the transcript path is expanded without sed, so
#     special characters in HOME cannot break the substitution
#
# Inputs:   hook payload (JSON with session_id, transcript_path and
#           optionally stop_hook_active) on stdin
# Returns:  0 after processing; exits 0 early when there is nothing to do
#######################################
main() {
  # Track execution time
  local script_start
  script_start=$(date +%s)

  # Read hook input
  local hook_input
  hook_input=$(cat)

  # Check stop_hook_active flag
  if echo "$hook_input" | jq -e '.stop_hook_active == true' > /dev/null 2>&1; then
    debug "stop_hook_active=true, skipping"
    exit 0
  fi

  # Extract session info; unparsable input leaves both values empty
  local session_id
  session_id=$(echo "$hook_input" | jq -r '.session_id // ""' 2> /dev/null) || session_id=""

  local transcript_path
  transcript_path=$(echo "$hook_input" | jq -r '.transcript_path // ""' 2> /dev/null) || transcript_path=""
  if [[ "$transcript_path" == "~"* ]]; then
    transcript_path="${HOME}${transcript_path:1}"
  fi

  if [ -z "$session_id" ] || [ ! -f "$transcript_path" ]; then
    log "WARN" "Invalid input: session=$session_id, transcript=$transcript_path"
    exit 0
  fi

  log "INFO" "Processing session $session_id"

  # Load state
  local state
  state=$(load_state)

  # last_line is the 0-based index of the last processed transcript line
  local last_line
  last_line=$(echo "$state" | jq -r --arg sid "$session_id" '.[$sid].last_line // -1')

  local turn_count
  turn_count=$(echo "$state" | jq -r --arg sid "$session_id" '.[$sid].turn_count // 0')

  # New non-blank lines, each prefixed with its line number and a tab so the
  # loop below knows the exact position of every message
  local new_messages
  new_messages=$(awk -v start="$last_line" 'NR > start + 1 && NF { print NR "\t" $0 }' "$transcript_path")

  if [ -z "$new_messages" ]; then
    debug "No new messages"
    exit 0
  fi

  local msg_count
  msg_count=$(printf '%s\n' "$new_messages" | wc -l | tr -d '[:space:]')
  log "INFO" "Found $msg_count new messages"

  # Group into turns
  local current_user=""
  local current_assistants="[]"      # Array of assistant messages
  local current_msg_id=""            # Current assistant message ID
  local current_assistant_parts="[]" # Parts of current assistant message
  local current_tool_results="[]"
  local turns=0
  local new_last_line=$last_line

  local tab=$'\t'
  local entry line line_no role msg_id

  while IFS= read -r entry; do
    line_no="${entry%%"$tab"*}"
    line="${entry#*"$tab"}"
    new_last_line=$((line_no - 1))

    if ! role=$(printf '%s\n' "$line" | jq -r 'if type == "object" and has("message") then .message.role elif type == "object" then .role else "unknown" end' 2> /dev/null); then
      log "WARN" "Skipping transcript line $line_no: not valid JSON"
      continue
    fi

    if [ "$role" = "user" ]; then
      if [ "$(is_tool_result "$line")" = "true" ]; then
        # Tool results belong to the turn in progress
        current_tool_results=$(echo "$current_tool_results" | jq --argjson msg "$line" '. += [$msg]')
      else
        # A real user message starts a new turn: close out the previous one
        _flush_assistant_parts
        _emit_pending_turn

        current_user="$line"
        current_assistants="[]"
        current_assistant_parts="[]"
        current_msg_id=""
        current_tool_results="[]"
      fi
    elif [ "$role" = "assistant" ]; then
      msg_id=$(echo "$line" | jq -r 'if type == "object" and has("message") then .message.id else "" end')

      if [ -z "$msg_id" ] || [ "$msg_id" = "$current_msg_id" ]; then
        # No ID or the same ID: another part of the current message
        current_assistant_parts=$(echo "$current_assistant_parts" | jq --argjson msg "$line" '. += [$msg]')
      else
        # New message ID: finalize the previous message, start a new one
        _flush_assistant_parts
        current_msg_id="$msg_id"
        current_assistant_parts=$(jq -n --argjson msg "$line" '[$msg]')
      fi
    fi
  done <<< "$new_messages"

  # Process final turn
  _flush_assistant_parts
  _emit_pending_turn

  # Update state
  local updated
  updated=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  state=$(echo "$state" | jq \
    --arg sid "$session_id" \
    --arg line "$new_last_line" \
    --arg count "$((turn_count + turns))" \
    --arg time "$updated" \
    '.[$sid] = {last_line: ($line | tonumber), turn_count: ($count | tonumber), updated: $time}')

  save_state "$state"

  # Log execution time
  local script_end
  script_end=$(date +%s)
  local duration=$((script_end - script_start))

  log "INFO" "Processed $turns turns in ${duration}s"
  if [ "$duration" -gt 180 ]; then
    log "WARN" "Hook took ${duration}s (>3min), consider optimizing"
  fi
}
|
|
|
|
# Entry point: hand over to main, then finish with exit status 0.
main "$@"
exit 0