add tests

This commit is contained in:
Tanushree Sharma
2025-12-29 17:39:59 -05:00
commit 673fbf92e5
33 changed files with 5960 additions and 0 deletions
+28
View File
@@ -0,0 +1,28 @@
# Claude Code to LangSmith Tracing Project
## Project Overview
This project sets up tracing of Claude Code conversations to LangSmith.
## How It Works
- A "Stop" hook is configured in `.claude/settings.local.json` that runs each time Claude Code responds
- The hook reads Claude Code's generated conversation transcripts
- Messages in the transcript are converted into LangSmith runs and sent to the configured LangSmith project
## Commands
### Fetch Traces
Use the `langsmith-fetch` command to retrieve traces from the LangSmith project when you want to debug. Do this proactively to make sure your changes are correct:
Get the last trace:
```bash
langsmith-fetch traces --project-uuid 16e20536-e4d7-4390-8fcf-1d49cb47f4c2 --format json
```
Get the last N traces:
```bash
langsmith-fetch traces --project-uuid 16e20536-e4d7-4390-8fcf-1d49cb47f4c2 --format json --limit 5
```
## Project Configuration
- LangSmith Project UUID: `16e20536-e4d7-4390-8fcf-1d49cb47f4c2`
- Hook configuration is in `.claude/settings.local.json`
View File
+52
View File
@@ -0,0 +1,52 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
.venv/
venv/
ENV/
# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
# IDE
.idea/
.vscode/
*.swp
*.swo
*~
# Environment variables
.env
.env.local
# OS
.DS_Store
Thumbs.db
# Project specific
cc_transcript.jsonl
*.log
+28
View File
@@ -0,0 +1,28 @@
[pytest]
testpaths = tests
python_files = test_*.py
python_classes = Test*
python_functions = test_*
# Markers for test categorization
markers =
unit: Unit tests for individual functions
integration: Integration tests requiring API calls
slow: Tests that take >5 seconds
# Integration tests disabled by default (require API key)
addopts =
-v
--tb=short
--strict-markers
-m "not integration"
--cov=tests
--cov-report=html
--cov-report=term-missing
# Timeout for tests
timeout = 60
# Capture output
log_cli = false
log_cli_level = INFO
Executable
+957
View File
@@ -0,0 +1,957 @@
#!/bin/bash
###
# Claude Code Stop Hook - LangSmith Tracing Integration
# Sends Claude Code traces to LangSmith after each response.
###
# Abort on the first unhandled command failure.
set -o errexit

# Early configuration: log() reads LOG_FILE and debug() reads DEBUG,
# so both must exist before the first log line is written.
LOG_FILE="${HOME}/.claude/state/hook.log"
# Lower-cased so that "TRUE", "True" and "true" are treated alike.
DEBUG=$(echo "${CC_LANGSMITH_DEBUG}" | tr '[:upper:]' '[:lower:]')
# Logging functions
#######################################
# Append one timestamped entry to the hook log.
# Globals:   LOG_FILE (read) - path of the log file
# Arguments: $1   - severity label (e.g. DEBUG, INFO, WARN, ERROR)
#            $2.. - message words, joined with single spaces
# Returns:   0 once the severity label has been consumed. Logging is
#            best-effort: a missing or unwritable log location must never
#            abort the hook, which runs under `set -e`.
#######################################
log() {
  local level="$1"
  shift
  local log_dir
  log_dir=$(dirname "$LOG_FILE")
  # The very first log call can happen before anything has created the
  # state directory, so create it on demand.
  [ -d "$log_dir" ] || mkdir -p "$log_dir" 2> /dev/null || true
  # Braces keep a failing redirection (e.g. read-only directory) quiet.
  {
    printf '%s [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$level" "$*" >> "$LOG_FILE"
  } 2> /dev/null || true
}
# Forward a message to log() at DEBUG level, but only when the DEBUG
# global is exactly "true"; otherwise do nothing and succeed.
debug() {
  [[ "$DEBUG" == "true" ]] || return 0
  log "DEBUG" "$@"
}
# Announce start-up (only written when debug logging is enabled).
debug "Hook started, TRACE_TO_LANGSMITH=$TRACE_TO_LANGSMITH"

# Tracing is opt-in: any value other than a case-insensitive "true"
# ends the hook right here with a success status.
if [[ "$(echo "$TRACE_TO_LANGSMITH" | tr '[:upper:]' '[:lower:]')" != "true" ]]; then
  debug "Tracing disabled, exiting early"
  exit 0
fi

# Make sure every external tool used below is installed. A missing tool is
# reported on stderr, but the exit status stays 0.
for cmd in jq curl uuidgen; do
  command -v "$cmd" > /dev/null 2>&1 && continue
  echo "Error: $cmd is required but not installed" >&2
  exit 0
done
# Config (continued)
# Hook-specific CC_LANGSMITH_* variables take precedence over the generic
# LANGSMITH_* ones.
API_KEY="${CC_LANGSMITH_API_KEY:-$LANGSMITH_API_KEY}"
PROJECT="${CC_LANGSMITH_PROJECT:-claude-code}"
# The endpoint can be overridden (regional or self-hosted deployments);
# without an override the public endpoint is used, exactly as before.
API_BASE="${CC_LANGSMITH_ENDPOINT:-${LANGSMITH_ENDPOINT:-https://api.smith.langchain.com}}"
# Endpoints are appended as "$API_BASE/..."; drop a trailing slash so the
# resulting URL never contains "//".
API_BASE="${API_BASE%/}"
STATE_FILE="${STATE_FILE:-$HOME/.claude/state/langsmith_state.json}"

# Global variables
CURRENT_TURN_ID="" # Track current turn run for cleanup on exit

# Ensure the state directory and the log directory exist
mkdir -p "$(dirname "$STATE_FILE")" "$(dirname "$LOG_FILE")"

# Validate API key: without one nothing can be sent, so stop quietly.
if [ -z "$API_KEY" ]; then
  log "ERROR" "CC_LANGSMITH_API_KEY not set"
  exit 0
fi
# Print the microsecond part of the current time as six digits.
# `date +%N` is not available on macOS, so the source is chosen in order:
# GNU date installed as gdate, then python3 on macOS, then plain date.
get_microseconds() {
  if command -v gdate > /dev/null 2>&1; then
    # GNU date from coreutils (brew install coreutils)
    gdate +%6N
    return
  fi

  case "$OSTYPE" in
    darwin*)
      # macOS without gdate: let Python compute the fraction
      python3 -c "import time; print(str(int(time.time() * 1000000) % 1000000).zfill(6))"
      ;;
    *)
      # Linux/GNU date
      date +%6N
      ;;
  esac
}
# Print the size of a file in bytes.
# Arguments: $1 - path of the file
# BSD stat (macOS) and GNU stat use different format flags, hence the
# OSTYPE switch.
get_file_size() {
  local file="$1"
  case "$OSTYPE" in
    darwin*) stat -f%z "$file" ;;
    *) stat -c%s "$file" ;;
  esac
}
#######################################
# API call helper: send one JSON request to the LangSmith API.
# Globals:   API_KEY, API_BASE (read)
# Arguments: $1 - HTTP method
#            $2 - endpoint path, appended to API_BASE
#            $3 - request body (JSON text)
# Outputs:   the response body on stdout when the call succeeds
# Returns:   0 on a 2xx response; 1 when curl itself fails, when no
#            three-digit status could be read, or on a non-2xx status
#            (details are written to the log).
#######################################
api_call() {
  local method="$1"
  local endpoint="$2"
  local data="$3"
  local response
  local http_code
  local curl_status=0

  # -w appends the status code on its own last line so that body and
  # status can be separated afterwards.
  response=$(curl -s --max-time 60 -w "\n%{http_code}" -X "$method" \
    -H "x-api-key: $API_KEY" \
    -H "Content-Type: application/json" \
    -d "$data" \
    "$API_BASE$endpoint" 2>&1) || curl_status=$?

  http_code=$(echo "$response" | tail -n1)
  response=$(echo "$response" | sed '$d')

  # The status must be validated as a number first: a numeric test on
  # garbage errors out and would otherwise skip the failure branch.
  if [ "$curl_status" -ne 0 ] ||
    ! [[ "$http_code" =~ ^[0-9]{3}$ ]] ||
    [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
    log "ERROR" "API call failed: $method $endpoint"
    if [ "$curl_status" -ne 0 ]; then
      log "ERROR" "curl exit status: $curl_status"
    fi
    log "ERROR" "HTTP $http_code: $response"
    # Only the first 500 characters of the payload are logged.
    log "ERROR" "Request data: ${data:0:500}"
    return 1
  fi

  echo "$response"
}
# Exit handler: if a turn run is still being tracked in CURRENT_TURN_ID,
# PATCH it with an end time and an error marker so that it is not left
# open. Does nothing when no turn is being tracked.
cleanup_pending_turn() {
  [ -n "$CURRENT_TURN_ID" ] || return 0

  debug "Cleanup: completing pending turn run $CURRENT_TURN_ID"

  local now turn_update
  now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  turn_update=$(jq -n \
    --arg time "$now" \
    '{
      outputs: {messages: []},
      end_time: $time,
      error: "Incomplete: script exited early"
    }')

  # Best effort only: the script is exiting, so a failed request is ignored.
  api_call "PATCH" "/runs/$CURRENT_TURN_ID" "$turn_update" > /dev/null 2>&1 || true
  log "WARN" "Completed pending turn run $CURRENT_TURN_ID due to early exit"
}

# Register the handler for every way the script can end
# (EXIT covers normal exit, errors, and interrupts).
trap cleanup_pending_turn EXIT
#######################################
# Load state: print the persisted hook state as JSON text.
# Globals:   STATE_FILE (read)
# Outputs:   the state file's content, or "{}" when the file is missing,
#            empty, or does not hold a JSON object. An unusable file
#            (e.g. left behind by an interrupted write) would otherwise
#            make every later jq read of the state fail.
#######################################
load_state() {
  if [ ! -f "$STATE_FILE" ] || [ ! -s "$STATE_FILE" ]; then
    echo "{}"
    return
  fi

  # Validate before handing the content to callers that parse it with jq.
  if command -v jq > /dev/null 2>&1 &&
    ! jq -e 'type == "object"' "$STATE_FILE" > /dev/null 2>&1; then
    log "WARN" "State file $STATE_FILE is not a valid JSON object, starting from empty state"
    echo "{}"
    return
  fi

  cat "$STATE_FILE"
}
#######################################
# Save state: persist the hook state.
# Globals:   STATE_FILE (read)
# Arguments: $1 - complete state as JSON text
# Returns:   0 when the state was written, non-zero otherwise.
# The state is written to a temporary file next to STATE_FILE and then
# renamed over it, so a concurrent or interrupted run never observes a
# truncated or half-written file.
#######################################
save_state() {
  local state="$1"
  local tmp_file

  if tmp_file=$(mktemp "${STATE_FILE}.XXXXXX" 2> /dev/null); then
    if printf '%s\n' "$state" > "$tmp_file" && mv -f "$tmp_file" "$STATE_FILE"; then
      return 0
    fi
    rm -f "$tmp_file"
  fi

  # Fallback when no temporary file can be created or renamed: write in place.
  printf '%s\n' "$state" > "$STATE_FILE"
}
# Print the content of a transcript entry as compact JSON.
# Arguments: $1 - transcript entry (JSON text)
# An object with a "message" key yields .message.content, any other object
# yields .content, and a non-object yields null.
get_content() {
  local msg="$1"
  jq -c '
    if type != "object" then null
    elif has("message") then .message.content
    else .content
    end
  ' <<< "$msg"
}
# Print "true" when the entry's content is an array that contains at least
# one object block of type "tool_result", otherwise print "false".
# Arguments: $1 - transcript entry (JSON text)
is_tool_result() {
  local msg="$1"
  local content
  local verdict="false"

  content=$(get_content "$msg")
  if jq -e 'if type == "array" then any(.[]; type == "object" and .type == "tool_result") else false end' \
    <<< "$content" > /dev/null 2>&1; then
    verdict="true"
  fi

  echo "$verdict"
}
# Convert an entry's content into the block list sent to LangSmith.
# Arguments: $1 - transcript entry (JSON text)
# Outputs:   a JSON array of blocks:
#   - string content becomes a single text block
#   - array content is mapped block by block (text and thinking are kept,
#     tool_use becomes tool_call, bare strings become text blocks, anything
#     else passes through); an empty array becomes one empty text block
#   - any other content becomes one empty text block
format_content() {
  local msg="$1"
  local content
  local kind

  content=$(get_content "$msg")
  # Unparseable or missing content leaves kind empty and selects the default.
  kind=$(jq -r 'type' <<< "$content" 2> /dev/null) || kind=""

  case "$kind" in
    string)
      jq '[{"type": "text", "text": .}]' <<< "$content"
      ;;
    array)
      jq '[
        .[] |
        if type == "object" then
          if .type == "text" then
            {"type": "text", "text": .text}
          elif .type == "thinking" then
            {"type": "thinking", "thinking": .thinking}
          elif .type == "tool_use" then
            {"type": "tool_call", "name": .name, "args": .input, "id": .id}
          else
            .
          end
        elif type == "string" then
          {"type": "text", "text": .}
        else
          .
        end
      ] | if length == 0 then [{"type": "text", "text": ""}] else . end' <<< "$content"
      ;;
    *)
      echo '[{"type": "text", "text": ""}]'
      ;;
  esac
}
# Print the tool_use blocks of an entry as a compact JSON array.
# Arguments: $1 - transcript entry (JSON text)
# Content that is not an array yields "[]".
get_tool_uses() {
  local msg="$1"
  local content
  content=$(get_content "$msg")

  if jq -e 'type == "array"' <<< "$content" > /dev/null 2>&1; then
    jq -c 'map(select(type == "object" and .type == "tool_use"))' <<< "$content"
  else
    echo "[]"
  fi
}
# Print the usage object of the LAST part that carries one (per the original
# note, SSE counts are cumulative, so the last one is taken), or null when
# no part has usage.
# Arguments: $1 - JSON array of assistant message parts
get_usage_from_parts() {
  local parts="$1"
  jq -c 'map(.message.usage // null | select(. != null)) | last // null' <<< "$parts"
}
# Look up the tool_result block answering a given tool_use id.
# Arguments: $1 - tool_use id to look for
#            $2 - JSON array of transcript entries holding tool results
# Outputs:   compact JSON {result: "...", timestamp: "..."} for the first
#            match. Array content is reduced to its text blocks joined by
#            spaces, string content is used as is, anything else is
#            stringified. Without a match the output is
#            {result: "No result", timestamp: null}.
find_tool_result_with_timestamp() {
  local tool_id="$1"
  local tool_results="$2"
  local match

  match=$(jq -c --arg id "$tool_id" '
    first(
      .[] |
      . as $entry |
      (if type == "object" and has("message") then .message.content elif type == "object" then .content else null end) as $blocks |
      if $blocks | type == "array" then
        $blocks[] |
        select(type == "object" and .type == "tool_result" and .tool_use_id == $id) |
        {
          result: (
            if .content | type == "array" then
              [.content[] | select(type == "object" and .type == "text") | .text] | join(" ")
            elif .content | type == "string" then
              .content
            else
              .content | tostring
            end
          ),
          timestamp: $entry.timestamp
        }
      else
        empty
      end
    ) // {result: "No result", timestamp: null}
  ' <<< "$tool_results")

  echo "$match"
}
# Merge the parts of one assistant message into a single entry.
# Arguments: $1 - JSON array of parts belonging to the same message
# Outputs:   the FIRST part with its content replaced by the concatenated
#            content of all parts (adjacent text blocks are joined into
#            one) and, when any part carried usage, that usage stored
#            under "_usage" next to the content.
merge_assistant_parts() {
  local current_assistant_parts="$1"

  # Usage of the last part that has one (SSE counts are cumulative)
  local usage
  usage=$(get_usage_from_parts "$current_assistant_parts")

  jq -s \
    --argjson usage "$usage" \
    '
    # Content of one entry, wherever it is stored.
    def content_of:
      if type == "object" and has("message") then .message.content
      elif type == "object" then .content
      else null
      end;

    # Normalise one content value to a list of blocks.
    def as_blocks:
      if type == "string" then [{"type":"text","text":.}]
      elif type == "array" then .
      else [.]
      end;

    # Join runs of adjacent text blocks; other blocks flush the run.
    def coalesce_text:
      reduce .[] as $item (
        {result: [], buffer: null};
        if $item.type == "text" then
          if .buffer then .buffer.text += $item.text
          else .buffer = $item
          end
        else
          (if .buffer then .result += [.buffer] else . end) |
          .buffer = null | .result += [$item]
        end
      ) | if .buffer then .result + [.buffer] else .result end;

    .[0] as $parts |
    $parts[0] as $base |
    ($parts
      | map(content_of | select(. != null) | as_blocks)
      | add // []
      | coalesce_text) as $final_content |
    $base |
    if type == "object" and has("message") then
      .message.content = $final_content |
      (if $usage != null then .message._usage = $usage else . end)
    elif type == "object" then
      .content = $final_content |
      (if $usage != null then ._usage = $usage else . end)
    else
      .
    end
    ' <<< "$current_assistant_parts"
}
# Write one multipart part to disk and print its curl arguments.
# Arguments: $1 - form field name, $2 - file to create, $3 - JSON payload
# Outputs:   "-F" and the field specification, one per line
# Returns:   non-zero (printing nothing) when the file cannot be written
#            or measured, so no part with an empty Content-Length is emitted.
_emit_multipart_part() {
  local field_name="$1"
  local part_file="$2"
  local payload="$3"
  local part_size

  echo "$payload" > "$part_file" || return 1
  # Declared separately from the assignment so a failure is not masked.
  part_size=$(get_file_size "$part_file") || return 1
  [ -n "$part_size" ] || return 1

  echo "-F"
  echo "${field_name}=<${part_file};type=application/json;headers=Content-Length:${part_size}"
}

#######################################
# Serialize run data for multipart upload.
# Writes the parts to temp files and outputs curl -F arguments, one per
# line: the run without inputs/outputs first, then inputs and outputs as
# separate parts when present.
# Arguments: $1 - operation, "post" or "patch"
#            $2 - full run JSON
#            $3 - temp directory for this batch
# Returns:   non-zero when the run has no id or a part cannot be prepared
#######################################
serialize_for_multipart() {
  local operation="$1"
  local run_json="$2"
  local temp_dir="$3"

  local run_id
  run_id=$(echo "$run_json" | jq -r '.id // empty')
  # Field and file names are built from the id; a run without one cannot
  # be addressed.
  [ -n "$run_id" ] || return 1

  # Split inputs/outputs from the main data ("// empty" prints nothing
  # for a missing or null value)
  local inputs
  inputs=$(echo "$run_json" | jq -c '.inputs // empty')
  local outputs
  outputs=$(echo "$run_json" | jq -c '.outputs // empty')
  local main_data
  main_data=$(echo "$run_json" | jq -c 'del(.inputs, .outputs)')

  # Part 1: main run data
  _emit_multipart_part "${operation}.${run_id}" \
    "$temp_dir/${operation}_${run_id}_main.json" "$main_data" || return 1

  # Part 2: inputs (if present)
  if [ -n "$inputs" ] && [ "$inputs" != "null" ]; then
    _emit_multipart_part "${operation}.${run_id}.inputs" \
      "$temp_dir/${operation}_${run_id}_inputs.json" "$inputs" || return 1
  fi

  # Part 3: outputs (if present)
  if [ -n "$outputs" ] && [ "$outputs" != "null" ]; then
    _emit_multipart_part "${operation}.${run_id}.outputs" \
      "$temp_dir/${operation}_${run_id}_outputs.json" "$outputs" || return 1
  fi
}
#######################################
# Send batch of runs via multipart endpoint.
# Globals:   API_KEY, API_BASE (read)
# Arguments: $1 - operation, "post" or "patch"
#            $2 - JSON array of runs
# Returns:   0 when the batch is empty or the server answered 2xx;
#            1 when the payload is not countable, no temp directory can
#            be created, curl fails, or the status is missing or non-2xx.
#######################################
send_multipart_batch() {
  local operation="$1"
  local batch_json="$2"

  # Parse batch size
  local batch_size
  batch_size=$(echo "$batch_json" | jq 'length' 2> /dev/null) || batch_size=""
  if ! [[ "$batch_size" =~ ^[0-9]+$ ]]; then
    log "ERROR" "Batch $operation skipped: payload is not valid JSON"
    return 1
  fi

  if [ "$batch_size" -eq 0 ]; then
    debug "No $operation runs to send"
    return 0
  fi

  # Create temp directory for this batch
  local temp_dir
  if ! temp_dir=$(mktemp -d) || [ -z "$temp_dir" ]; then
    log "ERROR" "Batch $operation failed: could not create temp directory"
    return 1
  fi

  # Build multipart curl command. -w appends the status code on its own
  # last line so it can be split from the body below.
  local curl_args=()
  curl_args+=("-s" "--max-time" "60" "-w" "\n%{http_code}" "-X" "POST")
  curl_args+=("-H" "x-api-key: $API_KEY")

  # Serialize each run and collect curl -F arguments
  local run arg
  while IFS= read -r run; do
    # One argument per line keeps each value intact (no word splitting)
    while IFS= read -r arg; do
      curl_args+=("$arg")
    done < <(serialize_for_multipart "$operation" "$run" "$temp_dir")
  done < <(echo "$batch_json" | jq -c '.[]')

  curl_args+=("$API_BASE/runs/multipart")

  # Execute curl
  local response
  local http_code
  local curl_status=0
  response=$(curl "${curl_args[@]}" 2>&1) || curl_status=$?

  http_code=$(echo "$response" | tail -n1)
  response=$(echo "$response" | sed '$d')

  # Cleanup temp directory
  rm -rf -- "$temp_dir"

  # Validate the status as a number first: a numeric test on garbage
  # errors out and would otherwise be taken for success.
  if [ "$curl_status" -ne 0 ] ||
    ! [[ "$http_code" =~ ^[0-9]{3}$ ]] ||
    [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
    log "ERROR" "Batch $operation failed: HTTP $http_code"
    if [ "$curl_status" -ne 0 ]; then
      log "ERROR" "curl exit status: $curl_status"
    fi
    log "ERROR" "Response: $response"
    return 1
  fi

  log "INFO" "Batch $operation succeeded: $batch_size runs"
  return 0
}
# Create LangSmith trace
#
# Builds all runs for one conversation turn and sends them in two batches:
# first the run creations ("post"), then the run completions ("patch").
# Run layout, as constructed below:
#   - one "chain" run for the turn (top-level; trace_id equals its own id)
#   - one "llm" run per assistant message, child of the turn run
#   - one "tool" run per tool_use block, also a child of the turn run
#
# Globals:   PROJECT (read), CURRENT_TURN_ID (set while the turn is being
#            built, cleared after both batches were sent)
# Arguments: $1 - session id (stored as thread_id metadata on the turn run)
#            $2 - turn number (used for the "turn-N" tag and the log line)
#            $3 - user message entry (JSON)
#            $4 - JSON array of merged assistant messages
#            $5 - JSON array of entries containing tool results
# A failed batch does not fail this function: both sends end in "|| true".
create_trace() {
local session_id="$1"
local turn_num="$2"
local user_msg="$3"
local assistant_messages="$4" # JSON array of assistant messages
local tool_results="$5"
# Initialize batch collectors for this trace
local posts_batch="[]"
local patches_batch="[]"
local turn_id
turn_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
local user_content
user_content=$(format_content "$user_msg")
local now
now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# Create dotted_order timestamp with microseconds (format: YYYYMMDDTHHMMSSffffffZ)
# NOTE(review): the seconds and the microseconds come from two separate
# clock reads, so they are not guaranteed to belong to the same instant.
local dotted_timestamp
dotted_timestamp=$(date -u +"%Y%m%dT%H%M%S")
local microseconds
microseconds=$(get_microseconds)
dotted_timestamp="${dotted_timestamp}${microseconds}Z"
# Create top-level turn run with dotted_order and trace_id
# For top-level run: trace_id = run_id
local turn_dotted_order="${dotted_timestamp}${turn_id}"
local turn_data
turn_data=$(jq -n \
--arg id "$turn_id" \
--arg trace_id "$turn_id" \
--arg name "Claude Code" \
--arg project "$PROJECT" \
--arg session "$session_id" \
--arg time "$now" \
--argjson content "$user_content" \
--arg turn "$turn_num" \
--arg dotted_order "$turn_dotted_order" \
'{
id: $id,
trace_id: $trace_id,
name: $name,
run_type: "chain",
inputs: {messages: [{role: "user", content: $content}]},
start_time: $time,
dotted_order: $dotted_order,
session_name: $project,
extra: {metadata: {thread_id: $session}},
tags: ["claude-code", ("turn-" + $turn)]
}')
posts_batch=$(echo "$posts_batch" | jq --argjson data "$turn_data" '. += [$data]')
# Track this turn for cleanup on early exit
CURRENT_TURN_ID="$turn_id"
# Build final outputs array (accumulates all LLM responses)
# It starts with the user message so that it can double as the input
# context of the first LLM run.
local all_outputs
all_outputs=$(jq -n --argjson content "$user_content" '[{role: "user", content: $content}]')
# Process each assistant message (each represents one LLM call)
local llm_num=0
local last_llm_end="$now"
while IFS= read -r assistant_msg; do
llm_num=$((llm_num + 1))
# Extract timestamp from message for proper ordering
local msg_timestamp
msg_timestamp=$(echo "$assistant_msg" | jq -r '.timestamp // ""')
# Use message timestamp for LLM start time
# Without one, the first call starts at "now" and later calls start
# where the previous call ended.
local llm_start
if [ -n "$msg_timestamp" ]; then
llm_start="$msg_timestamp"
elif [ $llm_num -eq 1 ]; then
llm_start="$now"
else
llm_start="$last_llm_end"
fi
# Create assistant run
local assistant_id
assistant_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
local tool_uses
tool_uses=$(get_tool_uses "$assistant_msg")
local assistant_content
assistant_content=$(format_content "$assistant_msg")
# Extract model name from assistant message and strip date suffix
# e.g., "claude-sonnet-4-5-20250929" -> "claude-sonnet-4-5"
local model_name
model_name=$(echo "$assistant_msg" | jq -r 'if type == "object" and has("message") then .message.model else empty end' | sed 's/-[0-9]\{8\}$//')
# Extract usage data from assistant message (preserved by merge_assistant_parts)
local msg_usage
msg_usage=$(echo "$assistant_msg" | jq 'if type == "object" and has("message") then .message._usage // null elif type == "object" then ._usage // null else null end')
# Build usage_metadata for LangSmith
# input_tokens is the sum of the plain, cache-creation and cache-read
# counts; the two cache counts are additionally reported as details.
local usage_metadata
if [ "$msg_usage" != "null" ] && [ -n "$msg_usage" ]; then
usage_metadata=$(echo "$msg_usage" | jq '{
input_tokens: ((.input_tokens // 0) + (.cache_creation_input_tokens // 0) + (.cache_read_input_tokens // 0)),
output_tokens: (.output_tokens // 0),
input_token_details: {
cache_read: (.cache_read_input_tokens // 0),
cache_creation: (.cache_creation_input_tokens // 0)
}
}')
else
usage_metadata="null"
fi
# Build inputs for this LLM call (includes accumulated context)
local llm_inputs
llm_inputs=$(jq -n --argjson outputs "$all_outputs" '{messages: $outputs}')
# Create dotted_order for assistant (child of turn)
# Convert ISO timestamp to dotted_order format
# From: 2025-12-16T17:44:04.397Z
# To: 20251216T174404397000Z (milliseconds padded to microseconds)
# NOTE(review): the sed expression appends "000" to whatever fractional
# digits are present; a timestamp without a fractional part, or with a
# fraction that is not exactly 3 digits, would not end up with 6
# microsecond digits - confirm transcript timestamps always have
# millisecond precision.
local assistant_timestamp
if [ -n "$msg_timestamp" ]; then
# Extract and convert timestamp from message
assistant_timestamp=$(echo "$msg_timestamp" | sed 's/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/')
else
# Fallback to current time if no timestamp
assistant_timestamp=$(date -u +"%Y%m%dT%H%M%S")
local assistant_microseconds
assistant_microseconds=$(get_microseconds)
assistant_timestamp="${assistant_timestamp}${assistant_microseconds}Z"
fi
local assistant_dotted_order="${turn_dotted_order}.${assistant_timestamp}${assistant_id}"
# Extract trace_id from parent dotted_order (UUID after the Z)
# Format: 20231215T120000123456Zuuid -> uuid
local trace_id
trace_id="${turn_dotted_order#*Z}"
local assistant_data
assistant_data=$(jq -n \
--arg id "$assistant_id" \
--arg trace_id "$trace_id" \
--arg parent "$turn_id" \
--arg name "Claude" \
--arg project "$PROJECT" \
--arg time "$llm_start" \
--argjson inputs "$llm_inputs" \
--arg dotted_order "$assistant_dotted_order" \
--arg model "$model_name" \
'{
id: $id,
trace_id: $trace_id,
parent_run_id: $parent,
name: $name,
run_type: "llm",
inputs: $inputs,
start_time: $time,
dotted_order: $dotted_order,
session_name: $project,
extra: {metadata: {ls_provider: "anthropic", ls_model_name: $model}},
tags: [$model]
}')
posts_batch=$(echo "$posts_batch" | jq --argjson data "$assistant_data" '. += [$data]')
# Build outputs for this LLM call
local llm_outputs
llm_outputs=$(jq -n --argjson content "$assistant_content" '[{role: "assistant", content: $content}]')
# Track when this LLM iteration ends (after tools complete)
local assistant_end
# Create tool runs as siblings of the assistant run
if [ "$(echo "$tool_uses" | jq 'length')" -gt 0 ]; then
# First tool starts after LLM completes
# Use llm_start as LLM end time approximation (we don't have separate end timestamp)
local tool_start
tool_start="$llm_start"
# If there are multiple assistant parts, the last timestamp is closer to LLM end
# NOTE(review): this reads the same .timestamp field as msg_timestamp
# above, so both values are identical here.
local llm_end_approx
llm_end_approx=$(echo "$assistant_msg" | jq -r '.timestamp // ""')
if [ -n "$llm_end_approx" ]; then
tool_start="$llm_end_approx"
fi
while IFS= read -r tool; do
local tool_id
tool_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
local tool_name
tool_name=$(echo "$tool" | jq -r '.name // "tool"')
local tool_input
tool_input=$(echo "$tool" | jq '.input // {}')
local tool_use_id
tool_use_id=$(echo "$tool" | jq -r '.id // ""')
# Find tool result and extract timestamp from transcript
local result_data
result_data=$(find_tool_result_with_timestamp "$tool_use_id" "$tool_results")
local result
result=$(echo "$result_data" | jq -r '.result')
local tool_result_timestamp
tool_result_timestamp=$(echo "$result_data" | jq -r '.timestamp // ""')
# Create dotted_order for tool (child of turn)
# Use the tool result timestamp from transcript for proper ordering
local tool_timestamp
if [ -n "$tool_result_timestamp" ]; then
# Convert ISO timestamp to dotted_order format
# From: 2025-12-16T17:44:04.397Z
# To: 20251216T174404397000Z (milliseconds padded to microseconds)
# (same conversion, with the same precision assumption, as for the
# assistant run)
tool_timestamp=$(echo "$tool_result_timestamp" | sed 's/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/')
else
# Fallback to current time if no timestamp in transcript
tool_timestamp=$(date -u +"%Y%m%dT%H%M%S")
local tool_microseconds
tool_microseconds=$(get_microseconds)
tool_timestamp="${tool_timestamp}${tool_microseconds}Z"
fi
local tool_dotted_order="${turn_dotted_order}.${tool_timestamp}${tool_id}"
# Use tool result timestamp for end time as well
local tool_end
if [ -n "$tool_result_timestamp" ]; then
tool_end="$tool_result_timestamp"
else
tool_end=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
fi
# Tools are siblings of the assistant run (both children of turn run)
local tool_data
tool_data=$(jq -n \
--arg id "$tool_id" \
--arg trace_id "$trace_id" \
--arg parent "$turn_id" \
--arg name "$tool_name" \
--arg project "$PROJECT" \
--arg time "$tool_start" \
--argjson input "$tool_input" \
--arg dotted_order "$tool_dotted_order" \
'{
id: $id,
trace_id: $trace_id,
parent_run_id: $parent,
name: $name,
run_type: "tool",
inputs: {input: $input},
start_time: $time,
dotted_order: $dotted_order,
session_name: $project,
tags: ["tool"]
}')
posts_batch=$(echo "$posts_batch" | jq --argjson data "$tool_data" '. += [$data]')
# The result text is fed to jq as raw input (-Rs), so it becomes one
# JSON string regardless of its content.
local tool_update
tool_update=$(echo "$result" | jq -Rs \
--arg time "$tool_end" \
--arg id "$tool_id" \
--arg trace_id "$trace_id" \
--arg parent "$turn_id" \
--arg dotted_order "$tool_dotted_order" \
'{
id: $id,
trace_id: $trace_id,
parent_run_id: $parent,
dotted_order: $dotted_order,
outputs: {output: .},
end_time: $time
}')
patches_batch=$(echo "$patches_batch" | jq --argjson data "$tool_update" '. += [$data]')
# Next tool starts after this one ends
tool_start="$tool_end"
done < <(echo "$tool_uses" | jq -c '.[]')
# Assistant completes after all tools finish
assistant_end="$tool_start"
else
# No tools, assistant completes immediately
assistant_end=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
fi
# Now complete the assistant run
local assistant_update
assistant_update=$(jq -n \
--arg time "$assistant_end" \
--arg id "$assistant_id" \
--arg trace_id "$trace_id" \
--arg parent "$turn_id" \
--arg dotted_order "$assistant_dotted_order" \
--argjson outputs "$llm_outputs" \
--argjson usage_metadata "$usage_metadata" \
'{
id: $id,
trace_id: $trace_id,
parent_run_id: $parent,
dotted_order: $dotted_order,
outputs: ({messages: $outputs} + (if $usage_metadata != null then {usage_metadata: $usage_metadata} else {} end)),
end_time: $time
}')
patches_batch=$(echo "$patches_batch" | jq --argjson data "$assistant_update" '. += [$data]')
# Save end time for next LLM start
last_llm_end="$assistant_end"
# Add to overall outputs
all_outputs=$(echo "$all_outputs" | jq --argjson new "$llm_outputs" '. += $new')
# Add tool results to accumulated context (for next LLM's inputs)
# Each result is looked up a second time here; the first lookup happened
# while the tool runs were built above.
if [ "$(echo "$tool_uses" | jq 'length')" -gt 0 ]; then
while IFS= read -r tool; do
local tool_use_id
tool_use_id=$(echo "$tool" | jq -r '.id // ""')
local result_data
result_data=$(find_tool_result_with_timestamp "$tool_use_id" "$tool_results")
local result
result=$(echo "$result_data" | jq -r '.result')
all_outputs=$(echo "$all_outputs" | jq \
--arg id "$tool_use_id" \
--arg result "$result" \
'. += [{role: "tool", tool_call_id: $id, content: [{type: "text", text: $result}]}]')
done < <(echo "$tool_uses" | jq -c '.[]')
fi
done < <(echo "$assistant_messages" | jq -c '.[]')
# Update turn run with all outputs
# Filter out user messages from final outputs
local turn_outputs
turn_outputs=$(echo "$all_outputs" | jq '[.[] | select(.role != "user")]')
# Use the last LLM's end time as the turn end time
local turn_end="$last_llm_end"
local turn_update
turn_update=$(jq -n \
--arg time "$turn_end" \
--arg id "$turn_id" \
--arg trace_id "$turn_id" \
--arg dotted_order "$turn_dotted_order" \
--argjson outputs "$turn_outputs" \
'{
id: $id,
trace_id: $trace_id,
dotted_order: $dotted_order,
outputs: {messages: $outputs},
end_time: $time
}')
patches_batch=$(echo "$patches_batch" | jq --argjson data "$turn_update" '. += [$data]')
# Send both batches
# Creations go first, completions second; a failure of either one is
# deliberately ignored.
send_multipart_batch "post" "$posts_batch" || true
send_multipart_batch "patch" "$patches_batch" || true
# Clear the tracked turn since it's now complete
CURRENT_TURN_ID=""
log "INFO" "Created turn $turn_num: $turn_id with $llm_num LLM call(s)"
}
#######################################
# Main function: trace every turn added to the transcript since the last run.
# Reads the hook payload (JSON with session_id, transcript_path and
# stop_hook_active) from stdin, groups the new transcript lines into turns
# (one user message followed by assistant messages and tool results),
# calls create_trace for each complete turn and records the new position
# in the state.
# Globals:   HOME (read)
# Exits 0 without tracing when the hook is already active, the input is
# invalid, or there are no new lines.
#######################################
main() {
  # Track execution time
  local script_start
  script_start=$(date +%s)

  # Read hook input
  local hook_input
  hook_input=$(cat)

  # Check stop_hook_active flag
  if echo "$hook_input" | jq -e '.stop_hook_active == true' > /dev/null 2>&1; then
    debug "stop_hook_active=true, skipping"
    exit 0
  fi

  # Extract session info
  local session_id
  session_id=$(echo "$hook_input" | jq -r '.session_id // ""')
  local transcript_path
  transcript_path=$(echo "$hook_input" | jq -r '.transcript_path // ""')
  # Expand a leading "~" by plain string handling: $HOME is inserted
  # literally, whatever characters it contains.
  case "$transcript_path" in
    "~" | "~/"*) transcript_path="${HOME}${transcript_path:1}" ;;
  esac

  if [ -z "$session_id" ] || [ ! -f "$transcript_path" ]; then
    log "WARN" "Invalid input: session=$session_id, transcript=$transcript_path"
    exit 0
  fi

  log "INFO" "Processing session $session_id"

  # Load state. last_line is the 0-based index of the last transcript line
  # already processed (-1 when nothing has been processed yet).
  local state
  state=$(load_state)
  local last_line
  last_line=$(echo "$state" | jq -r --arg sid "$session_id" '.[$sid].last_line // -1')
  local turn_count
  turn_count=$(echo "$state" | jq -r --arg sid "$session_id" '.[$sid].turn_count // 0')

  # Parse new messages. Every non-blank line is prefixed with its real line
  # number, so skipped blank lines cannot shift the recorded position.
  local new_messages
  new_messages=$(awk -v start="$last_line" \
    'NR > start + 1 && NF { printf "%d\t%s\n", NR, $0 }' "$transcript_path")

  if [ -z "$new_messages" ]; then
    debug "No new messages"
    exit 0
  fi

  local msg_count
  # tr removes the padding some wc implementations put before the number
  msg_count=$(echo "$new_messages" | wc -l | tr -d '[:space:]')
  log "INFO" "Found $msg_count new messages"

  # Group into turns
  local current_user=""
  local current_assistants="[]"      # Array of assistant messages
  local current_msg_id=""            # Current assistant message ID
  local current_assistant_parts="[]" # Parts of current assistant message
  local current_tool_results="[]"
  local turns=0
  local new_last_line=$last_line
  local line_no line

  while IFS=$'\t' read -r line_no line; do
    # Convert the 1-based line number to the 0-based index kept in the state
    new_last_line=$((line_no - 1))

    if [ -z "$line" ]; then
      continue
    fi

    local role
    role=$(echo "$line" | jq -r 'if type == "object" and has("message") then .message.role elif type == "object" then .role else "unknown" end')

    if [ "$role" = "user" ]; then
      if [ "$(is_tool_result "$line")" = "true" ]; then
        # Add to tool results
        current_tool_results=$(echo "$current_tool_results" | jq --argjson msg "$line" '. += [$msg]')
      else
        # New turn - finalize any pending assistant message
        if [ -n "$current_msg_id" ] && [ "$(echo "$current_assistant_parts" | jq 'length')" -gt 0 ]; then
          # Merge parts and add to assistants array
          local merged
          merged=$(merge_assistant_parts "$current_assistant_parts")
          current_assistants=$(echo "$current_assistants" | jq --argjson msg "$merged" '. += [$msg]')
          current_assistant_parts="[]"
          current_msg_id=""
        fi

        # Create trace for previous turn
        if [ -n "$current_user" ] && [ "$(echo "$current_assistants" | jq 'length')" -gt 0 ]; then
          turns=$((turns + 1))
          local turn_num=$((turn_count + turns))
          create_trace "$session_id" "$turn_num" "$current_user" "$current_assistants" "$current_tool_results" || true
        fi

        # Start new turn
        current_user="$line"
        current_assistants="[]"
        current_assistant_parts="[]"
        current_msg_id=""
        current_tool_results="[]"
      fi
    elif [ "$role" = "assistant" ]; then
      # Get message ID
      local msg_id
      msg_id=$(echo "$line" | jq -r 'if type == "object" and has("message") then .message.id else "" end')

      if [ -z "$msg_id" ]; then
        # No message ID, treat as continuation of current message
        current_assistant_parts=$(echo "$current_assistant_parts" | jq --argjson msg "$line" '. += [$msg]')
      elif [ "$msg_id" = "$current_msg_id" ]; then
        # Same message ID, add to current parts
        current_assistant_parts=$(echo "$current_assistant_parts" | jq --argjson msg "$line" '. += [$msg]')
      else
        # New message ID - finalize previous message if any
        if [ -n "$current_msg_id" ] && [ "$(echo "$current_assistant_parts" | jq 'length')" -gt 0 ]; then
          # Merge parts and add to assistants array
          local merged
          merged=$(merge_assistant_parts "$current_assistant_parts")
          current_assistants=$(echo "$current_assistants" | jq --argjson msg "$merged" '. += [$msg]')
        fi

        # Start new assistant message
        current_msg_id="$msg_id"
        current_assistant_parts=$(jq -n --argjson msg "$line" '[$msg]')
      fi
    fi
  done <<< "$new_messages"

  # Process final turn - finalize any pending assistant message
  if [ -n "$current_msg_id" ] && [ "$(echo "$current_assistant_parts" | jq 'length')" -gt 0 ]; then
    local merged
    merged=$(merge_assistant_parts "$current_assistant_parts")
    current_assistants=$(echo "$current_assistants" | jq --argjson msg "$merged" '. += [$msg]')
  fi

  if [ -n "$current_user" ] && [ "$(echo "$current_assistants" | jq 'length')" -gt 0 ]; then
    turns=$((turns + 1))
    local turn_num=$((turn_count + turns))
    create_trace "$session_id" "$turn_num" "$current_user" "$current_assistants" "$current_tool_results" || true
  fi

  # Update state
  local updated
  updated=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

  state=$(echo "$state" | jq \
    --arg sid "$session_id" \
    --arg line "$new_last_line" \
    --arg count "$((turn_count + turns))" \
    --arg time "$updated" \
    '.[$sid] = {last_line: ($line | tonumber), turn_count: ($count | tonumber), updated: $time}')

  save_state "$state"

  # Log execution time
  local script_end
  script_end=$(date +%s)
  local duration=$((script_end - script_start))

  log "INFO" "Processed $turns turns in ${duration}s"

  if [ "$duration" -gt 180 ]; then
    log "WARN" "Hook took ${duration}s (>3min), consider optimizing"
  fi
}
# Run main
# No arguments are forwarded: main reads the hook payload from stdin.
main
# Explicit success status once main has returned.
exit 0
+331
View File
@@ -0,0 +1,331 @@
# Implementation Plan: Subagent Tracing in LangSmith
## Overview
Add comprehensive subagent tracing to `stop_hook.sh` to capture Task tool executions and their agent transcript conversations as nested runs in LangSmith.
## User Requirements
- Trace ALL Task tool invocations (all subagent_types: Explore, Plan, etc.)
- Create individual child runs for each agent message (user/assistant/tool within agent)
- Agent files stored in same directory as main transcript: `agent-{agentId}.jsonl`
- Correlate using `agentId` from `toolUseResult` field in main transcript
## Current Architecture
### Main Transcript Structure
- Location: Passed as `transcript_path` in hook input (e.g., `cc_transcript.jsonl`)
- Contains: User messages, assistant messages with tool_use, tool_result messages
- Task tools have special `toolUseResult.agentId` field in tool_result messages
### Agent Transcript Structure
- Location: Same directory as main transcript, named `agent-{agentId}.jsonl`
- Format: One JSON object per line, same schema as main transcript
- Contains: Agent's internal conversation (user prompts, assistant responses, tool calls)
### Current Tool Processing (stop_hook.sh lines 599-695)
- Iterates through tool_uses from assistant messages
- Creates tool runs as siblings of assistant (both children of turn)
- Matches tool results using `find_tool_result_with_timestamp()`
- Batches POST/PATCH operations with `send_multipart_batch()`
## Implementation Steps
### 1. Add Detection Functions (Insert after line 263)
**New functions:**
```bash
# Succeed (exit status 0) only when the given tool_use JSON names the "Task" tool.
# Arguments:
#   $1 - tool_use object as a JSON string
# Returns: 0 when .name is exactly "Task", 1 otherwise
is_task_tool() {
  local tool_json="$1"
  local name
  # A jq failure is deliberately tolerated: whatever was printed (normally
  # nothing) is compared below, so unparsable input simply reads as "not Task".
  name=$(jq -r '.name // ""' <<< "$tool_json") || true
  [[ "$name" == "Task" ]]
}
# Extract the agentId recorded for a Task tool invocation.
# Arguments:
#   $1 - tool_use id of the Task call
#   $2 - JSON array of transcript messages that carry tool results
# Outputs: the agentId of the first message that has a toolUseResult.agentId
#          and contains a tool_result block for $1; an empty line if none match
get_agent_id_from_result() {
  local tool_use_id="$1"
  local tool_results="$2"
  # `objects` skips entries whose toolUseResult is not a JSON object (for
  # example a plain string). Indexing such a value with .agentId would raise a
  # jq error and abort the lookup for every other entry in the array.
  jq -r --arg id "$tool_use_id" '
    first(
      .[] |
      select((.toolUseResult | objects | .agentId) != null) |
      select(
        (.message.content // .content) as $content |
        if $content | type == "array" then
          $content[] | select(.type == "tool_result" and .tool_use_id == $id)
        else false end
      ) |
      .toolUseResult.agentId
    ) // ""
  ' <<< "$tool_results"
}
# Build the path of an agent transcript, which sits next to the main transcript.
# Arguments:
#   $1 - path to the main transcript file
#   $2 - agent identifier
# Outputs: "<directory of $1>/agent-<$2>.jsonl" on stdout
get_agent_transcript_path() {
  local main_path="$1"
  local id="$2"
  local dir
  # A dirname failure is tolerated so the function always prints a path.
  dir=$(dirname "$main_path") || true
  printf '%s\n' "${dir}/agent-${id}.jsonl"
}
```
**Purpose:** Identify Task tools and locate corresponding agent files
### 2. Add Agent Processing Function (Insert after detection functions)
**New function:** `process_agent_transcript()`
**Parameters:**
- `parent_tool_id`: Task tool's run ID (parent for agent messages)
- `agent_id`: Agent identifier (e.g., "558bc970")
- `main_transcript_path`: Path to main transcript for deriving agent file path
- `tool_result_timestamp`: Timestamp from tool_result for ordering
- `parent_dotted_order`: Task tool's dotted_order for hierarchy
- `trace_id`: Trace ID for all runs in this trace
- `posts_batch_ref`: Variable name containing posts batch array
- `patches_batch_ref`: Variable name containing patches batch array
**Logic:**
1. Derive agent file path from main transcript path + agent_id
2. Check if agent file exists (graceful exit if not)
3. Read agent file line by line
4. For each agent message:
- Extract role (assistant/user), timestamp, content
- Skip tool_result messages (they're already processed)
- Create run with proper dotted_order: `{parent_tool_order}.{msg_timestamp}{msg_uuid}`
- For assistant messages: Extract model, usage, tool_uses
- For assistant with tool_uses: Create child tool runs
- Add to posts_batch and patches_batch using indirect variable refs
**Key features:**
- Handles nested tool calls within agent
- Preserves timestamps from agent transcript
- Maintains proper hierarchy via dotted_order
- Efficient: line-by-line processing, no full file load
### 3. Integrate into Tool Processing Loop (Modify lines 599-695)
**Insert point:** Line 690 (after Task tool POST, before tool PATCH)
**Integration code:**
```bash
# After creating the tool run (POST)...
# Check if this is a Task tool
if is_task_tool "$tool"; then
debug "Detected Task tool: $tool_name"
# Extract agentId from tool result
local agent_id
agent_id=$(get_agent_id_from_result "$tool_use_id" "$tool_results")
if [ -n "$agent_id" ]; then
debug "Found agentId: $agent_id for tool $tool_use_id"
# Process agent transcript
process_agent_transcript \
"$tool_id" \
"$agent_id" \
"$transcript_path" \
"$tool_result_timestamp" \
"$tool_dotted_order" \
"$trace_id" \
"posts_batch" \
"patches_batch"
else
debug "No agentId found for Task tool $tool_use_id"
fi
fi
# Then create tool completion (PATCH)...
```
**Rationale:**
- Task tool run must exist before agent messages can reference it as parent
- Agent processing happens between tool creation and completion
- All runs batched together for efficient API submission
### 4. Store Transcript Path (Modify line 805)
**Current:**
```bash
local transcript_path
transcript_path=$(echo "$hook_input" | jq -r '.transcript_path // ""' | sed "s|^~|$HOME|")
```
**Required:** Ensure `transcript_path` variable is accessible in `create_trace()` function scope
**Solution:** Pass `transcript_path` as parameter to `create_trace()` or make it a global variable accessible throughout the script
## LangSmith Run Hierarchy
**Before (current):**
```
Turn (Claude Code chain)
├── Assistant (llm)
├── Read (tool)
├── Assistant (llm)
└── Edit (tool)
```
**After (with agents):**
```
Turn (Claude Code chain)
├── Assistant (llm)
├── Task (tool)
│ ├── Agent: claude-haiku-4-5 (llm)
│ ├── Glob (tool)
│ ├── Read (tool)
│ ├── Agent: claude-haiku-4-5 (llm)
│ └── Bash (tool)
├── Assistant (llm)
└── Edit (tool)
```
**Key relationships:**
- Turn run = parent for both main assistant AND all tools (including Task)
- Task tool run = parent for all agent messages and agent tools
- Agent assistant runs = children of Task tool
- Agent tool calls = siblings of agent assistant runs (both children of Task tool)
- All share same trace_id for unified trace view
**Important:** Tools are siblings of assistants, not nested under them. This matches Claude Code's execution model where tool calls happen between assistant messages.
## Dotted Order Management
**Format:** `YYYYMMDDTHHMMSSffffffZ{uuid}`
**Hierarchy encoding:**
- Turn: `20251216T174404397000Z{turn_uuid}`
- Task tool: `{turn_order}.{tool_timestamp}{tool_uuid}`
- Agent message: `{tool_order}.{agent_msg_timestamp}{agent_msg_uuid}`
- Agent tool: `{tool_order}.{agent_tool_timestamp}{agent_tool_uuid}` (a sibling of the agent messages, directly under the Task tool)
**Example:**
```
Turn: 20251216T174404397000Za1b2c3d4
Task tool: 20251216T174404397000Za1b2c3d4.20251216T174455000000Zi9j0k1l2
Agent msg: 20251216T174404397000Za1b2c3d4.20251216T174455000000Zi9j0k1l2.20251216T174409317000Zm3n4o5p6
Agent tool: 20251216T174404397000Za1b2c3d4.20251216T174455000000Zi9j0k1l2.20251216T174410733000Zq7r8s9t0
```
LangSmith sorts runs lexicographically by dotted_order, ensuring proper visual hierarchy.
## Error Handling
**Missing agent file:**
- Check: `[ ! -f "$agent_file" ]`
- Action: Log debug message, return gracefully
- Impact: Main trace completes normally, just without agent details
**Empty agent transcript:**
- Check: `[ -z "$agent_messages" ]`
- Action: Log debug message, return gracefully
**Invalid JSON in agent file:**
- Mitigation: Use `jq` with `2>/dev/null` and `|| echo ""` fallbacks
- Parse errors don't crash hook
**Large agent transcripts (100+ messages):**
- Solution: Line-by-line processing with `while read`
- Memory efficient, no timeout issues expected
## Performance Considerations
**Current:** 10 turns × 5 tools = ~100 operations → 2 API calls (POST + PATCH batches) → ~2-5s
**With agents:** 10 turns × 1 Task × 20 agent messages = +400 operations → Same 2 API calls → ~5-10s
**Optimization:**
- Agent runs added to existing batches (no extra API calls)
- Multipart batch endpoint handles large payloads efficiently
- Line-by-line processing prevents memory issues
## Testing Strategy
**Test cases:**
1. Single Task tool with 5 agent messages → Verify 1 Task + 5 child runs
2. Multiple Task tools in same turn → Verify independent agent hierarchies
3. Missing agent file → Verify graceful degradation
4. Agent with tool calls → Verify agent tool runs appear under the Task tool, as siblings of the agent assistant runs
5. Large agent (50+ messages) → Verify performance <10s
**Validation:**
- Agent runs appear as children of Task in LangSmith UI
- Timestamps accurate, dotted_order correct
- Usage metadata captured for agent LLM calls
- Tags distinguish agent runs ("agent", "subagent", "agent-tool")
## Critical Files
**Primary:**
- `/Users/tanushreesharma/tracing-claude-code/stop_hook.sh` - Main implementation file
- Line 263: Insert detection functions (~30 lines)
- After 263: Insert processing function (~250 lines)
- Line 690: Insert integration code (~20 lines)
- Line 805: Ensure transcript_path accessible
**Reference:**
- `cc_transcript.jsonl` - Example main transcript with Task tools
- `agent-*.jsonl` - Example agent transcripts
- `$HOME/.claude/state/hook.log` - Debug output for troubleshooting
## Rollout Plan
**Phase 1: Core Implementation**
- Add detection and processing functions
- Integrate into tool loop
- Test with simple Task tool (single agent, few messages)
**Phase 2: Validation**
- Test with multiple Task tools
- Test with large agent transcripts
- Verify LangSmith UI displays correctly
**Phase 3: Production**
- Enable in production environment
- Monitor logs for errors
- Collect user feedback
## Success Criteria
✓ All Task tool invocations traced with agent details
✓ Agent messages appear as proper child runs in LangSmith
✓ Correct hierarchy and ordering maintained
✓ No performance degradation (hook completes in <10s)
✓ Graceful handling of missing/invalid agent files
✓ Clear debug logging for troubleshooting
## Edge Cases
**Nested Task tools:** Agent calls Task → creates sub-agent
- Handled: Recursive processing via `process_agent_transcript`
- Limit: Consider depth limit (max 3 levels) if performance issues
**Concurrent agents:** Multiple Task tools in same turn
- Handled: Each agent processed independently in loop
- No conflicts (unique agentId, separate files)
**Agent file not yet written:** Hook runs before agent file created
- Handled: File check returns gracefully
- Next hook execution will pick it up if tool_result present
## Implementation Estimate
**Code size:**
- Detection functions: ~30 lines
- Processing function: ~250 lines
- Integration code: ~20 lines
- **Total new code: ~300 lines**
**Effort:**
- Implementation: 4-6 hours
- Testing: 2-3 hours
- Documentation: 1 hour
- **Total: ~1 day**
**Risk level:** Medium
- Touching production hook script
- Complex nested structure
- Multiple edge cases to handle
- Mitigated by: Graceful error handling, extensive testing, debug logging
Executable
+27
View File
@@ -0,0 +1,27 @@
#!/bin/bash
# Manually trigger the stop hook against a recorded transcript
# (cc_transcript.jsonl by default).
#
# Run from the directory that contains stop_hook.sh: the hook is invoked as
# ./stop_hook.sh.
#
# Optional environment overrides:
#   SESSION_ID       - session id placed in the hook input
#   TRANSCRIPT_PATH  - transcript file handed to the hook

# Session ID taken from the recorded transcript (overridable)
SESSION_ID="${SESSION_ID:-6bb19f49-d296-485d-8eb8-c5cbb8a9b80d}"
TRANSCRIPT_PATH="${TRANSCRIPT_PATH:-$HOME/tracing-claude-code/cc_transcript.jsonl}"

# Fail early with a clear message instead of feeding the hook empty input
if ! command -v jq >/dev/null 2>&1; then
  echo "Error: jq is required to build the hook input" >&2
  exit 1
fi
if [ ! -f "$TRANSCRIPT_PATH" ]; then
  echo "Error: transcript not found: $TRANSCRIPT_PATH" >&2
  exit 1
fi

# Create hook input
HOOK_INPUT=$(jq -n \
  --arg sid "$SESSION_ID" \
  --arg path "$TRANSCRIPT_PATH" \
  '{
    session_id: $sid,
    transcript_path: $path,
    stop_hook_active: false
  }') || {
  echo "Error: failed to build hook input" >&2
  exit 1
}

echo "Testing hook with cc_transcript.jsonl..."
echo "Session ID: $SESSION_ID"
echo "Transcript: $TRANSCRIPT_PATH"
echo

# Call the hook, passing the JSON input on stdin
printf '%s\n' "$HOOK_INPUT" | bash ./stop_hook.sh

echo
echo "Done! Check ~/.claude/state/hook.log for details"
+262
View File
@@ -0,0 +1,262 @@
# Test Suite for stop_hook.sh
Comprehensive pytest-based test suite for `stop_hook.sh`, which implements tracing from Claude Code -> LangSmith.
## Quick Start
```bash
# Install test dependencies
.venv/bin/pip install -r tests/requirements-test.txt
# Run all unit tests (no API key needed)
.venv/bin/pytest tests/unit/ -v
# Run with coverage
.venv/bin/pytest tests/unit/ --cov=tests --cov-report=html
# View coverage report
open htmlcov/index.html
```
## Test Results
Current Status: **279 passing unit tests** covering comprehensive functionality ✅
### Test Coverage
#### Core Functions
- **Message Parsing** (6 tests): ✅ get_content, is_tool_result, get_tool_uses
- **Content Formatting** (13 tests): ✅ format_content, merge_assistant_parts, get_usage_from_parts
- **Utilities** (6 tests): ✅ get_microseconds, get_file_size
- **Cost Tracking** (7 tests): ✅ usage_metadata calculation, cache token tracking
- **Trace Ordering** (16 tests): ✅ dotted_order format, chronological sorting, parent-child relationships
- **Model Name Formatting** (15 tests): ✅ Date suffix stripping for cleaner model names
- **State Management** (5 tests): ✅ load_state, save_state with isolated environment
#### API & Network Operations (35 tests)
- **API Call Function** (6 tests): ✅ HTTP request handling, headers, timeout configuration
- **API Error Handling** (3 tests): ✅ POST/PATCH structure, timeout handling
- **Multipart Batch Sending** (9 tests): ✅ Batch uploads, temp file management, endpoint usage
- **Cleanup on Exit** (7 tests): ✅ Pending turn cleanup, error handling, trap configuration
- **API Key Handling** (3 tests): ✅ Environment variable fallback, validation
- **HTTP Response Handling** (4 tests): ✅ Success codes, error logging, response body handling
- **Project Configuration** (3 tests): ✅ Project name, API base URL configuration
#### Main Entry Point & Workflow (54 tests)
- **Hook Input Parsing** (6 tests): ✅ session_id, transcript_path extraction and validation
- **Stop Hook Active Flag** (2 tests): ✅ Recursive execution prevention
- **Incremental Processing** (6 tests): ✅ last_line tracking, awk-based skipping
- **Turn Grouping** (8 tests): ✅ User/assistant/tool message grouping logic
- **SSE Streaming Merge** (6 tests): ✅ Message ID tracking, part accumulation
- **State Updates** (4 tests): ✅ Session-specific state persistence
- **Execution Time Tracking** (5 tests): ✅ Duration calculation, slow execution warnings
- **Tracing Disabled Check** (3 tests): ✅ TRACE_TO_LANGSMITH validation
- **Required Commands** (4 tests): ✅ jq, curl, uuidgen availability checks
- **Final Turn Processing** (2 tests): ✅ Pending message handling at EOF
- **Main Logging** (4 tests): ✅ Session start, message counts, turn tracking
- **Main Integration** (4 tests): ✅ End-to-end validation with mocked environment
#### Timestamp Conversion (21 tests)
- **ISO to Dotted Order** (9 tests): ✅ Format conversion, padding, delimiter removal
- **Dotted Order Format** (2 tests): ✅ Timestamp format validation
- **Chronological Ordering** (3 tests): ✅ Sort order verification across timestamps
- **Edge Cases** (5 tests): ✅ Midnight, end-of-day, zero milliseconds, leap years
- **Real Transcript Data** (2 tests): ✅ Actual timestamp format from cc_transcript.jsonl
#### Multipart Serialization (29 tests)
- **Serialize Function** (11 tests): ✅ Operation/run_json/temp_dir parameters, file creation
- **File Naming** (4 tests): ✅ Main/inputs/outputs file naming conventions
- **Data Separation** (4 tests): ✅ Excluding inputs/outputs from main data
- **Integration Tests** (6 tests): ✅ POST/PATCH operations, file existence validation
- **Curl Format** (4 tests): ✅ -F arguments, Content-Length headers, part naming
#### Trace Creation (65 tests)
- **Create Trace Function** (6 tests): ✅ Parameter acceptance and structure
- **Turn Run Creation** (8 tests): ✅ Chain type, UUID generation, dotted_order, tags
- **Assistant Run Creation** (8 tests): ✅ LLM type, parent relationships, model metadata
- **Tool Run Creation** (7 tests): ✅ Tool type, inputs, parent relationships
- **Tool Result Finding** (5 tests): ✅ Result lookup by ID, timestamp extraction
- **Usage Metadata** (6 tests): ✅ Token counts, cache tracking, input/output details
- **Dotted Order Hierarchy** (3 tests): ✅ Parent-child dotted_order relationships
- **Outputs Accumulation** (4 tests): ✅ Message accumulation across LLM calls
- **Batch Processing** (10 tests): ✅ POST/PATCH batch creation and submission
- **Current Turn Tracking** (2 tests): ✅ CURRENT_TURN_ID for cleanup
- **Multiple LLM Calls** (4 tests): ✅ Iteration, numbering, context accumulation
- **Logging** (2 tests): ✅ Turn creation, LLM call logging
## Test Structure
```
tests/
├── conftest.py # Pytest fixtures
├── pytest.ini # Pytest configuration
├── requirements-test.txt # Test dependencies
├── unit/ # Unit tests (no external dependencies)
│ ├── test_message_parsing.py # Content extraction (6 tests)
│ ├── test_content_formatting.py # LangSmith format (13 tests)
│ ├── test_state_management.py # State persistence (5 tests)
│ ├── test_utilities.py # Cross-platform utils (6 tests)
│ ├── test_cost_tracking.py # Token usage & cost (7 tests)
│ ├── test_trace_ordering.py # Timestamp ordering (16 tests)
│ ├── test_model_name_formatting.py # Model name cleaning (15 tests)
│ ├── test_api_calls.py # API operations & batch sending (35 tests)
│ ├── test_main_entry.py # Main workflow & entry point (54 tests)
│ ├── test_timestamp_conversion.py # ISO to dotted_order conversion (21 tests)
│ ├── test_multipart_serialization.py # Multipart file handling (29 tests)
│ └── test_trace_creation.py # Trace structure & hierarchy (65 tests)
├── helpers/ # Test utilities
│ ├── bash_runner.py # Execute bash functions in isolation
│ ├── langsmith_client.py # LangSmith API helpers
│ ├── transcript_parser.py # JSONL test data generation
│ └── state_manager.py # State file management
└── test_data/ # Test fixtures
├── minimal_transcript.jsonl
├── multi_turn.jsonl
├── with_tools.jsonl
└── streaming_sse.jsonl
```
## Key Features
### 1. BashRunner - Test Bash Functions in Isolation
```python
from tests.helpers.bash_runner import BashRunner
runner = BashRunner()
# Call any bash function from stop_hook.sh
result = runner.call_function("get_content", '{"message": {"content": "hello"}}')
print(result) # "hello"
```
### 2. TranscriptBuilder - Generate Test Data
```python
from tests.helpers.transcript_parser import TranscriptBuilder
builder = TranscriptBuilder(Path("test.jsonl"))
builder.add_user_message("Hello")
builder.add_assistant_message("Hi there!")
builder, tool_id = builder.add_tool_use("Read", {"file_path": "/test.txt"})
builder.add_tool_result(tool_id, "File content")
builder.build()
```
### 3. LangSmith Client - Verify Traces
```python
from tests.helpers.langsmith_client import LangSmithTestClient
client = LangSmithTestClient()
# Fetch traces
traces = client.fetch_traces(limit=10)
# Get child runs
children = client.get_child_runs(parent_run_id)
```
## Running Tests
### Unit Tests Only (Default)
```bash
# Run all unit tests
.venv/bin/pytest tests/unit/ -v
# Run specific test file
.venv/bin/pytest tests/unit/test_message_parsing.py -v
# Run specific test
.venv/bin/pytest tests/unit/test_message_parsing.py::TestGetContent::test_get_content_from_message_wrapper -v
```
### With Coverage
```bash
# Generate coverage report
.venv/bin/pytest tests/unit/ --cov=tests --cov-report=html --cov-report=term-missing
# View HTML report
open htmlcov/index.html
```
### Integration Tests (Future)
Integration tests require `CC_LANGSMITH_API_KEY`:
```bash
# Run integration tests
CC_LANGSMITH_API_KEY="your_key" .venv/bin/pytest tests/integration/ -v -m integration
```
## Test Fixtures
### Sample Data Fixtures (conftest.py)
- `sample_user_message` - Example user message
- `sample_assistant_message` - Assistant message with tool use
- `sample_tool_result` - Tool result message
- `sample_streaming_parts` - SSE streaming parts
### Helper Fixtures
- `bash_executor` - BashRunner instance
- `langsmith_client` - LangSmith API client
- `state_manager` - State file manager
- `transcript_builder` - Transcript generator
- `temp_state_file` - Isolated state file
- `temp_transcript` - Temporary transcript path
### Example Usage
```python
def test_example(bash_executor, sample_assistant_message):
msg = json.dumps(sample_assistant_message)
result = bash_executor.call_function("get_content", msg)
content = json.loads(result)
assert len(content) == 3
```
## Troubleshooting
### Tests Failing with "Function not found"
The bash_runner removes the early exit check from stop_hook.sh. If functions are not found, ensure:
1. stop_hook.sh is in the correct location
2. The sed pattern matches the early exit block
### State Management Tests Using Real State File
The `STATE_FILE` environment variable should point to a temp file, but stop_hook.sh has it hardcoded. To fix:
- Modify stop_hook.sh line 47 to: `STATE_FILE="${STATE_FILE:-$HOME/.claude/state/langsmith_state.json}"`
- Or: Run tests in isolation and clean up afterwards
### Integration Tests Require API Key
Integration tests need a valid LangSmith API key:
```bash
export CC_LANGSMITH_API_KEY="lsv2_pt_..."
.venv/bin/pytest tests/integration/ -v -m integration
```
## Contributing
When adding new functions to stop_hook.sh:
1. Add corresponding unit tests
2. Use BashRunner to test in isolation
3. Add sample fixtures if needed
4. Ensure 80%+ test coverage
5. Run tests before committing
Example:
```python
def test_new_function(bash_executor):
"""Test description"""
result = bash_executor.call_function("new_function", "arg1", "arg2")
assert result == "expected_value"
```
View File
+215
View File
@@ -0,0 +1,215 @@
"""
Pytest configuration and shared fixtures for stop_hook.sh tests.
"""
import uuid
import pytest
from tests.helpers.bash_runner import BashRunner
from tests.helpers.state_manager import StateManager
# =============================================================================
# Test Environment Fixtures
# =============================================================================
@pytest.fixture
def temp_state_file(tmp_path):
    """
    Hand each test its own state-file path inside pytest's ``tmp_path``.

    Only the path is produced; the file itself is not created here.

    Yields:
        Path to ``langsmith_state.json`` under ``tmp_path``
    """
    isolated_path = tmp_path.joinpath("langsmith_state.json")
    # No explicit teardown: the path lives under tmp_path.
    yield isolated_path
@pytest.fixture(autouse=True)
def isolated_env(monkeypatch, temp_state_file, tmp_path):
    """
    Give every test its own environment variables.

    Declared with ``autouse=True``, so it applies to all tests without being
    requested explicitly.
    """
    overrides = {
        # State file location
        "STATE_FILE": str(temp_state_file),
        # Log file location
        "LOG_FILE": str(tmp_path / "hook.log"),
        # Tracing is off for most tests (can be re-enabled per test)
        "TRACE_TO_LANGSMITH": "false",
        # Debug logging is off
        "CC_LANGSMITH_DEBUG": "false",
        # Test project
        "CC_LANGSMITH_PROJECT": "cc-test",
    }
    for name, value in overrides.items():
        monkeypatch.setenv(name, value)
# =============================================================================
# Helper Class Fixtures
# =============================================================================
@pytest.fixture
def bash_executor():
    """
    Provide BashRunner for executing bash functions in isolation.

    The hook script is located relative to this file instead of through a
    machine-specific absolute path, so the suite runs from any checkout.

    Returns:
        BashRunner instance bound to the repository's stop_hook.sh
    """
    # Imported locally because this module has no top-level pathlib import.
    from pathlib import Path

    # NOTE(review): assumes this conftest.py sits in tests/ directly under the
    # repository root, with stop_hook.sh in that root - confirm the layout.
    script = Path(__file__).resolve().parent.parent / "stop_hook.sh"
    return BashRunner(str(script))
@pytest.fixture
def state_manager(temp_state_file):
    """
    Build a StateManager bound to the per-test state file.

    Returns:
        StateManager instance constructed with ``temp_state_file``
    """
    manager = StateManager(temp_state_file)
    return manager
# =============================================================================
# Sample Data Fixtures
# =============================================================================
@pytest.fixture
def sample_assistant_message():
    """
    Sample assistant transcript entry: thinking, text and one tool_use block.

    Returns:
        Dictionary with assistant message structure
    """
    thinking_block = {"type": "thinking", "thinking": "Let me analyze this..."}
    text_block = {"type": "text", "text": "Here's my response"}
    tool_use_block = {
        "type": "tool_use",
        "id": "tool_test_abc",
        "name": "Read",
        "input": {"file_path": "/test/file.txt"},
    }
    token_usage = {
        "input_tokens": 100,
        "output_tokens": 50,
        "cache_read_input_tokens": 1000,
    }
    message = {
        "id": "msg_test123",
        "role": "assistant",
        "model": "claude-sonnet-4-5-20250929",
        "content": [thinking_block, text_block, tool_use_block],
        "usage": token_usage,
    }
    return {
        "type": "assistant",
        "message": message,
        "timestamp": "2025-01-01T00:00:01Z",
    }
@pytest.fixture
def sample_tool_result():
    """
    Sample user-role transcript entry carrying a single tool_result block.

    The block's ``tool_use_id`` is "tool_test_abc".

    Returns:
        Dictionary with tool result structure
    """
    result_block = {
        "type": "tool_result",
        "tool_use_id": "tool_test_abc",
        "content": "File contents: hello world",
    }
    entry = {"type": "user", "role": "user"}
    entry["content"] = [result_block]
    entry["timestamp"] = "2025-01-01T00:00:02Z"
    return entry
@pytest.fixture
def sample_streaming_parts():
    """
    Three assistant entries that share one message ID (SSE simulation).

    Each entry carries one text fragment; ``output_tokens`` is cumulative
    across the parts (2, 4, 5) while ``input_tokens`` stays at 10.

    Returns:
        List of message dictionaries with same ID
    """
    msg_id = "msg_stream_123"
    # (text fragment, cumulative output_tokens, timestamp)
    fragments = [
        ("Hello ", 2, "2025-01-01T00:00:00.000Z"),
        ("world", 4, "2025-01-01T00:00:00.100Z"),
        ("!", 5, "2025-01-01T00:00:00.200Z"),
    ]
    return [
        {
            "type": "assistant",
            "message": {
                "id": msg_id,
                "role": "assistant",
                "model": "claude-sonnet-4-5-20250929",
                "content": [{"type": "text", "text": fragment}],
                "usage": {"input_tokens": 10, "output_tokens": tokens_so_far},
            },
            "timestamp": stamp,
        }
        for fragment, tokens_so_far, stamp in fragments
    ]
# =============================================================================
# Session ID Fixture
# =============================================================================
@pytest.fixture
def session_id():
    """
    Fresh random session ID for each test.

    Returns:
        String form of a newly generated UUID4
    """
    fresh = uuid.uuid4()
    return str(fresh)
# =============================================================================
# Parametrize Helpers
# =============================================================================
def pytest_configure(config):
    """Register the custom ``unit``, ``integration`` and ``slow`` markers."""
    marker_lines = (
        "unit: mark test as a unit test (no external dependencies)",
        "integration: mark test as an integration test (requires API access)",
        "slow: mark test as slow (takes >5 seconds)",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)
View File
+147
View File
@@ -0,0 +1,147 @@
"""
BashRunner - Execute bash functions from stop_hook.sh in isolation for unit testing.
This helper enables testing individual bash functions without executing the main script.
"""
import os
import shlex
import subprocess
from pathlib import Path
from typing import Optional
class BashRunner:
    """Execute bash functions from stop_hook.sh in isolation.

    The hook script is sourced in a fresh ``bash -c`` process after ``sed``
    removes two regions: the block from the line
    ``# Exit early if tracing disabled`` to the next ``fi`` line, and everything
    from the bare ``main`` invocation line to the end of the file. This leaves
    every function, including ``main`` itself, defined but not executed.
    """

    # sed arguments shared by every helper so they all see the same functions.
    _SED_FILTER = "-e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d'"

    def __init__(self, script_path: Optional[str] = None):
        """
        Args:
            script_path: Path to the hook script. When omitted, defaults to
                ``stop_hook.sh`` three directory levels above this file.

        Raises:
            FileNotFoundError: If the script does not exist
        """
        if script_path is None:
            # NOTE(review): assumes this module lives at
            # <repo>/tests/helpers/bash_runner.py with stop_hook.sh in <repo>.
            script_path = str(Path(__file__).resolve().parent.parent.parent / "stop_hook.sh")
        self.script_path = script_path
        if not Path(script_path).exists():
            raise FileNotFoundError(f"Script not found: {script_path}")

    def _source_command(self) -> str:
        """Return the bash line that sources the filtered hook script."""
        quoted_path = shlex.quote(str(self.script_path))
        return f"source <(sed {self._SED_FILTER} {quoted_path})"

    @staticmethod
    def _test_env() -> dict:
        """Return the current environment with tracing and debug logging disabled."""
        return {
            **os.environ,
            "TRACE_TO_LANGSMITH": "false",  # Disable hook during testing
            "CC_LANGSMITH_DEBUG": "false",  # Disable debug logging
        }

    def call_function(self, func_name: str, *args: str, stdin: Optional[str] = None) -> str:
        """
        Call a bash function with arguments.

        Args:
            func_name: Name of the function to call
            *args: Arguments to pass to the function (each is shell-quoted)
            stdin: Optional stdin input for the function

        Returns:
            stdout from function execution, with surrounding whitespace stripped

        Raises:
            RuntimeError: If the function exits non-zero, times out after 30
                seconds, or the subprocess cannot be run
        """
        quoted_args = ' '.join(shlex.quote(arg) for arg in args)
        script = f"""
set -e
set -o pipefail
# Source functions from stop_hook.sh (skip main execution and early exit)
{self._source_command()}
# Call target function
{func_name} {quoted_args}
"""
        try:
            result = subprocess.run(
                ["bash", "-c", script],
                input=stdin,
                capture_output=True,
                text=True,
                timeout=30,
                env=self._test_env(),
            )
            if result.returncode != 0:
                error_msg = f"Function {func_name} failed with exit code {result.returncode}\n"
                error_msg += f"STDOUT: {result.stdout}\n"
                error_msg += f"STDERR: {result.stderr}\n"
                error_msg += f"SCRIPT:\n{script}"
                raise RuntimeError(error_msg)
            return result.stdout.strip()
        except subprocess.TimeoutExpired:
            raise RuntimeError(f"Function {func_name} timed out after 30 seconds")
        except Exception as e:
            # This also re-wraps the non-zero-exit RuntimeError raised above; the
            # resulting message text is kept unchanged for existing callers.
            raise RuntimeError(f"Failed to execute function {func_name}: {str(e)}") from e

    def call_with_stdin(self, func_name: str, stdin: str, *args: str) -> str:
        """
        Call function with stdin input (convenience method).

        Args:
            func_name: Name of the function to call
            stdin: Input to pipe to the function
            *args: Arguments to pass to the function

        Returns:
            stdout from function execution
        """
        return self.call_function(func_name, *args, stdin=stdin)

    def get_function_source(self, func_name: str) -> str:
        """
        Extract the source code of a specific function via ``declare -f``.

        Useful for debugging or documentation purposes.

        Args:
            func_name: Name of the function

        Returns:
            The function source code as printed by bash

        Raises:
            RuntimeError: If bash exits non-zero (e.g. the function is not defined)
        """
        script = f"""
{self._source_command()}
declare -f {func_name}
"""
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            timeout=10,
            env=self._test_env(),
        )
        if result.returncode != 0:
            raise RuntimeError(f"Function {func_name} not found")
        return result.stdout.strip()

    def list_functions(self) -> list[str]:
        """
        List all functions defined in stop_hook.sh.

        Returns:
            List of function names (empty if bash exits non-zero)
        """
        script = f"""
{self._source_command()}
declare -F | awk '{{print $3}}'
"""
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            timeout=10,
            env=self._test_env(),
        )
        if result.returncode != 0:
            return []
        return [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
+167
View File
@@ -0,0 +1,167 @@
"""
State file management utilities for tests.
Provides helpers for managing langsmith_state.json during testing.
"""
import json
from pathlib import Path
from typing import Any, Optional
class StateManager:
    """Manage langsmith_state.json for tests.

    The state file is a JSON object keyed by session ID; each value is a
    dictionary of per-session fields such as ``last_line`` and ``turn_count``.
    """

    def __init__(self, state_file: Path):
        """
        Args:
            state_file: Location of the state file (str or Path)
        """
        self.state_file = Path(state_file)

    def load(self) -> dict:
        """
        Load state from file.

        Returns:
            State dictionary. An empty dict is returned when the file does not
            exist, cannot be read or decoded, or holds JSON that is not an
            object.
        """
        if not self.state_file.exists():
            return {}
        try:
            state = json.loads(self.state_file.read_text(encoding="utf-8"))
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return {}
        # Valid JSON that is not an object (list, null, number, ...) would crash
        # every accessor below, so it is treated as empty state.
        return state if isinstance(state, dict) else {}

    def save(self, state: dict):
        """
        Save state to file, creating parent directories as needed.

        Args:
            state: State dictionary to save
        """
        self.state_file.parent.mkdir(parents=True, exist_ok=True)
        self.state_file.write_text(json.dumps(state, indent=2), encoding="utf-8")

    def get_session_state(self, session_id: str) -> dict:
        """
        Get state for a specific session.

        Args:
            session_id: Session ID to look up

        Returns:
            Session state dictionary (empty dict if not found)
        """
        return self.load().get(session_id, {})

    def update_session(self, session_id: str, **kwargs):
        """
        Update session state with new values.

        Args:
            session_id: Session ID to update
            **kwargs: Key-value pairs to update in the session state
        """
        state = self.load()
        state.setdefault(session_id, {}).update(kwargs)
        self.save(state)

    def set_session_state(self, session_id: str, session_state: dict):
        """
        Set complete session state (replaces existing).

        Args:
            session_id: Session ID
            session_state: New session state dictionary
        """
        state = self.load()
        state[session_id] = session_state
        self.save(state)

    def delete_session(self, session_id: str):
        """
        Delete a session from state; the file is rewritten only if it was present.

        Args:
            session_id: Session ID to delete
        """
        state = self.load()
        if session_id in state:
            del state[session_id]
            self.save(state)

    def clear(self):
        """Clear all state (delete the file)."""
        if self.state_file.exists():
            self.state_file.unlink()

    def exists(self) -> bool:
        """
        Check if state file exists.

        Returns:
            True if file exists
        """
        return self.state_file.exists()

    def get_last_line(self, session_id: str) -> int:
        """
        Get the last processed line number for a session.

        Args:
            session_id: Session ID

        Returns:
            Last line number (0 if not found)
        """
        return self.get_session_state(session_id).get("last_line", 0)

    def get_turn_count(self, session_id: str) -> int:
        """
        Get the turn count for a session.

        Args:
            session_id: Session ID

        Returns:
            Turn count (0 if not found)
        """
        return self.get_session_state(session_id).get("turn_count", 0)

    def set_last_line(self, session_id: str, last_line: int):
        """
        Set the last processed line number for a session.

        Args:
            session_id: Session ID
            last_line: Line number
        """
        self.update_session(session_id, last_line=last_line)

    def set_turn_count(self, session_id: str, turn_count: int):
        """
        Set the turn count for a session.

        Args:
            session_id: Session ID
            turn_count: Number of turns
        """
        self.update_session(session_id, turn_count=turn_count)

    def list_sessions(self) -> list[str]:
        """
        Get list of all session IDs in state.

        Returns:
            List of session ID strings
        """
        return list(self.load().keys())

    def __repr__(self) -> str:
        return f"StateManager({self.state_file})"
View File
+17
View File
@@ -0,0 +1,17 @@
# Testing framework
pytest==8.3.4
pytest-asyncio==0.24.0
pytest-mock==3.14.0
pytest-timeout==2.3.1
pytest-cov==6.0.0
# JSONL parsing
jsonlines==4.0.0
# Subprocess handling
subprocess-tee==0.4.2
# Already installed in parent environment:
# langsmith==0.4.58
# langchain==1.1.3
# langchain-core==1.1.3
View File
+2
View File
@@ -0,0 +1,2 @@
{"type": "user", "role": "user", "content": "Hello", "timestamp": "2025-12-23T00:38:30.199307Z"}
{"type": "assistant", "message": {"id": "msg_5a37288a", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Hi there!"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.199503Z"}
+6
View File
@@ -0,0 +1,6 @@
{"type": "user", "role": "user", "content": "Question 1", "timestamp": "2025-12-23T00:38:30.199818Z"}
{"type": "assistant", "message": {"id": "msg_63ecf218", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Answer 1"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.199832Z"}
{"type": "user", "role": "user", "content": "Question 2", "timestamp": "2025-12-23T00:38:30.199834Z"}
{"type": "assistant", "message": {"id": "msg_d3b3a286", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Answer 2"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.199840Z"}
{"type": "user", "role": "user", "content": "Question 3", "timestamp": "2025-12-23T00:38:30.199841Z"}
{"type": "assistant", "message": {"id": "msg_6a863ce1", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Answer 3"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.199845Z"}
+5
View File
@@ -0,0 +1,5 @@
{"type": "user", "role": "user", "content": "Tell me a story", "timestamp": "2025-12-23T00:38:30.200150Z"}
{"type": "assistant", "message": {"id": "msg_3cc4f9e4", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Once "}], "usage": {"input_tokens": 10, "output_tokens": 2}}, "timestamp": "2025-12-23T00:38:30.200157Z"}
{"type": "assistant", "message": {"id": "msg_3cc4f9e4", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "upon "}], "usage": {"input_tokens": 10, "output_tokens": 4}}, "timestamp": "2025-12-23T00:38:30.200159Z"}
{"type": "assistant", "message": {"id": "msg_3cc4f9e4", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "a "}], "usage": {"input_tokens": 10, "output_tokens": 6}}, "timestamp": "2025-12-23T00:38:30.200161Z"}
{"type": "assistant", "message": {"id": "msg_3cc4f9e4", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "time..."}], "usage": {"input_tokens": 10, "output_tokens": 8}}, "timestamp": "2025-12-23T00:38:30.200162Z"}
+4
View File
@@ -0,0 +1,4 @@
{"type": "user", "role": "user", "content": "Read file test.txt", "timestamp": "2025-12-23T00:38:30.200007Z"}
{"type": "assistant", "message": {"id": "msg_ff5a77ea", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "I'll read that file."}, {"type": "tool_use", "id": "tool_0d13cad9", "name": "Read", "input": {"file_path": "/test/test.txt"}}], "usage": {"input_tokens": 10, "output_tokens": 15}}, "timestamp": "2025-12-23T00:38:30.200018Z"}
{"type": "user", "role": "user", "content": [{"type": "tool_result", "tool_use_id": "tool_0d13cad9", "content": "File content: hello world"}], "timestamp": "2025-12-23T00:38:30.200021Z"}
{"type": "assistant", "message": {"id": "msg_c47e16d2", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "The file says: hello world"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.200026Z"}
View File
+402
View File
@@ -0,0 +1,402 @@
"""
Unit tests for API call functions in stop_hook.sh.
Tests:
- api_call() - HTTP request handling, error codes, timeouts
- send_multipart_batch() - Batch sending via multipart endpoint
- cleanup_pending_turn() - Cleanup on early exit
"""
import json
import os
import subprocess
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
@pytest.mark.unit
class TestApiCallFunction:
    """Static checks on the text of the api_call() bash function.

    These tests do not perform HTTP requests; they fetch the function source
    through the ``bash_executor`` fixture and look for expected fragments.
    """

    def test_api_call_constructs_correct_curl_command(self, bash_executor, tmp_path):
        """api_call builds a curl invocation with the expected headers and timeout flag."""
        # Real curl calls are not exercised here; only the function text is inspected.
        fn_text = bash_executor.get_function_source("api_call")

        for fragment in (
            "x-api-key:",
            "Content-Type: application/json",
            "curl",
            "--max-time",
        ):
            assert fragment in fn_text

    def test_api_call_handles_method_parameter(self, bash_executor):
        """api_call passes an HTTP method through to curl -X."""
        fn_text = bash_executor.get_function_source("api_call")

        # Expect both the -X flag and a reference to the method variable.
        for fragment in ("-X", "method"):
            assert fragment in fn_text

    def test_api_call_uses_api_base_url(self, bash_executor):
        """api_call builds its URL from API_BASE and the endpoint argument."""
        fn_text = bash_executor.get_function_source("api_call")

        for fragment in ("API_BASE", "endpoint"):
            assert fragment in fn_text

    def test_api_call_extracts_http_code(self, bash_executor):
        """api_call captures the HTTP status via curl's %{http_code}."""
        fn_text = bash_executor.get_function_source("api_call")

        for fragment in ("http_code", "%{http_code}"):
            assert fragment in fn_text

    def test_api_call_returns_error_on_4xx(self, bash_executor):
        """api_call compares against the 200/300 bounds and returns 1 on failure."""
        fn_text = bash_executor.get_function_source("api_call")

        for fragment in ("200", "300", "return 1"):
            assert fragment in fn_text

    def test_api_call_logs_errors(self, bash_executor):
        """api_call logs at ERROR level when a request fails."""
        fn_text = bash_executor.get_function_source("api_call")

        for fragment in ("log", "ERROR"):
            assert fragment in fn_text
@pytest.mark.unit
class TestApiCallErrorHandling:
    """Tests for API call error handling scenarios.

    Each test inspects the text of the ``api_call`` bash function as printed
    by ``declare -f`` after sourcing a trimmed copy of ``stop_hook.sh``.
    """

    # The sed filter drops the block between "# Exit early if tracing disabled"
    # and its closing "fi", and everything from the "main" line to end of file,
    # before the script is sourced; then the api_call definition is printed.
    _DUMP_API_CALL_SCRIPT = """
    source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
    declare -f api_call
    """

    # Original developer checkout. Used only when stop_hook.sh cannot be found
    # by walking up from this test file.
    _LEGACY_REPO_DIR = Path("/Users/tanushreesharma/tracing-claude-code")

    @classmethod
    def _repo_dir(cls) -> Path:
        """Return the nearest ancestor directory of this file containing stop_hook.sh."""
        for parent in Path(__file__).resolve().parents:
            if (parent / "stop_hook.sh").is_file():
                return parent
        return cls._LEGACY_REPO_DIR

    @classmethod
    def _api_call_source(cls) -> str:
        """Run bash in the repo directory and return the api_call function text.

        Fails with bash's stderr attached when nothing was printed, so a
        broken checkout or missing function is reported directly instead of
        as an unrelated substring mismatch.
        """
        result = subprocess.run(
            ["bash", "-c", cls._DUMP_API_CALL_SCRIPT],
            capture_output=True,
            text=True,
            cwd=str(cls._repo_dir()),
        )
        assert result.stdout, f"could not extract api_call from stop_hook.sh: {result.stderr}"
        return result.stdout

    def test_api_call_structure_for_post(self):
        """Test api_call structure for POST requests"""
        source = self._api_call_source()

        # Verify it handles data parameter for POST
        assert "-d" in source
        assert "data" in source

    def test_api_call_structure_for_patch(self):
        """Test api_call structure for PATCH requests"""
        source = self._api_call_source()

        # PATCH uses same structure as POST with -X PATCH
        assert "method" in source

    def test_api_call_has_timeout(self):
        """Test that api_call has a timeout configured"""
        source = self._api_call_source()

        assert "--max-time" in source
        assert "60" in source  # 60 second timeout
@pytest.mark.unit
class TestSendMultipartBatch:
    """Static checks on the text of the send_multipart_batch() bash function."""

    def test_send_multipart_batch_exists(self, bash_executor):
        """The function is defined and its source mentions its own name."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")
        assert "send_multipart_batch" in fn_text

    def test_send_multipart_batch_handles_empty_batch(self, bash_executor):
        """An empty batch is detected via a size/length comparison against 0."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")

        size_checked = "batch_size" in fn_text or "length" in fn_text
        assert size_checked
        assert "0" in fn_text

    def test_send_multipart_batch_creates_temp_dir(self, bash_executor):
        """Batch files are staged in a directory created with mktemp -d."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")

        for fragment in ("mktemp -d", "temp_dir"):
            assert fragment in fn_text

    def test_send_multipart_batch_cleans_up_temp_files(self, bash_executor):
        """The staging directory is removed with rm -rf."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")
        assert "rm -rf" in fn_text

    def test_send_multipart_batch_uses_multipart_endpoint(self, bash_executor):
        """Requests target the /runs/multipart endpoint."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")
        assert "/runs/multipart" in fn_text

    def test_send_multipart_batch_handles_post_operation(self, bash_executor):
        """The 'post' operation is referenced alongside the operation parameter."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")

        assert "operation" in fn_text
        mentions_post = "post" in fn_text.lower() or "POST" in fn_text
        assert mentions_post

    def test_send_multipart_batch_handles_patch_operation(self, bash_executor):
        """The 'patch' operation is routed through the operation parameter."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")

        # Patch batches are still POSTed to the multipart endpoint; the operation
        # value is handed to serialize_for_multipart, which names the parts.
        for fragment in ("operation", "serialize_for_multipart"):
            assert fragment in fn_text

    def test_send_multipart_batch_logs_success(self, bash_executor):
        """A successful send is logged at INFO level."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")
        lowered = fn_text.lower()

        assert "log" in fn_text
        assert "INFO" in fn_text
        assert "succeeded" in lowered or "success" in lowered

    def test_send_multipart_batch_logs_failure(self, bash_executor):
        """A failed send is logged at ERROR level."""
        fn_text = bash_executor.get_function_source("send_multipart_batch")

        assert "ERROR" in fn_text
        assert "failed" in fn_text.lower()
@pytest.mark.unit
class TestCleanupPendingTurn:
    """Tests for cleanup_pending_turn() function"""

    @staticmethod
    def _read_hook_script() -> str:
        """Return the full text of stop_hook.sh.

        The script is located by walking up from this test file, so the tests
        do not depend on one developer's checkout location. The original
        absolute path is kept as a last-resort fallback.
        """
        legacy = Path("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh")
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        return legacy.read_text()

    def test_cleanup_function_exists(self, bash_executor):
        """Test that cleanup_pending_turn function exists"""
        source = bash_executor.get_function_source("cleanup_pending_turn")
        assert "cleanup_pending_turn" in source

    def test_cleanup_checks_current_turn_id(self, bash_executor):
        """Test that cleanup checks if there's a pending turn"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        # Should check CURRENT_TURN_ID
        assert "CURRENT_TURN_ID" in source
        assert "-n" in source  # Test for non-empty

    def test_cleanup_sends_patch_request(self, bash_executor):
        """Test that cleanup patches the pending run"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        # Should call api_call with PATCH
        assert "PATCH" in source
        assert "/runs/" in source

    def test_cleanup_sets_error_message(self, bash_executor):
        """Test that cleanup sets appropriate error message"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        # Should include error message
        assert "error" in source.lower()
        assert "early" in source.lower() or "incomplete" in source.lower()

    def test_cleanup_sets_end_time(self, bash_executor):
        """Test that cleanup sets end_time for the run"""
        source = bash_executor.get_function_source("cleanup_pending_turn")
        assert "end_time" in source

    def test_cleanup_is_set_as_trap(self):
        """Test that cleanup_pending_turn is set as EXIT trap"""
        content = self._read_hook_script()

        # Should have trap set for cleanup
        assert "trap cleanup_pending_turn EXIT" in content

    def test_cleanup_ignores_errors(self, bash_executor):
        """Test that cleanup ignores errors (since we're exiting anyway)"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        # Should have || true to ignore errors
        assert "|| true" in source
@pytest.mark.unit
class TestApiKeyHandling:
    """Tests for API key configuration"""

    @staticmethod
    def _read_hook_script() -> str:
        """Return the full text of stop_hook.sh.

        The script is located by walking up from this test file, so the tests
        do not depend on one developer's checkout location. The original
        absolute path is kept as a last-resort fallback.
        """
        legacy = Path("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh")
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        return legacy.read_text()

    def test_api_key_from_cc_langsmith_api_key(self):
        """Test that CC_LANGSMITH_API_KEY is referenced by the script"""
        content = self._read_hook_script()
        assert "CC_LANGSMITH_API_KEY" in content

    def test_api_key_fallback_to_langsmith_api_key(self):
        """Test fallback to LANGSMITH_API_KEY"""
        content = self._read_hook_script()

        # Should have fallback syntax
        assert '${CC_LANGSMITH_API_KEY:-$LANGSMITH_API_KEY}' in content

    def test_api_key_validation(self):
        """Test that missing API key is handled"""
        content = self._read_hook_script()

        # Should check if API_KEY is empty
        assert '-z "$API_KEY"' in content
        assert "not set" in content.lower() or "ERROR" in content
@pytest.mark.unit
class TestHttpResponseHandling:
    """Tests for HTTP response code handling.

    Each test inspects the text of the ``api_call`` bash function as printed
    by ``declare -f`` after sourcing a trimmed copy of ``stop_hook.sh``.
    """

    # The sed filter drops the block between "# Exit early if tracing disabled"
    # and its closing "fi", and everything from the "main" line to end of file,
    # before the script is sourced; then the api_call definition is printed.
    _DUMP_API_CALL_SCRIPT = """
    source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
    declare -f api_call
    """

    # Original developer checkout. Used only when stop_hook.sh cannot be found
    # by walking up from this test file.
    _LEGACY_REPO_DIR = Path("/Users/tanushreesharma/tracing-claude-code")

    @classmethod
    def _repo_dir(cls) -> Path:
        """Return the nearest ancestor directory of this file containing stop_hook.sh."""
        for parent in Path(__file__).resolve().parents:
            if (parent / "stop_hook.sh").is_file():
                return parent
        return cls._LEGACY_REPO_DIR

    @classmethod
    def _api_call_source(cls) -> str:
        """Run bash in the repo directory and return the api_call function text.

        Fails with bash's stderr attached when nothing was printed, so a
        broken checkout or missing function is reported directly instead of
        as an unrelated substring mismatch.
        """
        result = subprocess.run(
            ["bash", "-c", cls._DUMP_API_CALL_SCRIPT],
            capture_output=True,
            text=True,
            cwd=str(cls._repo_dir()),
        )
        assert result.stdout, f"could not extract api_call from stop_hook.sh: {result.stderr}"
        return result.stdout

    def test_success_codes_accepted(self):
        """Test that 2xx codes are treated as success"""
        source = self._api_call_source()

        # Check for 200-299 range logic
        assert "200" in source
        assert "300" in source

    def test_4xx_codes_logged_as_error(self):
        """Test that 4xx codes are logged as errors"""
        source = self._api_call_source()

        # Should log HTTP code on error
        assert "HTTP" in source
        assert "http_code" in source

    def test_response_body_logged_on_error(self):
        """Test that response body is logged on error"""
        source = self._api_call_source()

        # Should log response
        assert "response" in source.lower()

    def test_request_data_logged_on_error(self):
        """Test that request data is logged (truncated) on error"""
        source = self._api_call_source()

        # Should log request data (truncated to 500 chars)
        assert "data" in source
        assert "500" in source  # Truncation limit
@pytest.mark.unit
class TestProjectConfiguration:
    """Tests for project configuration"""

    @staticmethod
    def _read_hook_script() -> str:
        """Return the full text of stop_hook.sh.

        The script is located by walking up from this test file, so the tests
        do not depend on one developer's checkout location. The original
        absolute path is kept as a last-resort fallback.
        """
        legacy = Path("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh")
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        return legacy.read_text()

    def test_project_name_from_env(self):
        """Test that project name comes from CC_LANGSMITH_PROJECT"""
        content = self._read_hook_script()
        assert "CC_LANGSMITH_PROJECT" in content

    def test_project_name_default(self):
        """Test that project has default value"""
        content = self._read_hook_script()

        # Should have default: "claude-code"
        assert '${CC_LANGSMITH_PROJECT:-claude-code}' in content

    def test_api_base_url(self):
        """Test that API base URL is configured"""
        content = self._read_hook_script()
        assert "api.smith.langchain.com" in content
+238
View File
@@ -0,0 +1,238 @@
"""
Unit tests for content formatting functions from stop_hook.sh.
Tests:
- format_content() - Convert content to LangSmith format
- merge_assistant_parts() - Merge SSE streaming parts
- get_usage_from_parts() - Extract token usage from parts
"""
import json
import pytest
@pytest.mark.unit
class TestFormatContent:
    """Behavioural tests for the format_content() bash function."""

    @staticmethod
    def _format(bash_executor, message: dict) -> list:
        """Serialize ``message``, run it through format_content and decode the JSON output."""
        raw = bash_executor.call_function("format_content", json.dumps(message))
        return json.loads(raw)

    def test_formats_string_content(self, bash_executor):
        """A plain string becomes a single text block."""
        blocks = self._format(bash_executor, {"content": "hello world"})

        assert isinstance(blocks, list)
        assert len(blocks) == 1
        only = blocks[0]
        assert only["type"] == "text"
        assert only["text"] == "hello world"

    def test_formats_array_content(self, bash_executor):
        """A mixed content array keeps its order and block count."""
        message = {
            "content": [
                {"type": "thinking", "thinking": "analyzing..."},
                {"type": "text", "text": "result"},
                {"type": "tool_use", "id": "t1", "name": "Read", "input": {}},
            ]
        }
        blocks = self._format(bash_executor, message)

        assert len(blocks) == 3
        first, second, third = blocks[0], blocks[1], blocks[2]
        assert first["type"] == "thinking"
        assert second["type"] == "text"
        # A tool_use block is expected to come back as tool_call.
        assert third["type"] == "tool_call"
        assert third["name"] == "Read"

    def test_converts_tool_use_to_tool_call(self, bash_executor):
        """tool_use is rewritten to tool_call, with ``input`` exposed as ``args``."""
        tool_block = {
            "type": "tool_use",
            "id": "tool_123",
            "name": "Bash",
            "input": {"command": "ls"},
        }
        blocks = self._format(bash_executor, {"content": [tool_block]})

        assert len(blocks) == 1
        converted = blocks[0]
        assert converted["type"] == "tool_call"
        assert converted["id"] == "tool_123"
        assert converted["name"] == "Bash"
        assert converted["args"] == {"command": "ls"}

    def test_handles_empty_content(self, bash_executor):
        """An empty content array yields the default empty text block."""
        blocks = self._format(bash_executor, {"content": []})

        assert len(blocks) == 1
        fallback = blocks[0]
        assert fallback["type"] == "text"
        assert fallback["text"] == ""

    def test_handles_null_content(self, bash_executor):
        """Null content yields the default empty text block."""
        blocks = self._format(bash_executor, {"content": None})

        assert len(blocks) == 1
        fallback = blocks[0]
        assert fallback["type"] == "text"
        assert fallback["text"] == ""

    def test_handles_missing_content(self, bash_executor):
        """A message without a content field yields the default empty text block."""
        blocks = self._format(bash_executor, {"message": {"id": "123"}})

        assert len(blocks) == 1
        fallback = blocks[0]
        assert fallback["type"] == "text"
        assert fallback["text"] == ""
@pytest.mark.unit
class TestMergeAssistantParts:
    """Behavioural tests for the merge_assistant_parts() bash function."""

    @staticmethod
    def _merge(bash_executor, parts: list) -> dict:
        """Serialize ``parts``, run them through merge_assistant_parts and decode the result."""
        raw = bash_executor.call_function("merge_assistant_parts", json.dumps(parts))
        return json.loads(raw)

    def test_merges_multiple_parts_with_same_id(self, bash_executor, sample_streaming_parts):
        """Streaming parts sharing a message id collapse into one message."""
        merged = self._merge(bash_executor, sample_streaming_parts)

        # Shape of the merged object.
        assert "message" in merged
        message = merged["message"]
        assert "content" in message

        # Text fragments are concatenated into a single block.
        blocks = message["content"]
        assert len(blocks) == 1
        assert blocks[0]["type"] == "text"
        assert blocks[0]["text"] == "Hello world!"

        # Usage is taken from the final part, whose counts are cumulative.
        assert "_usage" in message
        assert message["_usage"]["output_tokens"] == 5

    def test_merges_text_blocks_only(self, bash_executor):
        """Two text-only parts are joined into one text block."""
        first_part = {
            "message": {
                "id": "msg_1",
                "content": [{"type": "text", "text": "Part 1 "}],
                "usage": {"input_tokens": 10, "output_tokens": 2},
            }
        }
        second_part = {
            "message": {
                "id": "msg_1",
                "content": [{"type": "text", "text": "Part 2"}],
                "usage": {"input_tokens": 10, "output_tokens": 4},
            }
        }
        merged = self._merge(bash_executor, [first_part, second_part])

        blocks = merged["message"]["content"]
        assert len(blocks) == 1
        assert blocks[0]["text"] == "Part 1 Part 2"

    def test_preserves_non_text_content(self, bash_executor):
        """A tool_use block is kept as its own block next to the text."""
        part = {
            "message": {
                "id": "msg_1",
                "content": [
                    {"type": "text", "text": "Calling tool"},
                    {"type": "tool_use", "id": "t1", "name": "Read", "input": {}},
                ],
                "usage": {"input_tokens": 10, "output_tokens": 5},
            }
        }
        merged = self._merge(bash_executor, [part])

        blocks = merged["message"]["content"]
        assert len(blocks) == 2
        assert blocks[0]["type"] == "text"
        assert blocks[1]["type"] == "tool_use"

    def test_handles_single_part(self, bash_executor):
        """A lone part keeps its single text block unchanged."""
        part = {
            "message": {
                "id": "msg_1",
                "content": [{"type": "text", "text": "Single part"}],
                "usage": {"input_tokens": 10, "output_tokens": 5},
            }
        }
        merged = self._merge(bash_executor, [part])

        blocks = merged["message"]["content"]
        assert len(blocks) == 1
        assert blocks[0]["text"] == "Single part"
@pytest.mark.unit
class TestGetUsageFromParts:
    """Behavioural tests for the get_usage_from_parts() bash function."""

    @staticmethod
    def _raw_usage(bash_executor, parts: list) -> str:
        """Serialize ``parts`` and return the undecoded output of get_usage_from_parts."""
        return bash_executor.call_function("get_usage_from_parts", json.dumps(parts))

    def test_extracts_usage_from_last_part(self, bash_executor, sample_streaming_parts):
        """Usage comes from the final streaming part, which carries cumulative counts."""
        usage = json.loads(self._raw_usage(bash_executor, sample_streaming_parts))

        assert usage["input_tokens"] == 10
        assert usage["output_tokens"] == 5

    def test_extracts_usage_with_cache_tokens(self, bash_executor):
        """Cache read/creation counters are passed through unchanged."""
        reported = {
            "input_tokens": 100,
            "output_tokens": 50,
            "cache_read_input_tokens": 1000,
            "cache_creation_input_tokens": 200,
        }
        usage = json.loads(
            self._raw_usage(bash_executor, [{"message": {"usage": reported}}])
        )

        assert usage["input_tokens"] == 100
        assert usage["output_tokens"] == 50
        assert usage["cache_read_input_tokens"] == 1000
        assert usage["cache_creation_input_tokens"] == 200

    def test_handles_missing_usage(self, bash_executor):
        """Parts without a usage field produce JSON null or an empty object."""
        no_usage_part = {"message": {"content": [{"type": "text", "text": "hi"}]}}
        output = self._raw_usage(bash_executor, [no_usage_part])

        # The raw text is compared, so either literal is accepted.
        assert output in ["null", "{}"]
+202
View File
@@ -0,0 +1,202 @@
"""
Unit tests for cost tracking and usage metadata from stop_hook.sh.
These tests verify that token usage is correctly tracked for cost monitoring:
- Total input tokens (including cache tokens)
- Output tokens
- Cache token breakdowns (creation vs read)
"""
import json
import pytest
@pytest.mark.unit
class TestUsageMetadata:
    """Tests for usage_metadata calculation (cost tracking).

    NOTE(review): these tests evaluate a copy of the jq expression embedded in
    the test itself (the original comment says it mirrors stop_hook.sh line
    514), so they will not notice if the expression in stop_hook.sh drifts.
    """

    @staticmethod
    def _compute_usage_metadata(bash_executor, usage: dict) -> dict:
        """Pipe ``usage`` through the usage_metadata jq expression and decode the result.

        The script is executed via ``bash_executor.call_function("bash", "-c", script)``,
        i.e. ``bash`` is invoked as the "function" with the script as its argument.
        The original spelled the same call through ``__self__``/``__func__``.

        Args:
            bash_executor: Fixture exposing ``call_function``
            usage: Raw usage dict as reported on an assistant message

        Returns:
            Decoded usage_metadata object produced by jq
        """
        # input_tokens is the sum of regular, cache-creation and cache-read
        # tokens; each missing field falls back to 0 via jq's `//` operator.
        script = f"""
        set -e
        source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
        echo '{json.dumps(usage)}' | jq '{{
            input_tokens: ((.input_tokens // 0) + (.cache_creation_input_tokens // 0) + (.cache_read_input_tokens // 0)),
            output_tokens: (.output_tokens // 0),
            input_token_details: {{
                cache_creation: (.cache_creation_input_tokens // 0),
                cache_read: (.cache_read_input_tokens // 0)
            }}
        }}'
        """
        result = bash_executor.call_function("bash", "-c", script)
        return json.loads(result)

    def test_calculates_total_input_tokens_with_cache(self, bash_executor):
        """Test that total input tokens includes cache tokens"""
        usage = {
            "input_tokens": 100,
            "output_tokens": 50,
            "cache_creation_input_tokens": 500,
            "cache_read_input_tokens": 2000
        }

        metadata = self._compute_usage_metadata(bash_executor, usage)

        # Total input = 100 + 500 + 2000 = 2600
        assert metadata["input_tokens"] == 2600
        assert metadata["output_tokens"] == 50
        assert metadata["input_token_details"]["cache_creation"] == 500
        assert metadata["input_token_details"]["cache_read"] == 2000

    def test_handles_missing_cache_tokens(self, bash_executor):
        """Test usage metadata when cache tokens are missing"""
        usage = {
            "input_tokens": 100,
            "output_tokens": 50
            # No cache tokens
        }

        metadata = self._compute_usage_metadata(bash_executor, usage)

        # Total input = 100 + 0 + 0 = 100
        assert metadata["input_tokens"] == 100
        assert metadata["output_tokens"] == 50
        assert metadata["input_token_details"]["cache_creation"] == 0
        assert metadata["input_token_details"]["cache_read"] == 0

    def test_realistic_usage_scenario(self, bash_executor):
        """Test realistic usage from cc_transcript.jsonl"""
        # Per the original author's note: real example from line 2 of cc_transcript.jsonl
        usage = {
            "input_tokens": 9,
            "cache_creation_input_tokens": 630,
            "cache_read_input_tokens": 18664,
            "output_tokens": 8
        }

        metadata = self._compute_usage_metadata(bash_executor, usage)

        # Total input = 9 + 630 + 18664 = 19303
        assert metadata["input_tokens"] == 19303
        assert metadata["output_tokens"] == 8
        assert metadata["input_token_details"]["cache_creation"] == 630
        assert metadata["input_token_details"]["cache_read"] == 18664
@pytest.mark.unit
class TestCostImplications:
    """Arithmetic checks on literal usage dicts illustrating how cache tokens are accounted for."""

    def test_cache_read_reduces_cost(self):
        """Cache-read tokens are carried separately from regular input tokens."""
        # Two usages with the same total input (10010): one served mostly from
        # cache, one made up entirely of regular input tokens. Keeping the cache
        # figure separate is what makes a cost calculation possible.
        cached = {
            "input_tokens": 10,
            "cache_read_input_tokens": 10000,
            "output_tokens": 50,
        }
        uncached = {
            "input_tokens": 10010,  # Same total but all regular
            "output_tokens": 50,
        }

        cached_total = cached["input_tokens"] + cached.get("cache_read_input_tokens", 0)
        assert cached_total == 10010
        assert uncached["input_tokens"] == 10010

        # The cache-read figure itself is still available.
        cache_read = cached.get("cache_read_input_tokens")
        assert cache_read == 10000

    def test_cache_creation_tracked_for_write_cost(self):
        """Cache-creation tokens add to total input while staying separately visible."""
        usage = {
            "input_tokens": 100,
            "cache_creation_input_tokens": 1000,
            "output_tokens": 50,
        }

        # Regular plus cache-creation tokens give the total input.
        regular = usage["input_tokens"]
        created = usage["cache_creation_input_tokens"]
        assert regular + created == 1100

        # The cache-creation figure itself is still available.
        assert created == 1000
@pytest.mark.unit
class TestUsageAggregation:
    """Usage extraction and preservation across streamed assistant parts."""

    def test_tracks_usage_per_assistant_message(self, bash_executor, sample_streaming_parts):
        """get_usage_from_parts reports both token counters for a streamed message."""
        # Usage is extracted per assistant message, so each LLM call can be
        # attributed its own cost.
        payload = json.dumps(sample_streaming_parts)
        usage = json.loads(bash_executor.call_function("get_usage_from_parts", payload))

        for counter in ("input_tokens", "output_tokens"):
            assert counter in usage
        # The last streamed part carries the cumulative output count.
        assert usage["output_tokens"] == 5

    def test_usage_preserved_through_merge(self, bash_executor, sample_streaming_parts):
        """merge_assistant_parts keeps the usage under message._usage."""
        payload = json.dumps(sample_streaming_parts)
        merged = json.loads(bash_executor.call_function("merge_assistant_parts", payload))

        message = merged["message"]
        assert "_usage" in message
        kept_usage = message["_usage"]
        assert kept_usage["output_tokens"] == 5
        assert kept_usage["input_tokens"] == 10
+557
View File
@@ -0,0 +1,557 @@
"""
Unit tests for main() entry point in stop_hook.sh.
Tests:
- Hook input parsing (session_id, transcript_path)
- stop_hook_active flag handling
- Incremental processing (last_line tracking)
- Turn grouping logic
- Message ID tracking for SSE streaming
- State updates
- Execution time tracking
"""
import json
import subprocess
import tempfile
from pathlib import Path
import pytest
@pytest.mark.unit
class TestHookInputParsing:
    """Tests for parsing hook input JSON"""

    @staticmethod
    def _read_hook_script() -> str:
        """Return the full text of stop_hook.sh.

        The script is located by walking up from this test file, so the tests
        do not depend on one developer's checkout location. The original
        absolute path is kept as a last-resort fallback.
        """
        legacy = Path("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh")
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        return legacy.read_text()

    def test_extracts_session_id(self):
        """Test that session_id is extracted from hook input"""
        content = self._read_hook_script()

        assert "session_id" in content
        assert '.session_id' in content  # jq extraction

    def test_extracts_transcript_path(self):
        """Test that transcript_path is extracted from hook input"""
        content = self._read_hook_script()

        assert "transcript_path" in content
        assert '.transcript_path' in content  # jq extraction

    def test_expands_tilde_in_transcript_path(self):
        """Test that ~ is expanded to $HOME in transcript_path"""
        content = self._read_hook_script()

        # Should use sed to replace ~
        assert 's|^~|$HOME|' in content

    def test_validates_session_id_not_empty(self):
        """Test that empty session_id is handled"""
        content = self._read_hook_script()
        assert '-z "$session_id"' in content

    def test_validates_transcript_file_exists(self):
        """Test that missing transcript file is handled"""
        content = self._read_hook_script()
        assert '! -f "$transcript_path"' in content
@pytest.mark.unit
class TestStopHookActiveFlag:
    """Source-text checks for stop_hook_active handling in stop_hook.sh."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_checks_stop_hook_active_flag(self):
        """The script references the stop_hook_active flag."""
        content = self._hook_source()
        assert "stop_hook_active" in content

    def test_exits_when_stop_hook_active_is_true(self):
        """The script compares the flag to true and contains an `exit 0`."""
        content = self._hook_source()
        assert '.stop_hook_active == true' in content
        assert "exit 0" in content
@pytest.mark.unit
class TestIncrementalProcessing:
    """Source-text checks for incremental processing via last_line tracking."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_loads_state_for_last_line(self):
        """The script references load_state and last_line."""
        content = self._hook_source()
        assert "load_state" in content
        assert "last_line" in content

    def test_uses_awk_to_skip_processed_lines(self):
        """The script uses awk with an `NR >` condition to skip processed lines."""
        content = self._hook_source()
        # Should use awk with NR > start
        assert "awk" in content
        assert "NR >" in content

    def test_tracks_new_last_line(self):
        """The script references new_last_line."""
        content = self._hook_source()
        assert "new_last_line" in content

    def test_updates_state_with_new_last_line(self):
        """The script references save_state."""
        content = self._hook_source()
        assert "save_state" in content

    def test_exits_early_if_no_new_messages(self):
        """The script contains a 'No new messages' message or an `exit 0`."""
        content = self._hook_source()
        assert "No new messages" in content or "exit 0" in content
@pytest.mark.unit
class TestTurnGrouping:
    """Source-text checks for how stop_hook.sh groups messages into turns."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_tracks_current_user_message(self):
        """The script references current_user."""
        content = self._hook_source()
        assert "current_user" in content

    def test_tracks_current_assistants_array(self):
        """The script references current_assistants and an empty-array literal."""
        content = self._hook_source()
        assert "current_assistants" in content
        assert '"[]"' in content or "='[]'" in content

    def test_tracks_current_tool_results(self):
        """The script references current_tool_results."""
        content = self._hook_source()
        assert "current_tool_results" in content

    def test_identifies_user_role(self):
        """The script references the "user" role."""
        content = self._hook_source()
        # Should check for role == "user"
        assert '"user"' in content
        assert "role" in content

    def test_identifies_assistant_role(self):
        """The script references the "assistant" role."""
        content = self._hook_source()
        assert '"assistant"' in content

    def test_new_user_starts_new_turn(self):
        """The script assigns the current line to current_user."""
        content = self._hook_source()
        # When user message is found (not tool result), should start new turn
        assert "current_user" in content
        assert 'current_user="$line"' in content

    def test_tool_result_added_to_current_turn(self):
        """The script references is_tool_result and current_tool_results."""
        content = self._hook_source()
        assert "is_tool_result" in content
        assert "current_tool_results" in content

    def test_creates_trace_when_turn_complete(self):
        """The script references create_trace."""
        content = self._hook_source()
        assert "create_trace" in content
@pytest.mark.unit
class TestSSEStreamingMerge:
    """Source-text checks for merging SSE streaming message parts."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_tracks_current_msg_id(self):
        """The script references current_msg_id."""
        content = self._hook_source()
        assert "current_msg_id" in content

    def test_tracks_current_assistant_parts(self):
        """The script references current_assistant_parts."""
        content = self._hook_source()
        assert "current_assistant_parts" in content

    def test_same_msg_id_adds_to_parts(self):
        """The script compares msg_id against current_msg_id."""
        content = self._hook_source()
        # Should compare msg_id to current_msg_id
        assert '$msg_id" = "$current_msg_id"' in content or 'msg_id = "$current_msg_id"' in content

    def test_different_msg_id_starts_new_message(self):
        """The script assigns msg_id to current_msg_id."""
        content = self._hook_source()
        # Should set current_msg_id to new msg_id
        assert 'current_msg_id="$msg_id"' in content

    def test_merges_parts_before_new_message(self):
        """The script references merge_assistant_parts."""
        content = self._hook_source()
        assert "merge_assistant_parts" in content

    def test_extracts_message_id_from_line(self):
        """The script contains a `.message.id` jq filter."""
        content = self._hook_source()
        # Should extract .message.id via jq
        assert ".message.id" in content
@pytest.mark.unit
class TestStateUpdates:
    """Source-text checks for state file updates after processing."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_updates_last_line_in_state(self):
        """The script references last_line and new_last_line."""
        content = self._hook_source()
        assert "last_line" in content
        assert "new_last_line" in content

    def test_updates_turn_count_in_state(self):
        """The script references turn_count."""
        content = self._hook_source()
        assert "turn_count" in content

    def test_updates_timestamp_in_state(self):
        """The script references an `updated` field."""
        content = self._hook_source()
        assert "updated" in content

    def test_state_is_session_specific(self):
        """The script indexes state by a `$sid` key."""
        content = self._hook_source()
        # Should use session_id as key
        assert ".[$sid]" in content or '[$sid]' in content
@pytest.mark.unit
class TestExecutionTimeTracking:
    """Source-text checks for execution time tracking and slow-run warnings."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_tracks_script_start_time(self):
        """The script references script_start."""
        content = self._hook_source()
        assert "script_start" in content

    def test_tracks_script_end_time(self):
        """The script references script_end."""
        content = self._hook_source()
        assert "script_end" in content

    def test_calculates_duration(self):
        """The script references duration."""
        content = self._hook_source()
        assert "duration" in content

    def test_logs_execution_time(self):
        """The script references both duration and log."""
        content = self._hook_source()
        # Should log processing time
        assert "duration" in content
        assert "log" in content

    def test_warns_on_slow_execution(self):
        """The script contains the 180-second threshold and a WARN level."""
        content = self._hook_source()
        # Should warn if > 180 seconds
        assert "180" in content
        assert "WARN" in content
@pytest.mark.unit
class TestTracingDisabledCheck:
    """Source-text checks for the early exit when tracing is disabled."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_checks_trace_to_langsmith_env(self):
        """The script references TRACE_TO_LANGSMITH."""
        content = self._hook_source()
        assert "TRACE_TO_LANGSMITH" in content

    def test_case_insensitive_check(self):
        """The script lowercases a value with tr before comparing."""
        content = self._hook_source()
        # Should use tr to lowercase
        assert "tr '[:upper:]' '[:lower:]'" in content

    def test_exits_early_when_disabled(self):
        """The script compares against "true" and contains an `exit 0`."""
        content = self._hook_source()
        # Should have early exit
        assert '!= "true"' in content
        assert "exit 0" in content
@pytest.mark.unit
class TestRequiredCommandChecks:
    """Source-text checks for required-command availability checks."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_checks_jq_available(self):
        """The script references jq and uses `command -v`."""
        content = self._hook_source()
        assert "jq" in content
        assert "command -v" in content

    def test_checks_curl_available(self):
        """The script references curl."""
        content = self._hook_source()
        assert "curl" in content

    def test_checks_uuidgen_available(self):
        """The script references uuidgen."""
        content = self._hook_source()
        assert "uuidgen" in content

    def test_exits_gracefully_if_command_missing(self):
        """The script contains an `exit 0`."""
        content = self._hook_source()
        # Should exit 0 (not error) if command missing
        assert "exit 0" in content
@pytest.mark.unit
class TestFinalTurnProcessing:
    """Source-text checks for processing the final turn of the transcript."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_processes_pending_assistant_parts(self):
        """The script references current_msg_id and merge_assistant_parts."""
        content = self._hook_source()
        # Should check for pending parts after loop
        assert "current_msg_id" in content
        assert "merge_assistant_parts" in content

    def test_processes_final_turn(self):
        """create_trace appears after the '# Process final turn' marker."""
        content = self._hook_source()
        marker = "# Process final turn"
        marker_pos = content.find(marker)
        # find() returns -1 when the marker is absent, which would silently
        # slice the last character; fail with a clear message instead.
        assert marker_pos != -1, f"marker {marker!r} not found in stop_hook.sh"
        # Look for create_trace call after loop ends
        main_section = content[marker_pos:]
        assert "create_trace" in main_section
@pytest.mark.unit
class TestLoggingInMain:
    """Source-text checks for log messages emitted by stop_hook.sh."""

    @staticmethod
    def _hook_source():
        """Return the text of stop_hook.sh, found by walking up from this file.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        for parent in Path(__file__).resolve().parents:
            candidate = parent / "stop_hook.sh"
            if candidate.is_file():
                return candidate.read_text()
        raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

    def test_logs_session_start(self):
        """The script contains the 'Processing session' message."""
        content = self._hook_source()
        assert "Processing session" in content

    def test_logs_message_count(self):
        """The script contains the 'new messages' message."""
        content = self._hook_source()
        assert "new messages" in content

    def test_logs_turns_processed(self):
        """The script contains the word 'turns'."""
        content = self._hook_source()
        assert "turns" in content

    def test_logs_invalid_input_warning(self):
        """The script contains a WARN level and the 'Invalid input' message."""
        content = self._hook_source()
        assert "WARN" in content
        assert "Invalid input" in content
@pytest.mark.unit
class TestMainIntegration:
    """Early-exit checks that execute stop_hook.sh as a subprocess.

    Each test feeds a hook-input JSON document on stdin and expects the
    script to exit with status 0.
    """

    @staticmethod
    def _run_hook(payload, tmp_path, **env_overrides):
        """Run stop_hook.sh with `payload` serialized as JSON on stdin.

        Args:
            payload: dict to serialize as the hook input.
            tmp_path: directory that receives hook.log (exported as LOG_FILE).
            **env_overrides: extra environment variables for the script.

        Returns:
            The subprocess.CompletedProcess for the script run.

        Raises:
            FileNotFoundError: if no parent directory contains stop_hook.sh.
        """
        import os  # local import: this module does not import os at file level

        # Locate the repository by walking up from this test file instead of
        # relying on one developer's absolute checkout path.
        for parent in Path(__file__).resolve().parents:
            if (parent / "stop_hook.sh").is_file():
                repo_root = parent
                break
        else:
            raise FileNotFoundError("stop_hook.sh not found in any parent of " + __file__)

        env = dict(os.environ, LOG_FILE=str(tmp_path / "hook.log"), **env_overrides)
        # The JSON goes through stdin rather than being interpolated into a
        # single-quoted shell string, so quotes in the payload cannot break it.
        return subprocess.run(
            ["bash", "stop_hook.sh"],
            input=json.dumps(payload) + "\n",
            cwd=repo_root,
            env=env,
            capture_output=True,
            text=True,
        )

    def test_main_exits_when_tracing_disabled(self, tmp_path, monkeypatch):
        """Exits 0 when TRACE_TO_LANGSMITH is not "true"."""
        transcript = tmp_path / "transcript.jsonl"
        transcript.write_text('{"type": "user", "content": "hello"}\n')
        result = self._run_hook(
            {"session_id": "test-session", "transcript_path": str(transcript)},
            tmp_path,
            TRACE_TO_LANGSMITH="false",
        )
        # Should exit 0 (gracefully)
        assert result.returncode == 0

    def test_main_exits_when_missing_session_id(self, tmp_path):
        """Exits 0 when session_id is an empty string."""
        transcript = tmp_path / "transcript.jsonl"
        transcript.write_text('{"type": "user", "content": "hello"}\n')
        result = self._run_hook(
            {"session_id": "", "transcript_path": str(transcript)},
            tmp_path,
            TRACE_TO_LANGSMITH="true",
            CC_LANGSMITH_API_KEY="test-key",
        )
        # Should exit 0 (gracefully)
        assert result.returncode == 0

    def test_main_exits_when_transcript_missing(self, tmp_path):
        """Exits 0 when the transcript file does not exist."""
        result = self._run_hook(
            {
                "session_id": "test-session",
                "transcript_path": str(tmp_path / "nonexistent.jsonl"),
            },
            tmp_path,
            TRACE_TO_LANGSMITH="true",
            CC_LANGSMITH_API_KEY="test-key",
        )
        # Should exit 0 (gracefully)
        assert result.returncode == 0

    def test_main_exits_when_stop_hook_active(self, tmp_path):
        """Exits 0 when stop_hook_active is true in the hook input."""
        transcript = tmp_path / "transcript.jsonl"
        transcript.write_text('{"type": "user", "content": "hello"}\n')
        result = self._run_hook(
            {
                "session_id": "test-session",
                "transcript_path": str(transcript),
                "stop_hook_active": True,
            },
            tmp_path,
            TRACE_TO_LANGSMITH="true",
            CC_LANGSMITH_API_KEY="test-key",
        )
        # Should exit 0
        assert result.returncode == 0
+195
View File
@@ -0,0 +1,195 @@
"""
Unit tests for message parsing functions from stop_hook.sh.
Tests:
- get_content() - Extract content from messages
- is_tool_result() - Identify tool result messages
- get_tool_uses() - Extract tool_use blocks
"""
import json
import pytest
@pytest.mark.unit
class TestGetContent:
    """Exercises the get_content() shell function through bash_executor."""

    def test_get_content_from_message_wrapper(self, bash_executor):
        """Content nested as {message: {content: ...}} is returned."""
        payload = json.dumps({"message": {"content": "hello"}})
        output = bash_executor.call_function("get_content", payload)
        assert output == '"hello"'

    def test_get_content_from_direct_format(self, bash_executor):
        """Top-level {content: ...} is returned."""
        payload = json.dumps({"content": "world"})
        output = bash_executor.call_function("get_content", payload)
        assert output == '"world"'

    def test_get_content_with_array(self, bash_executor):
        """Array content comes back as a JSON list with its blocks intact."""
        blocks = [{"type": "text", "text": "hi"}]
        payload = json.dumps({"content": blocks})
        decoded = json.loads(bash_executor.call_function("get_content", payload))
        assert isinstance(decoded, list)
        first = decoded[0]
        assert first["type"] == "text"
        assert first["text"] == "hi"

    def test_get_content_returns_null_for_invalid(self, bash_executor):
        """A message with no content yields the literal string null."""
        payload = json.dumps({})
        output = bash_executor.call_function("get_content", payload)
        assert output == "null"

    def test_get_content_with_nested_message(self, bash_executor, sample_assistant_message):
        """The sample assistant message yields thinking, text and tool_use blocks."""
        payload = json.dumps(sample_assistant_message)
        decoded = json.loads(bash_executor.call_function("get_content", payload))
        assert isinstance(decoded, list)
        assert len(decoded) == 3  # thinking + text + tool_use
        thinking, text, tool_use = decoded[0], decoded[1], decoded[2]
        assert thinking["type"] == "thinking"
        assert text["type"] == "text"
        assert tool_use["type"] == "tool_use"
@pytest.mark.unit
class TestIsToolResult:
    """Exercises the is_tool_result() shell function through bash_executor."""

    def test_identifies_tool_result_message(self, bash_executor):
        """A user message carrying a tool_result block is reported as true."""
        tool_block = {
            "type": "tool_result",
            "tool_use_id": "abc",
            "content": "result"
        }
        payload = json.dumps({"role": "user", "content": [tool_block]})
        verdict = bash_executor.call_function("is_tool_result", payload)
        assert verdict == "true"

    def test_identifies_tool_result_from_sample(self, bash_executor, sample_tool_result):
        """The sample_tool_result fixture is reported as true."""
        payload = json.dumps(sample_tool_result)
        verdict = bash_executor.call_function("is_tool_result", payload)
        assert verdict == "true"

    def test_rejects_non_tool_result(self, bash_executor):
        """A plain user message is reported as false."""
        payload = json.dumps({"role": "user", "content": "hello"})
        verdict = bash_executor.call_function("is_tool_result", payload)
        assert verdict == "false"

    def test_rejects_assistant_message(self, bash_executor, sample_assistant_message):
        """An assistant message is reported as false even with tool_use."""
        payload = json.dumps(sample_assistant_message)
        verdict = bash_executor.call_function("is_tool_result", payload)
        assert verdict == "false"

    def test_handles_string_content(self, bash_executor):
        """String (non-array) content is reported as false."""
        payload = json.dumps({"role": "user", "content": "not an array"})
        verdict = bash_executor.call_function("is_tool_result", payload)
        assert verdict == "false"

    def test_handles_empty_content_array(self, bash_executor):
        """An empty content array is reported as false."""
        payload = json.dumps({"role": "user", "content": []})
        verdict = bash_executor.call_function("is_tool_result", payload)
        assert verdict == "false"
@pytest.mark.unit
class TestGetToolUses:
    """Exercises the get_tool_uses() shell function through bash_executor."""

    def test_extracts_tool_uses_from_content(self, bash_executor, sample_assistant_message):
        """The sample assistant message yields its single Read tool_use block."""
        payload = json.dumps(sample_assistant_message)
        found = json.loads(bash_executor.call_function("get_tool_uses", payload))
        assert isinstance(found, list)
        assert len(found) == 1
        only = found[0]
        assert only["type"] == "tool_use"
        assert only["name"] == "Read"
        assert only["id"] == "tool_test_abc"
        assert "input" in only

    def test_extracts_multiple_tool_uses(self, bash_executor):
        """Two tool_use blocks in one message are both returned, in order."""
        read_call = {
            "type": "tool_use",
            "id": "tool_1",
            "name": "Read",
            "input": {"file": "a.txt"}
        }
        write_call = {
            "type": "tool_use",
            "id": "tool_2",
            "name": "Write",
            "input": {"file": "b.txt"}
        }
        intro = {"type": "text", "text": "I'll use two tools"}
        payload = json.dumps({"message": {"content": [intro, read_call, write_call]}})
        found = json.loads(bash_executor.call_function("get_tool_uses", payload))
        assert len(found) == 2
        assert found[0]["name"] == "Read"
        assert found[1]["name"] == "Write"

    def test_returns_empty_for_no_tools(self, bash_executor):
        """A message with only text content yields an empty list."""
        text_only = [{"type": "text", "text": "no tools"}]
        payload = json.dumps({"message": {"content": text_only}})
        found = json.loads(bash_executor.call_function("get_tool_uses", payload))
        assert found == []

    def test_handles_string_content(self, bash_executor):
        """String (non-array) content yields an empty list."""
        payload = json.dumps({"content": "string content"})
        found = json.loads(bash_executor.call_function("get_tool_uses", payload))
        assert found == []

    def test_handles_missing_content(self, bash_executor):
        """A message without a content field yields an empty list."""
        payload = json.dumps({"message": {"id": "123"}})
        found = json.loads(bash_executor.call_function("get_tool_uses", payload))
        assert found == []

    def test_handles_mixed_content_types(self, bash_executor):
        """Only the tool_use block is returned from mixed content."""
        mixed = [
            {"type": "thinking", "thinking": "analyzing"},
            {"type": "text", "text": "result"},
            {"type": "tool_use", "id": "t1", "name": "Bash", "input": {}},
            {"type": "text", "text": "more text"}
        ]
        payload = json.dumps({"content": mixed})
        found = json.loads(bash_executor.call_function("get_tool_uses", payload))
        assert len(found) == 1
        assert found[0]["name"] == "Bash"
+208
View File
@@ -0,0 +1,208 @@
"""
Unit tests for model name formatting from stop_hook.sh.
Tests verify that model names have date suffixes stripped:
- claude-sonnet-4-5-20250929 -> claude-sonnet-4-5
- claude-opus-4-5-20251101 -> claude-opus-4-5
- claude-haiku-4-20241114 -> claude-haiku-4
"""
import json
import pytest
@pytest.mark.unit
class TestModelNameFormatting:
    """Tests for stripping the -YYYYMMDD date suffix from model names.

    NOTE(review): apart from test_sed_command_strips_date, these tests check
    a Python mirror of the stripping rule, not stop_hook.sh itself.
    """

    @staticmethod
    def _strip_date(model):
        """Return `model` without a trailing hyphen plus 8-digit suffix.

        Names whose last hyphen-separated part is not exactly eight digits
        are returned unchanged.
        """
        suffix = model.split('-')[-1]
        if suffix.isdigit() and len(suffix) == 8:
            return model.rsplit('-', 1)[0]
        return model

    def test_strips_date_from_sonnet_model(self, bash_executor):
        """Date suffix is stripped from a claude-sonnet model name."""
        # bash_executor is requested but unused; kept so the test signature
        # (and its fixture setup) stays as it was.
        assert self._strip_date("claude-sonnet-4-5-20250929") == "claude-sonnet-4-5"

    def test_strips_date_from_opus_model(self):
        """Date suffix is stripped from a claude-opus model name."""
        assert self._strip_date("claude-opus-4-5-20251101") == "claude-opus-4-5"

    def test_strips_date_from_haiku_model(self):
        """Date suffix is stripped from a claude-haiku model name."""
        assert self._strip_date("claude-haiku-4-20241114") == "claude-haiku-4"

    def test_handles_model_without_date_suffix(self):
        """A model name without a date suffix is left unchanged."""
        assert self._strip_date("gpt-4") == "gpt-4"

    def test_sed_command_strips_date(self):
        """The sed expression s/-[0-9]\\{8\\}$// removes a trailing -YYYYMMDD."""
        import subprocess

        models = [
            ("claude-sonnet-4-5-20250929", "claude-sonnet-4-5"),
            ("claude-opus-4-5-20251101", "claude-opus-4-5"),
            ("claude-haiku-4-20241114", "claude-haiku-4"),
            ("claude-sonnet-4-5", "claude-sonnet-4-5"),  # No date
            ("gpt-4", "gpt-4"),  # Different format
        ]
        for model_in, expected_out in models:
            # Feed the name on stdin and pass the sed program as one argv
            # entry, so no shell quoting or f-string brace escaping is needed.
            result = subprocess.run(
                ["sed", r"s/-[0-9]\{8\}$//"],
                input=model_in + "\n",
                capture_output=True,
                text=True
            )
            output = result.stdout.strip()
            assert output == expected_out, f"Expected '{model_in}' -> '{expected_out}', got '{output}'"
@pytest.mark.unit
class TestModelNameInMetadata:
    """Tests for the stripped model name as used in LangSmith metadata and tags.

    NOTE(review): these tests build the metadata and tags in Python; they do
    not read what stop_hook.sh actually sends.
    """

    @staticmethod
    def _strip_date(model):
        """Return `model` without a trailing hyphen plus 8-digit suffix."""
        suffix = model.split('-')[-1]
        if suffix.isdigit() and len(suffix) == 8:
            return model.rsplit('-', 1)[0]
        return model

    def test_model_name_in_ls_provider_metadata(self):
        """ls_model_name carries the model name without its date suffix."""
        # Mirrors the metadata shape used by stop_hook.sh:
        # extra: {metadata: {ls_provider: "anthropic", ls_model_name: $model}}
        model_full = "claude-sonnet-4-5-20250929"
        # Derive the stripped name from the full one so the assertions
        # depend on the stripping rule rather than on a literal.
        model_stripped = self._strip_date(model_full)
        metadata = {
            "ls_provider": "anthropic",
            "ls_model_name": model_stripped
        }
        assert metadata["ls_model_name"] == "claude-sonnet-4-5"
        assert "-20250929" not in metadata["ls_model_name"]

    def test_model_name_in_tags(self):
        """The model tag carries the model name without its date suffix."""
        # Mirrors the tags shape used by stop_hook.sh:
        # tags: [$model]
        model_stripped = self._strip_date("claude-sonnet-4-5-20250929")
        tags = [model_stripped]
        assert tags[0] == "claude-sonnet-4-5"
        assert not any("202" in tag for tag in tags), "Tags should not contain date"
@pytest.mark.unit
class TestRealWorldModelNames:
    """Date-suffix stripping checked against model names seen in cc_transcript.jsonl."""

    @staticmethod
    def _without_date(name):
        """Drop a trailing hyphen plus 8-digit part from `name`, if present."""
        tail = name.split('-')[-1]
        return name.rsplit('-', 1)[0] if tail.isdigit() and len(tail) == 8 else name

    def test_strips_sonnet_45_date(self):
        """The Sonnet 4.5 name loses its date and keeps four hyphenated parts."""
        # From cc_transcript.jsonl: "claude-sonnet-4-5-20250929"
        shortened = self._without_date("claude-sonnet-4-5-20250929")
        assert shortened == "claude-sonnet-4-5"
        assert len(shortened.split('-')) == 4  # claude-sonnet-4-5 has 4 parts

    def test_date_format_validation(self):
        """Only an 8-digit trailing part is treated as a date."""
        # An 8-digit date is removed by a plain right-split
        assert "claude-sonnet-4-5-20250929".rsplit('-', 1)[0] == "claude-sonnet-4-5"
        # A non-numeric suffix stays
        assert self._without_date("claude-sonnet-4-5-beta") == "claude-sonnet-4-5-beta"
        # A short number stays
        assert self._without_date("gpt-4") == "gpt-4"
@pytest.mark.unit
class TestEdgeCases:
    """Edge cases for the date-suffix stripping rule."""

    @staticmethod
    def _without_date(name):
        """Drop a trailing hyphen plus 8-digit part from `name`, if present."""
        tail = name.split('-')[-1]
        return name.rsplit('-', 1)[0] if tail.isdigit() and len(tail) == 8 else name

    def test_multiple_dates_only_strips_last(self):
        """With two date-like parts, only the final one is removed."""
        # Hypothetical edge case: model-20240101-20250929
        shortened = self._without_date("model-20240101-20250929")
        assert shortened == "model-20240101"

    def test_empty_model_name(self):
        """An empty name stays empty."""
        name = ""
        # The explicit emptiness guard is kept from the original expression.
        shortened = self._without_date(name) if name else name
        assert shortened == ""

    def test_model_name_without_hyphens(self):
        """A name with no hyphens is unchanged."""
        assert self._without_date("gpt4") == "gpt4"

    def test_preserves_version_numbers(self):
        """Version digits such as 3-5 survive; only the date is removed."""
        # Should preserve: claude-3-5-sonnet (version 3.5)
        shortened = self._without_date("claude-3-5-sonnet-20241022")
        assert shortened == "claude-3-5-sonnet"
        assert "3-5" in shortened  # Version preserved
@pytest.mark.unit
class TestIntegrationWithSampleData:
    """Date-suffix stripping applied to fixture data and known model names."""

    @staticmethod
    def _without_date(name):
        """Drop a trailing hyphen plus 8-digit part from `name`, if present."""
        tail = name.split('-')[-1]
        return name.rsplit('-', 1)[0] if tail.isdigit() and len(tail) == 8 else name

    def test_model_name_extraction_from_sample_assistant(self, sample_assistant_message):
        """The fixture's full model name strips down to claude-sonnet-4-5."""
        full_name = sample_assistant_message["message"]["model"]
        # The fixture carries the dated form
        assert full_name == "claude-sonnet-4-5-20250929"
        shortened = self._without_date(full_name)
        # The date is gone after stripping
        assert shortened == "claude-sonnet-4-5"
        assert "20250929" not in shortened

    def test_all_claude_45_variants(self):
        """Each listed Claude model name strips to its expected short form."""
        expectations = {
            "claude-sonnet-4-5-20250929": "claude-sonnet-4-5",
            "claude-opus-4-5-20251101": "claude-opus-4-5",
            "claude-haiku-4-20241114": "claude-haiku-4",
        }
        for full_name in expectations:
            expected = expectations[full_name]
            stripped = self._without_date(full_name)
            assert stripped == expected, f"Failed for {full_name}: got {stripped}, expected {expected}"
+480
View File
@@ -0,0 +1,480 @@
"""
Unit tests for multipart serialization in stop_hook.sh.
Tests:
- serialize_for_multipart() - Serialize run data for multipart upload
- File creation with Content-Length headers
- Inputs/outputs extraction and serialization
"""
import json
import os
import subprocess
import tempfile
from pathlib import Path
import pytest
@pytest.mark.unit
class TestSerializeForMultipart:
    """Source-text checks on the serialize_for_multipart() shell function."""

    @staticmethod
    def _body(executor):
        """Fetch the source text of serialize_for_multipart from the executor."""
        return executor.get_function_source("serialize_for_multipart")

    def test_function_exists(self, bash_executor):
        """The function source mentions its own name."""
        assert "serialize_for_multipart" in self._body(bash_executor)

    def test_accepts_operation_parameter(self, bash_executor):
        """The function references an operation parameter (post/patch)."""
        assert "operation" in self._body(bash_executor)

    def test_accepts_run_json_parameter(self, bash_executor):
        """The function references a run_json parameter."""
        assert "run_json" in self._body(bash_executor)

    def test_accepts_temp_dir_parameter(self, bash_executor):
        """The function references a temp_dir parameter."""
        assert "temp_dir" in self._body(bash_executor)

    def test_extracts_run_id(self, bash_executor):
        """The function references run_id and an `.id` jq filter."""
        text = self._body(bash_executor)
        assert "run_id" in text
        assert ".id" in text  # jq extraction

    def test_extracts_inputs(self, bash_executor):
        """The function references inputs and an `.inputs` jq filter."""
        text = self._body(bash_executor)
        assert "inputs" in text
        assert ".inputs" in text  # jq extraction

    def test_extracts_outputs(self, bash_executor):
        """The function references outputs and an `.outputs` jq filter."""
        text = self._body(bash_executor)
        assert "outputs" in text
        assert ".outputs" in text  # jq extraction

    def test_creates_main_data_file(self, bash_executor):
        """The function references main_file and the _main.json suffix."""
        text = self._body(bash_executor)
        assert "main_file" in text
        assert "_main.json" in text

    def test_uses_get_file_size(self, bash_executor):
        """The function references get_file_size (used for Content-Length)."""
        assert "get_file_size" in self._body(bash_executor)

    def test_outputs_curl_f_arguments(self, bash_executor):
        """The function emits a quoted -F flag for curl."""
        text = self._body(bash_executor)
        assert any(flag in text for flag in ('"-F"', "echo \"-F\""))

    def test_includes_content_length_header(self, bash_executor):
        """The function references the Content-Length header."""
        assert "Content-Length" in self._body(bash_executor)
@pytest.mark.unit
class TestMultipartFileFormat:
    """Source-text checks on multipart file and part naming."""

    @staticmethod
    def _body(executor):
        """Fetch the source text of serialize_for_multipart from the executor."""
        return executor.get_function_source("serialize_for_multipart")

    def test_main_file_naming_convention(self, bash_executor):
        """Main file name follows {operation}_{run_id}_main.json."""
        text = self._body(bash_executor)
        # Both the operation and the run id must appear, braced or bare
        assert any(ref in text for ref in ("${operation}", "$operation"))
        assert any(ref in text for ref in ("${run_id}", "$run_id"))
        assert "_main.json" in text

    def test_inputs_file_naming_convention(self, bash_executor):
        """Inputs file name ends with _inputs.json."""
        assert "_inputs.json" in self._body(bash_executor)

    def test_outputs_file_naming_convention(self, bash_executor):
        """Outputs file name ends with _outputs.json."""
        assert "_outputs.json" in self._body(bash_executor)

    def test_multipart_part_naming(self, bash_executor):
        """Part name follows {operation}.{run_id}, e.g. post.uuid or patch.uuid."""
        text = self._body(bash_executor)
        assert any(ref in text for ref in ("${operation}.${run_id}", "$operation.$run_id"))
@pytest.mark.unit
class TestMultipartDataSeparation:
    """How serialize_for_multipart splits main run data from inputs/outputs.

    Every test is a substring check on the function's source text; the shell
    function is not executed here.
    """

    @staticmethod
    def _fn_source(bash_executor):
        """Fetch the source text of serialize_for_multipart from the fixture."""
        return bash_executor.get_function_source("serialize_for_multipart")

    def test_main_data_excludes_inputs(self, bash_executor):
        """The source contains a jq ``del(.inputs`` expression."""
        body = self._fn_source(bash_executor)
        assert "del(.inputs" in body

    def test_main_data_excludes_outputs(self, bash_executor):
        """The source contains a jq ``del(`` call and references ``.outputs``."""
        body = self._fn_source(bash_executor)
        for token in ("del(", ".outputs"):
            assert token in body

    def test_inputs_only_created_if_present(self, bash_executor):
        """The source appears to guard inputs with a null / non-empty test."""
        body = self._fn_source(bash_executor)
        null_forms = ('"null"', "null")
        assert any(form in body for form in null_forms)
        # "-n" is the shell test for a non-empty string.
        assert "-n" in body

    def test_outputs_only_created_if_present(self, bash_executor):
        """The source mentions ``outputs``.

        NOTE(review): this only checks that the word appears; it does not
        verify that a null/empty guard exists for outputs.
        """
        body = self._fn_source(bash_executor)
        assert "outputs" in body
@pytest.mark.unit
class TestSerializeForMultipartIntegration:
    """Run serialize_for_multipart from stop_hook.sh in a real bash subprocess.

    Each test passes a small run payload to the shell function, then inspects
    the text printed on stdout and the JSON part files written into pytest's
    ``tmp_path`` directory.
    """

    @staticmethod
    def _project_root():
        """Return the nearest ancestor directory of this file holding stop_hook.sh.

        Used instead of a hard-coded absolute checkout path so the tests run
        on any machine.

        Raises:
            FileNotFoundError: if no ancestor directory contains stop_hook.sh.
        """
        current = os.path.dirname(os.path.abspath(__file__))
        while True:
            if os.path.isfile(os.path.join(current, "stop_hook.sh")):
                return current
            parent = os.path.dirname(current)
            if parent == current:
                raise FileNotFoundError(
                    "stop_hook.sh not found in any parent directory of this test file"
                )
            current = parent

    @classmethod
    def _run_serialize(cls, operation, run_data, tmp_path):
        """Invoke serialize_for_multipart and return the CompletedProcess.

        Args:
            operation: first argument for the shell function ("post"/"patch").
            run_data: dict serialised to JSON and passed as the run payload.
            tmp_path: directory passed as the function's third argument.
        """
        import shlex

        # The sed filter drops the block from "# Exit early if tracing
        # disabled" through the next "fi" line, and everything from the
        # "main" line onward, so sourcing only defines the functions.
        # shlex.quote keeps payloads/paths containing quotes or spaces intact.
        script = f"""
        set -e
        source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
        temp_dir={shlex.quote(str(tmp_path))}
        run_json={shlex.quote(json.dumps(run_data))}
        serialize_for_multipart {shlex.quote(operation)} "$run_json" "$temp_dir"
        """
        return subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            cwd=cls._project_root(),
        )

    @staticmethod
    def _diagnostics(result):
        """Format stdout/stderr for use as an assertion failure message."""
        return f"stdout: {result.stdout!r}\nstderr: {result.stderr!r}"

    def test_serialize_post_run(self, tmp_path):
        """A POST run prints -F arguments and writes main and inputs files."""
        run_data = {
            "id": "test-run-123",
            "name": "Test Run",
            "run_type": "llm",
            "inputs": {"messages": [{"role": "user", "content": "Hello"}]},
            "start_time": "2025-01-01T00:00:00Z",
        }
        result = self._run_serialize("post", run_data, tmp_path)
        details = self._diagnostics(result)

        # Check output contains -F arguments
        output = result.stdout
        assert "-F" in output, details
        assert "post.test-run-123" in output, details

        # Check that main file was created
        main_file = tmp_path / "post_test-run-123_main.json"
        assert main_file.exists(), details

        # Check that inputs file was created
        inputs_file = tmp_path / "post_test-run-123_inputs.json"
        assert inputs_file.exists(), details

    def test_serialize_patch_run(self, tmp_path):
        """A PATCH run prints -F arguments and writes an outputs file."""
        run_data = {
            "id": "test-run-456",
            "outputs": {"messages": [{"role": "assistant", "content": "Hi"}]},
            "end_time": "2025-01-01T00:00:01Z",
        }
        result = self._run_serialize("patch", run_data, tmp_path)
        details = self._diagnostics(result)

        output = result.stdout
        assert "-F" in output, details
        assert "patch.test-run-456" in output, details

        # Check that outputs file was created
        outputs_file = tmp_path / "patch_test-run-456_outputs.json"
        assert outputs_file.exists(), details

    def test_serialize_run_without_inputs(self, tmp_path):
        """A run with no inputs writes a main file but no inputs file."""
        run_data = {
            "id": "test-run-789",
            "name": "Test Run",
            "run_type": "llm",
            "start_time": "2025-01-01T00:00:00Z",
        }
        result = self._run_serialize("post", run_data, tmp_path)
        details = self._diagnostics(result)

        # Main file should exist
        main_file = tmp_path / "post_test-run-789_main.json"
        assert main_file.exists(), details

        # Inputs file should NOT exist (no inputs)
        inputs_file = tmp_path / "post_test-run-789_inputs.json"
        assert not inputs_file.exists(), details

    def test_main_file_excludes_inputs_outputs(self, tmp_path):
        """The main file keeps id/name but drops inputs and outputs."""
        run_data = {
            "id": "test-run-abc",
            "name": "Test Run",
            "run_type": "llm",
            "inputs": {"messages": []},
            "outputs": {"messages": []},
            "start_time": "2025-01-01T00:00:00Z",
        }
        result = self._run_serialize("post", run_data, tmp_path)

        # Read main file and verify it doesn't have inputs/outputs
        main_file = tmp_path / "post_test-run-abc_main.json"
        assert main_file.exists(), self._diagnostics(result)
        main_content = json.loads(main_file.read_text())
        assert "inputs" not in main_content
        assert "outputs" not in main_content
        assert main_content["id"] == "test-run-abc"
        assert main_content["name"] == "Test Run"

    def test_content_length_header_is_accurate(self, tmp_path):
        """Each Content-Length printed on stdout equals the part file's size.

        A line is checked when it contains both "Content-Length:" and one of
        the expected part filenames. The test fails if no such line is found,
        so it can no longer pass without comparing anything.
        """
        run_data = {
            "id": "test-run-size",
            "name": "Size Test",
            "run_type": "llm",
            "inputs": {"data": "test" * 100},  # Some data
            "start_time": "2025-01-01T00:00:00Z",
        }
        result = self._run_serialize("post", run_data, tmp_path)
        details = self._diagnostics(result)

        part_files = {
            "_main.json": tmp_path / "post_test-run-size_main.json",
            "_inputs.json": tmp_path / "post_test-run-size_inputs.json",
        }
        verified = 0
        for line in result.stdout.split("\n"):
            if "Content-Length:" not in line:
                continue
            # Everything after the header name on this line is the size.
            claimed_size = int(line.split("Content-Length:")[1].strip())
            # First matching suffix wins, mirroring an if/elif on the names.
            for suffix, path in part_files.items():
                if suffix in line:
                    assert claimed_size == os.path.getsize(path), details
                    verified += 1
                    break
        assert verified > 0, f"no Content-Length header was checked\n{details}"
@pytest.mark.unit
class TestMultipartCurlFormat:
    """Shape of the curl ``-F`` arguments printed by serialize_for_multipart.

    Each test runs the shell function from stop_hook.sh in a bash subprocess
    and inspects its stdout.
    """

    @staticmethod
    def _project_root():
        """Return the nearest ancestor directory of this file holding stop_hook.sh.

        Used instead of a hard-coded absolute checkout path so the tests run
        on any machine.

        Raises:
            FileNotFoundError: if no ancestor directory contains stop_hook.sh.
        """
        import os

        current = os.path.dirname(os.path.abspath(__file__))
        while True:
            if os.path.isfile(os.path.join(current, "stop_hook.sh")):
                return current
            parent = os.path.dirname(current)
            if parent == current:
                raise FileNotFoundError(
                    "stop_hook.sh not found in any parent directory of this test file"
                )
            current = parent

    @classmethod
    def _serialized_output(cls, operation, run_data, tmp_path):
        """Run serialize_for_multipart and return (stdout, diagnostics).

        Args:
            operation: first argument for the shell function ("post"/"patch").
            run_data: dict serialised to JSON and passed as the run payload.
            tmp_path: directory passed as the function's third argument.
        """
        import shlex

        # The sed filter removes the early-exit block and everything from the
        # "main" line onward so that sourcing only defines functions.
        # shlex.quote protects payloads/paths containing quotes or spaces.
        script = f"""
        set -e
        source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
        temp_dir={shlex.quote(str(tmp_path))}
        run_json={shlex.quote(json.dumps(run_data))}
        serialize_for_multipart {shlex.quote(operation)} "$run_json" "$temp_dir"
        """
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            cwd=cls._project_root(),
        )
        details = f"stdout: {result.stdout!r}\nstderr: {result.stderr!r}"
        return result.stdout, details

    def test_curl_f_format_with_file_reference(self, tmp_path):
        """The -F value references a file using ``<`` (or ``@``)."""
        run_data = {
            "id": "test-curl-format",
            "name": "Test",
            "run_type": "llm",
            "start_time": "2025-01-01T00:00:00Z",
        }
        output, details = self._serialized_output("post", run_data, tmp_path)
        # curl uses < or @ for files
        assert "<" in output or "@" in output, details

    def test_curl_f_includes_content_type(self, tmp_path):
        """The -F value declares an application/json content type."""
        run_data = {
            "id": "test-content-type",
            "name": "Test",
            "run_type": "llm",
            "start_time": "2025-01-01T00:00:00Z",
        }
        output, details = self._serialized_output("post", run_data, tmp_path)
        assert "application/json" in output, details

    def test_inputs_part_naming(self, tmp_path):
        """The inputs part is named {operation}.{run_id}.inputs."""
        run_data = {
            "id": "test-inputs-name",
            "inputs": {"test": "data"},
            "start_time": "2025-01-01T00:00:00Z",
        }
        output, details = self._serialized_output("post", run_data, tmp_path)
        assert "post.test-inputs-name.inputs" in output, details

    def test_outputs_part_naming(self, tmp_path):
        """The outputs part is named {operation}.{run_id}.outputs."""
        run_data = {
            "id": "test-outputs-name",
            "outputs": {"test": "data"},
            "end_time": "2025-01-01T00:00:00Z",
        }
        output, details = self._serialized_output("patch", run_data, tmp_path)
        assert "patch.test-outputs-name.outputs" in output, details
+107
View File
@@ -0,0 +1,107 @@
"""
Unit tests for state management functions from stop_hook.sh.
Tests:
- load_state() - Read langsmith_state.json
- save_state() - Write state file
"""
import json
import pytest
@pytest.mark.unit
class TestStateManagement:
    """Tests for the load_state() and save_state() shell functions.

    The ``bash_executor``, ``temp_state_file`` and ``state_manager`` fixtures
    are defined outside this chunk; ``bash_executor.call_function`` is used to
    invoke the shell functions and ``state_manager.save`` to write state from
    Python.
    """

    def test_load_state_returns_empty_for_missing_file(self, bash_executor, temp_state_file):
        """load_state prints an empty JSON object when no state file exists."""
        # temp_state_file doesn't exist yet
        result = bash_executor.call_function("load_state")
        loaded = json.loads(result)
        assert loaded == {}

    def test_save_and_load_state(self, bash_executor, temp_state_file, state_manager):
        """State written via state_manager is read back unchanged by load_state."""
        state = {
            "session_123": {
                "last_line": 42,
                "turn_count": 5,
                "updated": "2025-01-01T00:00:00Z",
            }
        }
        # Save state using state_manager (creates the file)
        state_manager.save(state)

        # Load using bash function
        result = bash_executor.call_function("load_state")
        loaded = json.loads(result)
        assert loaded == state
        assert loaded["session_123"]["last_line"] == 42
        assert loaded["session_123"]["turn_count"] == 5

    def test_state_tracks_multiple_sessions(self, bash_executor, state_manager):
        """Several session entries survive a save/load round trip."""
        state = {
            "session_1": {"last_line": 10, "turn_count": 1},
            "session_2": {"last_line": 20, "turn_count": 2},
        }
        state_manager.save(state)

        result = bash_executor.call_function("load_state")
        loaded = json.loads(result)
        assert "session_1" in loaded
        assert "session_2" in loaded
        assert loaded["session_1"]["last_line"] == 10
        assert loaded["session_2"]["last_line"] == 20

    def test_save_state_creates_directory(self, bash_executor, tmp_path):
        """save_state creates missing parent directories of STATE_FILE."""
        import os

        # Use a nested path that doesn't exist
        nested_state_file = tmp_path / "nested" / "dir" / "state.json"
        state = {"test": {"value": 123}}

        # Manually point STATE_FILE at the nested path for this test only.
        old_state_file = os.environ.get("STATE_FILE")
        os.environ["STATE_FILE"] = str(nested_state_file)
        try:
            bash_executor.call_function("save_state", json.dumps(state))

            # Verify file was created
            assert nested_state_file.exists()

            # Verify content
            loaded_content = json.loads(nested_state_file.read_text())
            assert loaded_content == state
        finally:
            # Restore the exact prior environment. If STATE_FILE was unset
            # (or empty) before, it must not keep pointing at this test's
            # temp path, or later tests would inherit it.
            if old_state_file is None:
                os.environ.pop("STATE_FILE", None)
            else:
                os.environ["STATE_FILE"] = old_state_file

    def test_save_state_with_complex_data(self, bash_executor, state_manager):
        """Nested metadata inside a session entry survives a round trip."""
        state = {
            "session_abc": {
                "last_line": 100,
                "turn_count": 25,
                "updated": "2025-01-01T12:34:56Z",
                "metadata": {
                    "model": "claude-sonnet-4-5-20250929",
                    "total_tokens": 5000,
                },
            }
        }
        state_manager.save(state)

        result = bash_executor.call_function("load_state")
        loaded = json.loads(result)
        assert loaded == state
        assert loaded["session_abc"]["metadata"]["model"] == "claude-sonnet-4-5-20250929"
+379
View File
@@ -0,0 +1,379 @@
"""
Unit tests for timestamp conversion in stop_hook.sh.
Tests the ISO timestamp to dotted_order format conversion:
- ISO format: 2025-12-16T17:44:04.397Z
- dotted_order format: 20251216T174404397000Z
This conversion is critical for proper trace ordering in LangSmith.
"""
import json
import subprocess
import pytest
from datetime import datetime
@pytest.mark.unit
class TestISOToDottedOrderConversion:
    """ISO timestamp -> dotted_order conversion, exercised through sed.

    The tests run a copy of a sed program (attributed by the original comment
    to stop_hook.sh line 537) in a bash pipeline; stop_hook.sh itself is not
    sourced here.
    """

    # Step 1 strips '-' and ':'; step 2 replaces ".<digits>Z" with
    # "<digits>000Z"; step 3 rewrites "T<digits><3 digits>000Z" to itself.
    _SED_PROGRAM = r"s/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/"

    @classmethod
    def _convert(cls, iso_timestamp):
        """Pipe ``iso_timestamp`` through the sed program; return stripped stdout."""
        command = "echo '" + iso_timestamp + "' | sed '" + cls._SED_PROGRAM + "'"
        completed = subprocess.run(
            ["bash", "-c", command],
            capture_output=True,
            text=True,
        )
        return completed.stdout.strip()

    def test_converts_iso_to_dotted_order_format(self):
        """A three-digit fraction converts to the full dotted_order string."""
        assert self._convert("2025-12-16T17:44:04.397Z") == "20251216T174404397000Z"

    def test_converts_single_digit_milliseconds(self):
        """A one-digit fraction (.1Z) gets '000' appended, giving '1000'.

        NOTE(review): the result has four fractional digits, not six; this
        test pins what the sed program currently produces.
        """
        converted = self._convert("2025-12-16T17:44:04.1Z")
        assert "T1744041000Z" in converted

    def test_converts_two_digit_milliseconds(self):
        """A two-digit fraction (.12Z) gets '000' appended, giving '12000'."""
        converted = self._convert("2025-12-16T17:44:04.12Z")
        assert "T17440412000Z" in converted

    def test_converts_full_milliseconds(self):
        """A three-digit fraction (.123Z) becomes '123000'."""
        assert self._convert("2025-12-16T17:44:04.123Z") == "20251216T174404123000Z"

    def test_removes_dashes_from_date(self):
        """The date portion is 20251216, with no dashes left anywhere."""
        converted = self._convert("2025-12-16T17:44:04.000Z")
        assert converted.startswith("20251216T")
        assert "-" not in converted

    def test_removes_colons_from_time(self):
        """The time portion is 174404, with no colons left anywhere."""
        converted = self._convert("2025-12-16T17:44:04.000Z")
        assert "T174404" in converted
        assert ":" not in converted

    def test_preserves_z_suffix(self):
        """The trailing Z is kept."""
        assert self._convert("2025-12-16T17:44:04.123Z").endswith("Z")

    def test_pads_milliseconds_to_microseconds(self):
        """397 milliseconds is emitted as 397000."""
        converted = self._convert("2025-12-16T17:44:04.397Z")
        assert "397000Z" in converted
@pytest.mark.unit
class TestDottedOrderTimestampFormat:
    """Tests for generating dotted_order timestamps from the current time."""

    @staticmethod
    def _project_root():
        """Return the nearest ancestor directory of this file holding stop_hook.sh.

        Used instead of a hard-coded absolute checkout path so the test runs
        on any machine.

        Raises:
            FileNotFoundError: if no ancestor directory contains stop_hook.sh.
        """
        import os

        current = os.path.dirname(os.path.abspath(__file__))
        while True:
            if os.path.isfile(os.path.join(current, "stop_hook.sh")):
                return current
            parent = os.path.dirname(current)
            if parent == current:
                raise FileNotFoundError(
                    "stop_hook.sh not found in any parent directory of this test file"
                )
            current = parent

    def test_dotted_timestamp_format(self, bash_executor):
        """`date` output plus get_microseconds plus 'Z' is a 22-char timestamp."""
        # Build a timestamp with `date -u` and the get_microseconds helper
        # that is defined by sourcing stop_hook.sh. The sed filter removes the
        # early-exit block and everything from the "main" line onward.
        script = """
        set -e
        source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
        dotted_timestamp=$(date -u +"%Y%m%dT%H%M%S")
        microseconds=$(get_microseconds)
        dotted_timestamp="${dotted_timestamp}${microseconds}Z"
        echo "$dotted_timestamp"
        """
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            cwd=self._project_root(),
        )
        output = result.stdout.strip()
        details = f"stdout: {result.stdout!r}\nstderr: {result.stderr!r}"

        # Format should be: YYYYMMDDTHHMMSSffffffZ (22 chars)
        # YYYYMMDD (8) + T (1) + HHMMSS (6) + ffffff (6) + Z (1) = 22
        assert len(output) == 22, details
        assert output[8] == "T"  # T separator
        assert output[-1] == "Z"  # Z suffix
        assert output[:8].isdigit()  # Date digits
        assert output[9:21].isdigit()  # Time + microseconds (HHMMSS + ffffff)

    def test_dotted_timestamp_year_month_day(self, bash_executor):
        """The first eight characters are today's UTC date as YYYYMMDD.

        The date is compared with Python's UTC clock sampled before and after
        the subprocess, which tolerates a midnight rollover and avoids a
        fixed year window that would start failing in a later year.
        """
        from datetime import datetime, timezone

        script = """
        dotted_timestamp=$(date -u +"%Y%m%dT%H%M%S")
        echo "${dotted_timestamp:0:8}"
        """
        today_before = datetime.now(timezone.utc).strftime("%Y%m%d")
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
        )
        today_after = datetime.now(timezone.utc).strftime("%Y%m%d")
        output = result.stdout.strip()

        # Should be YYYYMMDD
        assert len(output) == 8
        assert output.isdigit()
        assert 1 <= int(output[4:6]) <= 12
        assert 1 <= int(output[6:8]) <= 31
        assert output in (today_before, today_after)
@pytest.mark.unit
class TestTimestampChronologicalOrdering:
    """Converted timestamps must sort (as strings) in chronological order.

    Conversion uses a copy of the sed program run through bash; stop_hook.sh
    is not sourced.
    """

    @staticmethod
    def _convert_all(iso_timestamps):
        """Convert each ISO timestamp with the sed pipeline, preserving order."""
        sed_program = r"s/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/"
        converted = []
        for stamp in iso_timestamps:
            command = "echo '" + stamp + "' | sed '" + sed_program + "'"
            completed = subprocess.run(
                ["bash", "-c", command],
                capture_output=True,
                text=True,
            )
            converted.append(completed.stdout.strip())
        return converted

    def test_earlier_timestamp_sorts_first(self):
        """Ascending ISO inputs yield already-sorted dotted_orders."""
        dotted = self._convert_all(
            [
                "2025-12-16T17:44:04.100Z",
                "2025-12-16T17:44:04.200Z",
                "2025-12-16T17:44:04.300Z",
            ]
        )
        assert dotted == sorted(dotted)

    def test_different_seconds_sort_correctly(self):
        """04.999 sorts before 05.000 even though its fraction is larger."""
        dotted = self._convert_all(
            [
                "2025-12-16T17:44:05.000Z",  # Later
                "2025-12-16T17:44:04.999Z",  # Earlier (despite higher ms)
            ]
        )
        first, second = sorted(dotted)
        assert "174404" in first
        assert "174405" in second

    def test_different_dates_sort_correctly(self):
        """Dec 16 23:59:59.999 sorts before Dec 17 00:00:00.000."""
        dotted = self._convert_all(
            [
                "2025-12-17T00:00:00.000Z",
                "2025-12-16T23:59:59.999Z",
            ]
        )
        first, second = sorted(dotted)
        assert "20251216" in first
        assert "20251217" in second
@pytest.mark.unit
class TestTimestampEdgeCases:
    """Boundary inputs for the ISO -> dotted_order sed conversion.

    Conversion uses a copy of the sed program run through bash; stop_hook.sh
    is not sourced.
    """

    @staticmethod
    def _convert(iso_timestamp):
        """Pipe ``iso_timestamp`` through the sed program; return stripped stdout."""
        sed_program = r"s/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/"
        command = "echo '" + iso_timestamp + "' | sed '" + sed_program + "'"
        completed = subprocess.run(
            ["bash", "-c", command],
            capture_output=True,
            text=True,
        )
        return completed.stdout.strip()

    def test_handles_midnight_timestamp(self):
        """00:00:00.000 converts to an all-zero time portion."""
        assert self._convert("2025-12-16T00:00:00.000Z") == "20251216T000000000000Z"

    def test_handles_end_of_day_timestamp(self):
        """23:59:59.999 converts to 235959999000."""
        assert self._convert("2025-12-16T23:59:59.999Z") == "20251216T235959999000Z"

    def test_handles_zero_milliseconds(self):
        """A .000 fraction is emitted as 000000."""
        converted = self._convert("2025-12-16T12:30:45.000Z")
        assert "000000Z" in converted  # 000 padded to 000000

    def test_handles_leap_year_date(self):
        """Feb 29 of a leap year keeps its date digits."""
        converted = self._convert("2024-02-29T12:00:00.500Z")
        assert converted.startswith("20240229T")
@pytest.mark.unit
class TestTimestampWithRealTranscriptData:
    """Conversion checks using timestamps in the cc_transcript.jsonl format.

    Conversion uses a copy of the sed program run through bash; stop_hook.sh
    is not sourced.
    """

    @staticmethod
    def _convert(iso_timestamp):
        """Pipe ``iso_timestamp`` through the sed program; return stripped stdout."""
        sed_program = r"s/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/"
        command = "echo '" + iso_timestamp + "' | sed '" + sed_program + "'"
        completed = subprocess.run(
            ["bash", "-c", command],
            capture_output=True,
            text=True,
        )
        return completed.stdout.strip()

    def test_converts_real_transcript_timestamp(self):
        """A transcript-style timestamp converts to the expected string."""
        # Example from line 2: "timestamp":"2024-12-06T06:42:11.556Z"
        assert self._convert("2024-12-06T06:42:11.556Z") == "20241206T064211556000Z"

    def test_multiple_transcript_timestamps_maintain_order(self):
        """A simulated message sequence stays ordered and unique after conversion."""
        sequence = [
            "2024-12-06T06:42:11.556Z",  # User message
            "2024-12-06T06:42:12.100Z",  # Assistant response
            "2024-12-06T06:42:12.500Z",  # Tool result
            "2024-12-06T06:42:13.200Z",  # Final response
        ]
        dotted = [self._convert(stamp) for stamp in sequence]

        # Verify they're in chronological order
        assert dotted == sorted(dotted)
        # Verify each is unique
        assert len(set(dotted)) == len(dotted)
+541
View File
@@ -0,0 +1,541 @@
"""
Unit tests for trace creation in stop_hook.sh.
Tests:
- create_trace() - Main trace creation logic
- Turn run structure
- LLM (assistant) run structure
- Tool run structure
- Parent-child relationships
- Usage metadata
- Dotted order hierarchy
"""
import json
import subprocess
import uuid
from datetime import datetime
import pytest
@pytest.mark.unit
class TestCreateTraceFunction:
    """Presence checks for create_trace() and the names it is expected to use.

    Every test is a substring search over the function's source text from the
    ``bash_executor`` fixture; create_trace is not executed.
    """

    @staticmethod
    def _assert_mentions(bash_executor, *tokens):
        """Assert that each token occurs in create_trace's source text."""
        body = bash_executor.get_function_source("create_trace")
        for token in tokens:
            assert token in body

    def test_function_exists(self, bash_executor):
        """The returned source contains the function's own name."""
        self._assert_mentions(bash_executor, "create_trace")

    def test_accepts_session_id_parameter(self, bash_executor):
        """The source mentions ``session_id``."""
        self._assert_mentions(bash_executor, "session_id")

    def test_accepts_turn_num_parameter(self, bash_executor):
        """The source mentions ``turn_num``."""
        self._assert_mentions(bash_executor, "turn_num")

    def test_accepts_user_msg_parameter(self, bash_executor):
        """The source mentions ``user_msg``."""
        self._assert_mentions(bash_executor, "user_msg")

    def test_accepts_assistant_messages_parameter(self, bash_executor):
        """The source mentions ``assistant_messages``."""
        self._assert_mentions(bash_executor, "assistant_messages")

    def test_accepts_tool_results_parameter(self, bash_executor):
        """The source mentions ``tool_results``."""
        self._assert_mentions(bash_executor, "tool_results")
@pytest.mark.unit
class TestTurnRunCreation:
    """Source-level checks for the turn (top-level chain) run in create_trace.

    Every test is a substring search over create_trace's source text; the
    function is not executed, so only token presence is verified.
    """

    @staticmethod
    def _trace_source(bash_executor):
        """Fetch the source text of create_trace from the fixture."""
        return bash_executor.get_function_source("create_trace")

    def test_creates_turn_run_with_chain_type(self, bash_executor):
        """The source contains ``"chain"`` and ``run_type``."""
        body = self._trace_source(bash_executor)
        for token in ('"chain"', "run_type"):
            assert token in body

    def test_turn_run_has_unique_id(self, bash_executor):
        """The source calls ``uuidgen`` and names a ``turn_id``."""
        body = self._trace_source(bash_executor)
        for token in ("uuidgen", "turn_id"):
            assert token in body

    def test_turn_run_name_is_claude_code(self, bash_executor):
        """The source contains the quoted name ``"Claude Code"``."""
        body = self._trace_source(bash_executor)
        assert '"Claude Code"' in body

    def test_turn_run_has_dotted_order(self, bash_executor):
        """The source mentions ``dotted_order`` and ``turn_dotted_order``."""
        body = self._trace_source(bash_executor)
        for token in ("dotted_order", "turn_dotted_order"):
            assert token in body

    def test_turn_run_trace_id_equals_run_id(self, bash_executor):
        """The source mentions ``trace_id``.

        NOTE(review): only the token is checked; that trace_id equals the
        turn's run id is not verified here.
        """
        body = self._trace_source(bash_executor)
        # trace_id: $turn_id (same as run id)
        assert "trace_id" in body

    def test_turn_run_has_session_name(self, bash_executor):
        """The source mentions ``session_name`` and some form of "project"."""
        body = self._trace_source(bash_executor)
        assert "session_name" in body
        mentions_project = "project" in body.lower() or "PROJECT" in body
        assert mentions_project

    def test_turn_run_has_thread_id_metadata(self, bash_executor):
        """The source mentions ``thread_id`` and ``session``."""
        body = self._trace_source(bash_executor)
        for token in ("thread_id", "session"):
            assert token in body

    def test_turn_run_has_tags(self, bash_executor):
        """The source contains the ``"claude-code"`` tag and a ``turn-`` prefix."""
        body = self._trace_source(bash_executor)
        assert '"claude-code"' in body
        assert "turn-" in body  # turn-N tag
@pytest.mark.unit
class TestAssistantRunCreation:
    """Source-level checks for the assistant (LLM) run in create_trace.

    Every test is a substring search over create_trace's source text; the
    function is not executed, so only token presence is verified.
    """

    @staticmethod
    def _assert_mentions(bash_executor, *tokens):
        """Assert that each token occurs in create_trace's source text."""
        body = bash_executor.get_function_source("create_trace")
        for token in tokens:
            assert token in body

    def test_creates_llm_run_type(self, bash_executor):
        """The source contains the quoted run type ``"llm"``."""
        self._assert_mentions(bash_executor, '"llm"')

    def test_assistant_run_has_unique_id(self, bash_executor):
        """The source names an ``assistant_id`` and calls ``uuidgen``."""
        self._assert_mentions(bash_executor, "assistant_id", "uuidgen")

    def test_assistant_run_name_is_claude(self, bash_executor):
        """The source contains the quoted name ``"Claude"``."""
        self._assert_mentions(bash_executor, '"Claude"')

    def test_assistant_run_has_parent_run_id(self, bash_executor):
        """The source mentions ``parent_run_id``."""
        self._assert_mentions(bash_executor, "parent_run_id")

    def test_assistant_run_has_trace_id(self, bash_executor):
        """The source mentions ``trace_id``."""
        self._assert_mentions(bash_executor, "trace_id")

    def test_assistant_run_has_model_in_metadata(self, bash_executor):
        """The source mentions ``ls_model_name``, ``ls_provider`` and ``anthropic``."""
        self._assert_mentions(bash_executor, "ls_model_name", "ls_provider", "anthropic")

    def test_assistant_run_has_model_in_tags(self, bash_executor):
        """The source mentions ``tags`` and ``model``."""
        self._assert_mentions(bash_executor, "tags", "model")

    def test_assistant_run_has_dotted_order(self, bash_executor):
        """The source mentions ``assistant_dotted_order``."""
        self._assert_mentions(bash_executor, "assistant_dotted_order")
@pytest.mark.unit
class TestToolRunCreation:
    """Source-level checks for tool runs in create_trace.

    Every test is a substring search over create_trace's source text; the
    function is not executed, so only token presence is verified.
    """

    @staticmethod
    def _assert_mentions(bash_executor, *tokens):
        """Assert that each token occurs in create_trace's source text."""
        body = bash_executor.get_function_source("create_trace")
        for token in tokens:
            assert token in body

    def test_creates_tool_run_type(self, bash_executor):
        """The source contains the quoted run type ``"tool"``."""
        self._assert_mentions(bash_executor, '"tool"')

    def test_tool_run_has_unique_id(self, bash_executor):
        """The source mentions ``tool_id``."""
        self._assert_mentions(bash_executor, "tool_id")

    def test_tool_run_has_tool_name(self, bash_executor):
        """The source mentions ``tool_name``."""
        self._assert_mentions(bash_executor, "tool_name")

    def test_tool_run_has_parent_as_turn(self, bash_executor):
        """The source mentions ``parent_run_id`` and ``turn_id``.

        The intent stated by the original test is that tools are children of
        the turn rather than of the assistant run; only the tokens are checked.
        """
        self._assert_mentions(bash_executor, "parent_run_id", "turn_id")

    def test_tool_run_has_input(self, bash_executor):
        """The source mentions ``tool_input`` and ``input``."""
        self._assert_mentions(bash_executor, "tool_input", "input")

    def test_tool_run_has_dotted_order(self, bash_executor):
        """The source mentions ``tool_dotted_order``."""
        self._assert_mentions(bash_executor, "tool_dotted_order")

    def test_tool_run_has_tool_tag(self, bash_executor):
        """The source contains the quoted ``"tool"`` tag."""
        self._assert_mentions(bash_executor, '"tool"')
@pytest.mark.unit
class TestFindToolResultWithTimestamp:
    """Tests for the find_tool_result_with_timestamp() shell function.

    The first four tests are substring checks on the function's source text;
    the remaining ones run the function from stop_hook.sh in a bash
    subprocess and parse the JSON it prints.
    """

    @staticmethod
    def _project_root():
        """Return the nearest ancestor directory of this file holding stop_hook.sh.

        Used instead of a hard-coded absolute checkout path so the tests run
        on any machine.

        Raises:
            FileNotFoundError: if no ancestor directory contains stop_hook.sh.
        """
        import os

        current = os.path.dirname(os.path.abspath(__file__))
        while True:
            if os.path.isfile(os.path.join(current, "stop_hook.sh")):
                return current
            parent = os.path.dirname(current)
            if parent == current:
                raise FileNotFoundError(
                    "stop_hook.sh not found in any parent directory of this test file"
                )
            current = parent

    @classmethod
    def _lookup(cls, tool_id, tool_results):
        """Run find_tool_result_with_timestamp and return its parsed JSON output.

        Args:
            tool_id: the tool_use_id to search for.
            tool_results: list of message dicts, serialised to JSON for bash.
        """
        import shlex

        # The sed filter removes the early-exit block and everything from the
        # "main" line onward so that sourcing only defines functions.
        # shlex.quote keeps JSON containing single quotes from breaking the
        # script.
        script = f"""
        set -e
        source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
        tool_results={shlex.quote(json.dumps(tool_results))}
        find_tool_result_with_timestamp {shlex.quote(tool_id)} "$tool_results"
        """
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            cwd=cls._project_root(),
        )
        stdout = result.stdout.strip()
        # Surface the shell's stderr instead of an opaque JSONDecodeError.
        assert stdout, f"no output from find_tool_result_with_timestamp; stderr: {result.stderr!r}"
        return json.loads(stdout)

    def test_function_exists(self, bash_executor):
        """The returned source contains the function's own name."""
        source = bash_executor.get_function_source("find_tool_result_with_timestamp")
        assert "find_tool_result_with_timestamp" in source

    def test_accepts_tool_id_parameter(self, bash_executor):
        """The source mentions ``tool_id``."""
        source = bash_executor.get_function_source("find_tool_result_with_timestamp")
        assert "tool_id" in source

    def test_accepts_tool_results_parameter(self, bash_executor):
        """The source mentions ``tool_results``."""
        source = bash_executor.get_function_source("find_tool_result_with_timestamp")
        assert "tool_results" in source

    def test_returns_result_and_timestamp(self, bash_executor):
        """The source mentions both ``result`` and ``timestamp``."""
        source = bash_executor.get_function_source("find_tool_result_with_timestamp")
        assert "result" in source
        assert "timestamp" in source

    def test_finds_tool_result_by_id(self):
        """A matching tool_use_id returns its content and the message timestamp."""
        tool_results = [
            {
                "type": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "tool_abc",
                        "content": "Found result",
                    }
                ],
                "timestamp": "2025-01-01T00:00:00Z",
            }
        ]
        output = self._lookup("tool_abc", tool_results)
        assert output["result"] == "Found result"
        assert output["timestamp"] == "2025-01-01T00:00:00Z"

    def test_returns_no_result_for_missing_tool(self):
        """An unknown tool_use_id yields the placeholder 'No result'."""
        tool_results = [
            {
                "type": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "tool_abc",
                        "content": "Some result",
                    }
                ],
                "timestamp": "2025-01-01T00:00:00Z",
            }
        ]
        output = self._lookup("tool_xyz", tool_results)
        assert output["result"] == "No result"

    def test_handles_array_content_in_tool_result(self):
        """List-valued content has each text part present in the result."""
        tool_results = [
            {
                "type": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "tool_array",
                        "content": [
                            {"type": "text", "text": "Part 1"},
                            {"type": "text", "text": "Part 2"},
                        ],
                    }
                ],
                "timestamp": "2025-01-01T00:00:00Z",
            }
        ]
        output = self._lookup("tool_array", tool_results)
        # Should concatenate text parts
        assert "Part 1" in output["result"]
        assert "Part 2" in output["result"]
@pytest.mark.unit
class TestUsageMetadata:
    """Source-text checks for usage metadata keys inside create_trace."""

    def test_usage_metadata_included_in_assistant_run(self, bash_executor):
        """create_trace source must mention usage_metadata."""
        body = bash_executor.get_function_source("create_trace")
        assert "usage_metadata" in body

    def test_usage_metadata_has_input_tokens(self, bash_executor):
        """create_trace source must mention input_tokens."""
        body = bash_executor.get_function_source("create_trace")
        assert "input_tokens" in body

    def test_usage_metadata_has_output_tokens(self, bash_executor):
        """create_trace source must mention output_tokens."""
        body = bash_executor.get_function_source("create_trace")
        assert "output_tokens" in body

    def test_usage_metadata_has_token_details(self, bash_executor):
        """create_trace source must mention input_token_details and both cache keys."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("input_token_details", "cache_read", "cache_creation"):
            assert needle in body

    def test_usage_includes_cache_tokens_in_total(self, bash_executor):
        """create_trace source must reference both cache token counters."""
        body = bash_executor.get_function_source("create_trace")
        # Presence of both counters is all that is checked here
        for needle in ("cache_creation_input_tokens", "cache_read_input_tokens"):
            assert needle in body
@pytest.mark.unit
class TestDottedOrderHierarchy:
    """Source-text checks for how create_trace builds parent/child dotted_order values."""

    def test_turn_dotted_order_is_root(self, bash_executor):
        """The turn dotted_order is built as timestamp + turn_id, with no dot prefix."""
        body = bash_executor.get_function_source("create_trace")
        expected_fragments = (
            "turn_dotted_order",
            "${dotted_timestamp}${turn_id}",
        )
        for fragment in expected_fragments:
            assert fragment in body

    def test_assistant_dotted_order_includes_turn(self, bash_executor):
        """The assistant dotted_order is prefixed with the turn's dotted_order and a dot."""
        body = bash_executor.get_function_source("create_trace")
        expected_fragments = (
            "assistant_dotted_order",
            "${turn_dotted_order}.",
        )
        for fragment in expected_fragments:
            assert fragment in body

    def test_tool_dotted_order_includes_turn(self, bash_executor):
        """The tool dotted_order is prefixed with the turn's dotted_order and a dot."""
        body = bash_executor.get_function_source("create_trace")
        expected_fragments = (
            "tool_dotted_order",
            "${turn_dotted_order}.",
        )
        for fragment in expected_fragments:
            assert fragment in body
@pytest.mark.unit
class TestOutputsAccumulation:
    """Source-text checks for the all_outputs accumulation inside create_trace."""

    def test_all_outputs_initialized_with_user_message(self, bash_executor):
        """create_trace source must mention all_outputs and user."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("all_outputs", "user"):
            assert needle in body

    def test_llm_outputs_added_to_all_outputs(self, bash_executor):
        """create_trace source must mention llm_outputs."""
        body = bash_executor.get_function_source("create_trace")
        assert "llm_outputs" in body

    def test_tool_results_added_to_all_outputs(self, bash_executor):
        """create_trace source must contain the "tool" literal and tool_call_id."""
        body = bash_executor.get_function_source("create_trace")
        # The quoted "tool" literal plus a tool_call_id key
        for needle in ('"tool"', "tool_call_id"):
            assert needle in body

    def test_turn_outputs_filters_user_messages(self, bash_executor):
        """create_trace source must contain the jq filter that drops user-role entries."""
        body = bash_executor.get_function_source("create_trace")
        user_filter = 'select(.role != "user")'
        assert user_filter in body
@pytest.mark.unit
class TestBatchProcessing:
    """Source-text checks for the post/patch batching inside create_trace."""

    def test_posts_batch_initialized(self, bash_executor):
        """create_trace source must mention posts_batch."""
        body = bash_executor.get_function_source("create_trace")
        assert "posts_batch" in body

    def test_patches_batch_initialized(self, bash_executor):
        """create_trace source must mention patches_batch."""
        body = bash_executor.get_function_source("create_trace")
        assert "patches_batch" in body

    def test_turn_added_to_posts_batch(self, bash_executor):
        """create_trace source must mention turn_data and posts_batch."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("turn_data", "posts_batch"):
            assert needle in body

    def test_assistant_added_to_posts_batch(self, bash_executor):
        """create_trace source must mention assistant_data."""
        body = bash_executor.get_function_source("create_trace")
        assert "assistant_data" in body

    def test_tool_added_to_posts_batch(self, bash_executor):
        """create_trace source must mention tool_data."""
        body = bash_executor.get_function_source("create_trace")
        assert "tool_data" in body

    def test_assistant_update_added_to_patches_batch(self, bash_executor):
        """create_trace source must mention assistant_update and patches_batch."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("assistant_update", "patches_batch"):
            assert needle in body

    def test_tool_update_added_to_patches_batch(self, bash_executor):
        """create_trace source must mention tool_update."""
        body = bash_executor.get_function_source("create_trace")
        assert "tool_update" in body

    def test_turn_update_added_to_patches_batch(self, bash_executor):
        """create_trace source must mention turn_update."""
        body = bash_executor.get_function_source("create_trace")
        assert "turn_update" in body

    def test_send_multipart_batch_called_for_posts(self, bash_executor):
        """create_trace source must invoke send_multipart_batch with "post"."""
        body = bash_executor.get_function_source("create_trace")
        post_call = 'send_multipart_batch "post"'
        assert post_call in body

    def test_send_multipart_batch_called_for_patches(self, bash_executor):
        """create_trace source must invoke send_multipart_batch with "patch"."""
        body = bash_executor.get_function_source("create_trace")
        patch_call = 'send_multipart_batch "patch"'
        assert patch_call in body
@pytest.mark.unit
class TestCurrentTurnTracking:
    """Source-text checks for how create_trace sets and clears CURRENT_TURN_ID."""

    def test_current_turn_id_set_after_turn_creation(self, bash_executor):
        """create_trace source must assign $turn_id to CURRENT_TURN_ID."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("CURRENT_TURN_ID", 'CURRENT_TURN_ID="$turn_id"'):
            assert needle in body

    def test_current_turn_id_cleared_after_completion(self, bash_executor):
        """create_trace source must reset CURRENT_TURN_ID to the empty string."""
        body = bash_executor.get_function_source("create_trace")
        reset_statement = 'CURRENT_TURN_ID=""'
        assert reset_statement in body
@pytest.mark.unit
class TestMultipleLLMCalls:
    """Source-text checks for create_trace's handling of several LLM calls per turn."""

    def test_iterates_over_assistant_messages(self, bash_executor):
        """create_trace source must contain a loop keyword and mention assistant_msg."""
        body = bash_executor.get_function_source("create_trace")
        # Either loop keyword is accepted
        assert any(keyword in body for keyword in ("while", "for"))
        assert "assistant_msg" in body

    def test_llm_num_counter(self, bash_executor):
        """create_trace source must mention llm_num."""
        body = bash_executor.get_function_source("create_trace")
        assert "llm_num" in body

    def test_last_llm_end_tracked(self, bash_executor):
        """create_trace source must mention last_llm_end."""
        body = bash_executor.get_function_source("create_trace")
        assert "last_llm_end" in body

    def test_llm_inputs_include_accumulated_context(self, bash_executor):
        """create_trace source must mention llm_inputs and all_outputs."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("llm_inputs", "all_outputs"):
            assert needle in body
@pytest.mark.unit
class TestLogging:
    """Source-text checks for log statements inside create_trace."""

    def test_logs_turn_creation(self, bash_executor):
        """create_trace source must mention log, INFO and (case-insensitively) turn."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("log", "INFO"):
            assert needle in body
        lowered = body.lower()
        assert "turn" in lowered

    def test_logs_llm_call_count(self, bash_executor):
        """create_trace source must mention llm_num and the phrase "LLM call"."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("llm_num", "LLM call"):
            assert needle in body
+327
View File
@@ -0,0 +1,327 @@
"""
Unit tests for trace ordering and dotted_order generation from stop_hook.sh.
These tests verify that traces are correctly ordered in LangSmith:
- dotted_order format (YYYYMMDDTHHMMSSffffffZuuid)
- Parent-child relationships via dotted_order
- Timestamp precision (microseconds)
- Chronological ordering
"""
import json
import re
import pytest
from datetime import datetime
@pytest.mark.unit
class TestDottedOrderFormat:
    """Tests for dotted_order timestamp format.

    A single dotted_order segment in these tests is laid out as:
    ``YYYYMMDD`` (8) + ``T`` (1) + ``HHMMSS`` (6) + ``ffffff`` (6) = 21
    characters of timestamp, then ``Z`` (1), then a 36-character UUID, for a
    total of 58 characters.
    """

    def test_dotted_order_format_structure(self):
        """Test dotted_order follows correct format: YYYYMMDDTHHMMSSffffffZuuid"""
        # Sample value in the shape "${dotted_timestamp}${turn_id}"
        # (author's note: built this way in stop_hook.sh).
        dotted_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        # 21-char timestamp + 'Z' + 36-char UUID
        assert len(dotted_order) == 58

        # Split on the first 'Z'. partition() returns an empty separator when
        # 'Z' is missing, so the separator assertion below is a real check.
        timestamp_part, separator, uuid_part = dotted_order.partition('Z')

        # Timestamp: 8 date digits, 'T', then 12 digits (HHMMSS + microseconds)
        assert len(timestamp_part) == 21
        assert re.match(r'^\d{8}T\d{12}$', timestamp_part), \
            f"Timestamp {timestamp_part} doesn't match YYYYMMDDTHHMMSSmmmmmm"

        # Separator must actually be present in the value
        assert separator == 'Z'

        # UUID in canonical 8-4-4-4-12 lowercase hex form
        assert re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', uuid_part), \
            f"UUID {uuid_part} doesn't match UUID format"

    def test_child_dotted_order_includes_parent(self):
        """Test child dotted_order includes parent's dotted_order as prefix"""
        # Shape under test (author's note, from stop_hook.sh):
        # assistant_dotted_order="${turn_dotted_order}.${assistant_timestamp}${assistant_id}"
        parent_dotted_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        child_dotted_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7.20251216T174405123456Za8024e23-5b82-47fd-970e-f6a5ba3f5097"

        # Child must start with parent's dotted_order
        assert child_dotted_order.startswith(parent_dotted_order)

        # Child must have a dot separator
        assert '.' in child_dotted_order

        # What follows the parent prefix must be ".<timestamp>Z<uuid>"
        child_suffix = child_dotted_order[len(parent_dotted_order):]
        assert child_suffix.startswith('.')

        # The child's own segment has the same 58-character layout as the parent
        child_part = child_suffix[1:]  # Remove leading dot
        assert len(child_part) == 58

    def test_grandchild_dotted_order_hierarchy(self):
        """Test grandchild dotted_order maintains full hierarchy"""
        # Format: parent.child.grandchild
        parent = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        child = f"{parent}.20251216T174405123456Za8024e23-5b82-47fd-970e-f6a5ba3f5097"
        grandchild = f"{child}.20251216T174406789012Z0ec6b845-18b9-4aa1-8f1b-6ba3f9fdefd6"

        # Every ancestor is a string prefix of the descendant
        assert grandchild.startswith(parent)
        assert grandchild.startswith(child)

        # The number of dots equals the depth below the root
        assert parent.count('.') == 0  # Top level
        assert child.count('.') == 1  # One level deep
        assert grandchild.count('.') == 2  # Two levels deep
@pytest.mark.unit
class TestTimestampPrecision:
    """Tests for microsecond precision in timestamps.

    Index layout of the 21-character timestamp ``YYYYMMDDTHHMMSSffffff``:
    characters 0-14 are ``YYYYMMDDTHHMMSS`` (date, 'T', time to the second)
    and characters 15-20 are the six microsecond digits.
    """

    def test_get_microseconds_provides_six_digits(self, bash_executor):
        """Test microsecond precision for ordering"""
        result = bash_executor.call_function("get_microseconds")

        # Must be exactly 6 digits
        assert len(result) == 6
        assert result.isdigit()

        # Convert to verify range (0-999999)
        microseconds = int(result)
        assert 0 <= microseconds <= 999999

    def test_timestamp_includes_microseconds(self):
        """Test that dotted_order timestamps include microseconds"""
        # Shape under test (author's note, from stop_hook.sh):
        # dotted_timestamp=$(date -u +"%Y%m%dT%H%M%S")
        # microseconds=$(get_microseconds)
        # dotted_timestamp="${dotted_timestamp}${microseconds}Z"
        dotted_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        # The timestamp is the first 21 characters (everything before 'Z')
        timestamp = dotted_order[:21]
        assert timestamp == "20251216T174404397000"
        assert dotted_order[21] == 'Z'

        # Microseconds are the six digits after YYYYMMDDTHHMMSS (15 chars)
        microseconds = timestamp[15:21]
        assert microseconds == "397000"
        assert len(microseconds) == 6
        assert microseconds.isdigit()
        assert int(microseconds) <= 999999

    def test_microseconds_enable_sub_second_ordering(self):
        """Test that microseconds allow ordering of rapid events"""
        # Two events in the same second should have different microseconds
        timestamp1 = "20251216T174404123456"  # .123456 seconds
        timestamp2 = "20251216T174404789012"  # .789012 seconds

        # Same date and time up to and including the seconds field
        assert timestamp1[:15] == timestamp2[:15]

        # Only the microsecond digits differ, and they determine the order
        micro1 = int(timestamp1[15:21])
        micro2 = int(timestamp2[15:21])
        assert micro1 == 123456
        assert micro2 == 789012
        assert micro1 < micro2

        # The full strings therefore also compare in the same order
        assert timestamp1 < timestamp2
@pytest.mark.unit
class TestTraceOrdering:
    """Checks that plain string sorting of dotted_order values is chronological."""

    def test_dotted_order_sorts_chronologically(self):
        """Root values with increasing timestamps sort oldest-first."""
        first = "20251216T174404000000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        second = "20251216T174405000000Z1234abcd-5678-9012-3456-789012345678"
        third = "20251216T174406000000Za9876543-dcba-fedc-ba98-765432109876"

        # Start from a shuffled list and sort lexicographically
        ordered = sorted([third, first, second])

        assert ordered == [first, second, third]

    def test_parent_sorts_before_children(self):
        """A parent value sorts ahead of values that extend it."""
        root = "20251216T174404000000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        kid_a = f"{root}.20251216T174405000000Za8024e23-5b82-47fd-970e-f6a5ba3f5097"
        kid_b = f"{root}.20251216T174406000000Z0ec6b845-18b9-4aa1-8f1b-6ba3f9fdefd6"

        ordered = sorted([kid_b, kid_a, root])

        # Parent first, then the children by their own timestamps
        assert ordered == [root, kid_a, kid_b]

    def test_sibling_traces_sort_by_timestamp(self):
        """Two children of the same parent sort by their own timestamp segment."""
        root = "20251216T174404000000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        late_sibling = f"{root}.20251216T174406000000Zchild2-uuid"
        early_sibling = f"{root}.20251216T174405000000Zchild1-uuid"

        # Deliberately listed latest-first before sorting
        ordered = sorted([late_sibling, early_sibling])

        assert ordered == [early_sibling, late_sibling]

    def test_microsecond_precision_affects_ordering(self):
        """Children that differ only in the microsecond digits still sort correctly."""
        root = "20251216T174404000000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        # Same second, microseconds .000100 / .000200 / .000300
        at_100 = f"{root}.20251216T174405000100Zevent1"
        at_200 = f"{root}.20251216T174405000200Zevent2"
        at_300 = f"{root}.20251216T174405000300Zevent3"

        ordered = sorted([at_300, at_100, at_200])

        assert ordered == [at_100, at_200, at_300]
@pytest.mark.unit
class TestTraceIDExtraction:
    """Checks for pulling the root trace id out of a dotted_order string."""

    def test_extract_trace_id_from_root_dotted_order(self):
        """The trace id of a root value is everything after its first 'Z'."""
        # Mirrors the shell expansion noted by the author:
        # trace_id="${turn_dotted_order#*Z}"
        root_value = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        _, _, extracted = root_value.partition('Z')

        assert extracted == "0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        assert len(extracted) == 36  # UUID length

    def test_extract_trace_id_from_child_dotted_order(self):
        """The trace id of a child value comes from its first dot-separated segment."""
        child_value = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7.20251216T174405123456Za8024e23-5b82-47fd-970e-f6a5ba3f5097"

        # Root segment first, then the part after that segment's first 'Z'
        root_segment = child_value.partition('.')[0]
        extracted = root_segment.partition('Z')[2]

        assert extracted == "0e01bf50-474d-4536-810f-67d3ee7ea3e7"

    def test_all_children_share_parent_trace_id(self):
        """Every value in one tree yields the same root trace id."""
        root = "20251216T174404397000Zroot-trace-id"
        kid_a = f"{root}.20251216T174405123456Zchild1-id"
        kid_b = f"{root}.20251216T174406789012Zchild2-id"
        grandkid = f"{kid_a}.20251216T174407000000Zgrandchild-id"

        # The root is read directly; descendants go through their first segment
        root_id = root.partition('Z')[2]
        descendant_ids = [
            value.partition('.')[0].partition('Z')[2]
            for value in (kid_a, kid_b, grandkid)
        ]

        assert root_id == "root-trace-id"
        for found in descendant_ids:
            assert found == "root-trace-id"
@pytest.mark.unit
class TestRealWorldOrdering:
    """Tests with scenarios modelled on a Claude Code transcript (cc_transcript.jsonl)."""

    def test_tool_call_ordering_within_turn(self):
        """Test that within a turn, events are ordered: user → assistant → tool → assistant"""
        # Sequence being modelled (author's note, from the transcript structure):
        # 1. User message (timestamp T1)
        # 2. Assistant with tool_use (timestamp T2)
        # 3. Tool result (timestamp T3)
        # 4. Assistant final response (timestamp T4)
        turn_id = "turn-uuid"
        turn_dotted = f"20251216T174404000000Z{turn_id}"

        # Children of the turn, one second apart
        assistant1 = f"{turn_dotted}.20251216T174405000000Zassistant1"
        tool = f"{turn_dotted}.20251216T174406000000Ztool"
        assistant2 = f"{turn_dotted}.20251216T174407000000Zassistant2"

        # Sort a shuffled list to verify ordering
        events = [assistant2, tool, assistant1, turn_dotted]
        events.sort()

        # Turn first, then its children chronologically
        assert events == [turn_dotted, assistant1, tool, assistant2]

    def test_multiple_turns_sort_chronologically(self):
        """Test that multiple turns sort in chronological order"""
        turn1 = "20251216T174404000000Zturn1-uuid"
        turn2 = "20251216T174410000000Zturn2-uuid"
        turn3 = "20251216T174420000000Zturn3-uuid"

        turns = [turn3, turn1, turn2]  # Unsorted
        turns.sort()

        # Should be chronological
        assert turns == [turn1, turn2, turn3]

    def test_iso_timestamp_to_dotted_order_conversion(self):
        """Test conversion from ISO timestamp (transcript) to dotted_order format"""
        # ISO: 2025-12-16T17:44:04.397Z
        # To:  20251216T174404397000 (milliseconds padded to microseconds)
        iso_timestamp = "2025-12-16T17:44:04.397Z"

        # fromisoformat() does not accept a trailing 'Z' on every supported
        # Python version, so spell the UTC offset out explicitly.
        dt = datetime.fromisoformat(iso_timestamp.replace('Z', '+00:00'))

        # The fractional part must come from the parsed value, not a constant
        assert dt.microsecond == 397000

        # %f renders the microseconds as six zero-padded digits
        full_timestamp = dt.strftime("%Y%m%dT%H%M%S%f")

        # YYYYMMDDTHHMMSS (15 chars) + ffffff (6 chars) = 21 chars
        assert full_timestamp == "20251216T174404397000"
        assert len(full_timestamp) == 21

        # A later ISO timestamp must convert to a string that sorts after
        iso2 = "2025-12-16T17:44:05.123Z"
        dt2 = datetime.fromisoformat(iso2.replace('Z', '+00:00'))
        full2 = dt2.strftime("%Y%m%dT%H%M%S%f")
        assert full2 == "20251216T174405123000"

        assert full2 > full_timestamp
+76
View File
@@ -0,0 +1,76 @@
"""
Unit tests for utility functions from stop_hook.sh.
Tests:
- get_microseconds() - Cross-platform microsecond timestamps
- get_file_size() - Cross-platform file size
"""
import pytest
@pytest.mark.unit
class TestUtilities:
    """Tests for the get_microseconds and get_file_size shell utility functions.

    Each test calls the shell function through the ``bash_executor`` fixture
    and checks the string it returns.
    """

    def test_get_microseconds_returns_six_digits(self, bash_executor):
        """Test microseconds format"""
        result = bash_executor.call_function("get_microseconds")
        assert len(result) == 6, f"Expected 6 digits, got {len(result)}: {result}"
        assert result.isdigit(), f"Expected all digits, got: {result}"

    def test_get_microseconds_changes_over_time(self, bash_executor):
        """Test that microseconds change between calls"""
        import time

        # Take several samples rather than two: an occasional repeated value is
        # tolerated, but a function that always returns the same value fails.
        samples = []
        for _ in range(5):
            samples.append(bash_executor.call_function("get_microseconds"))
            time.sleep(0.001)  # 1ms

        assert len(set(samples)) > 1, \
            f"get_microseconds returned the same value every time: {samples}"

    def test_get_file_size_returns_bytes(self, bash_executor, tmp_path):
        """Test file size calculation"""
        test_file = tmp_path / "test.txt"
        test_file.write_text("hello world")  # 11 bytes
        result = bash_executor.call_function("get_file_size", str(test_file))
        size = int(result)
        assert size == 11, f"Expected 11 bytes, got {size}"

    def test_get_file_size_for_empty_file(self, bash_executor, tmp_path):
        """Test file size for empty file"""
        test_file = tmp_path / "empty.txt"
        test_file.touch()
        result = bash_executor.call_function("get_file_size", str(test_file))
        size = int(result)
        assert size == 0

    def test_get_file_size_for_large_file(self, bash_executor, tmp_path):
        """Test file size for large files"""
        test_file = tmp_path / "large.txt"
        content = b"x" * (1024 * 1024)  # 1MB
        test_file.write_bytes(content)
        result = bash_executor.call_function("get_file_size", str(test_file))
        size = int(result)
        assert size == 1024 * 1024, f"Expected 1048576 bytes, got {size}"

    def test_get_file_size_for_binary_file(self, bash_executor, tmp_path):
        """Test file size for binary files"""
        test_file = tmp_path / "binary.dat"
        binary_data = bytes(range(256))  # 256 bytes, one of every byte value
        test_file.write_bytes(binary_data)
        result = bash_executor.call_function("get_file_size", str(test_file))
        size = int(result)
        assert size == 256