mirror of
https://github.com/langchain-ai/tracing-claude-code.git
synced 2026-07-01 20:04:49 -04:00
add tests
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
# Claude Code to LangSmith Tracing Project
|
||||
|
||||
## Project Overview
|
||||
This project sets up tracing of Claude Code conversations to LangSmith.
|
||||
|
||||
## How It Works
|
||||
- A "Stop" hook is configured in `.claude/settings.local.json` that runs each time Claude Code responds
|
||||
- The hook reads Claude Code's generated conversation transcripts
|
||||
- Messages in the transcript are converted into LangSmith runs and sent to the configured LangSmith project
|
||||
|
||||
## Commands
|
||||
|
||||
### Fetch Traces
|
||||
Use the `langsmith-fetch` command to retrieve traces from the LangSmith project when you want to debug. Do this proactively to make sure your changes are correct:
|
||||
|
||||
Get the last trace:
|
||||
```bash
|
||||
langsmith-fetch traces --project-uuid 16e20536-e4d7-4390-8fcf-1d49cb47f4c2 --format json
|
||||
```
|
||||
|
||||
Get the last N traces:
|
||||
```bash
|
||||
langsmith-fetch traces --project-uuid 16e20536-e4d7-4390-8fcf-1d49cb47f4c2 --format json --limit 5
|
||||
```
|
||||
|
||||
## Project Configuration
|
||||
- LangSmith Project UUID: `16e20536-e4d7-4390-8fcf-1d49cb47f4c2`
|
||||
- Hook configuration is in `.claude/settings.local.json`
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
*.so
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# Virtual environments
|
||||
.venv/
|
||||
venv/
|
||||
ENV/
|
||||
|
||||
# Testing
|
||||
.pytest_cache/
|
||||
.coverage
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
|
||||
# IDE
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# Environment variables
|
||||
.env
|
||||
.env.local
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Project specific
|
||||
cc_transcript.jsonl
|
||||
*.log
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
[pytest]
|
||||
testpaths = tests
|
||||
python_files = test_*.py
|
||||
python_classes = Test*
|
||||
python_functions = test_*
|
||||
|
||||
# Markers for test categorization
|
||||
markers =
|
||||
unit: Unit tests for individual functions
|
||||
integration: Integration tests requiring API calls
|
||||
slow: Tests that take >5 seconds
|
||||
|
||||
# Integration tests disabled by default (require API key)
|
||||
addopts =
|
||||
-v
|
||||
--tb=short
|
||||
--strict-markers
|
||||
-m "not integration"
|
||||
--cov=tests
|
||||
--cov-report=html
|
||||
--cov-report=term-missing
|
||||
|
||||
# Timeout for tests
|
||||
timeout = 60
|
||||
|
||||
# Capture output
|
||||
log_cli = false
|
||||
log_cli_level = INFO
|
||||
Executable
+957
@@ -0,0 +1,957 @@
|
||||
#!/bin/bash
|
||||
###
|
||||
# Claude Code Stop Hook - LangSmith Tracing Integration
|
||||
# Sends Claude Code traces to LangSmith after each response.
|
||||
###
|
||||
|
||||
# Abort the hook on the first unhandled command failure.
set -o errexit

# Logging configuration (defined first so log/debug work during startup).
LOG_FILE="${HOME}/.claude/state/hook.log"
# Lower-cased copy of CC_LANGSMITH_DEBUG so "TRUE"/"True" also enable debug output.
DEBUG=$(echo "${CC_LANGSMITH_DEBUG}" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
# Logging functions
|
||||
# Append one timestamped line to $LOG_FILE.
# Arguments:
#   $1   - severity label (e.g. INFO, WARN, ERROR, DEBUG)
#   $2.. - message words, joined with single spaces
# Logging is best-effort: the log directory is created on demand (the first
# call can run before the state directory exists) and a failed write never
# aborts the hook under `set -e`.
log() {
    local level="$1"
    shift

    local log_dir
    log_dir="$(dirname "$LOG_FILE")"
    if [ ! -d "$log_dir" ]; then
        mkdir -p "$log_dir" 2> /dev/null || return 0
    fi

    # Group the command so the shell's own "cannot open file" diagnostic is silenced too.
    { echo "$(date '+%Y-%m-%d %H:%M:%S') [$level] $*" >> "$LOG_FILE"; } 2> /dev/null || true
}
|
||||
|
||||
# Log a DEBUG-level message, but only when $DEBUG is exactly "true".
# All arguments are forwarded to log as the message.
debug() {
    [ "$DEBUG" = "true" ] || return 0
    log "DEBUG" "$@"
}
|
||||
|
||||
# Immediate debug logging
debug "Hook started, TRACE_TO_LANGSMITH=$TRACE_TO_LANGSMITH"

# Tracing is opt-in: anything other than a case-insensitive "true" exits quietly.
case "$(echo "$TRACE_TO_LANGSMITH" | tr '[:upper:]' '[:lower:]')" in
    true) ;;
    *)
        debug "Tracing disabled, exiting early"
        exit 0
        ;;
esac

# External tools the hook depends on. A missing tool is reported on stderr,
# but the hook still exits 0.
for cmd in jq curl uuidgen; do
    command -v "$cmd" &> /dev/null && continue
    echo "Error: $cmd is required but not installed" >&2
    exit 0
done

# Config (continued)
# CC_LANGSMITH_API_KEY takes precedence over LANGSMITH_API_KEY.
API_KEY="${CC_LANGSMITH_API_KEY:-$LANGSMITH_API_KEY}"
PROJECT="${CC_LANGSMITH_PROJECT:-claude-code}"
API_BASE="https://api.smith.langchain.com"
STATE_FILE="${STATE_FILE:-$HOME/.claude/state/langsmith_state.json}"

# Global variables
CURRENT_TURN_ID=""  # Track current turn run for cleanup on exit

# Ensure state directory exists
mkdir -p "$(dirname "$STATE_FILE")"

# Without an API key nothing can be sent; record the problem and stop.
[ -n "$API_KEY" ] || {
    log "ERROR" "CC_LANGSMITH_API_KEY not set"
    exit 0
}
|
||||
|
||||
# Print the microsecond part of the current time as exactly six digits.
# Sources, in order of preference:
#   1. gdate (GNU date from coreutils, e.g. `brew install coreutils`)
#   2. python3 on macOS, whose date lacks %N
#   3. the system date, assumed to be GNU date
# If the chosen source fails or prints anything other than six digits, "000000"
# is printed instead so callers can always concatenate the value into a
# timestamp.
get_microseconds() {
    local micros=""

    if command -v gdate &> /dev/null; then
        micros=$(gdate +%6N 2> /dev/null) || micros=""
    elif [[ "$OSTYPE" == "darwin"* ]]; then
        micros=$(python3 -c "import time; print(str(int(time.time() * 1000000) % 1000000).zfill(6))" 2> /dev/null) || micros=""
    else
        micros=$(date +%6N 2> /dev/null) || micros=""
    fi

    # Guard against date implementations without %N support or a missing python3.
    local six_digits='^[0-9]{6}$'
    if [[ ! "$micros" =~ $six_digits ]]; then
        micros="000000"
    fi

    echo "$micros"
}
|
||||
|
||||
# Print the size of a file in bytes.
# Arguments:
#   $1 - path of the file to measure
# macOS and Linux ship stat with different format flags, so the flag is
# chosen from $OSTYPE.
get_file_size() {
    local path="$1"
    case "$OSTYPE" in
        darwin*) stat -f%z "$path" ;;
        *)       stat -c%s "$path" ;;
    esac
}
|
||||
|
||||
# Perform a JSON request against the LangSmith API.
# Arguments:
#   $1 - HTTP method (e.g. POST, PATCH)
#   $2 - endpoint path appended to $API_BASE (e.g. /runs/<id>)
#   $3 - request body (JSON text)
# Output:  the response body on stdout when the status is 2xx.
# Returns: 0 on a 2xx status; 1 otherwise, including when curl produced no
#          usable status code at all. Failures are written to the log.
api_call() {
    local method="$1"
    local endpoint="$2"
    local data="$3"

    local response
    local http_code
    # `|| true`: a transport failure must reach the status check below
    # instead of tripping `set -e` in the middle of the function.
    response=$(curl -s --max-time 60 -w "\n%{http_code}" -X "$method" \
        -H "x-api-key: $API_KEY" \
        -H "Content-Type: application/json" \
        -d "$data" \
        "$API_BASE$endpoint" 2>&1) || true

    # curl appends the status code as the final line (see -w above).
    http_code=$(echo "$response" | tail -n1)
    response=$(echo "$response" | sed '$d')

    # A non-numeric status would make the integer comparisons below error out
    # and be mistaken for success, so reject it explicitly.
    local numeric='^[0-9]+$'
    if [[ ! "$http_code" =~ $numeric ]]; then
        log "ERROR" "API call failed: $method $endpoint"
        log "ERROR" "No HTTP status received: $http_code $response"
        return 1
    fi

    if [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
        log "ERROR" "API call failed: $method $endpoint"
        log "ERROR" "HTTP $http_code: $response"
        # Only the first 500 characters of the payload are logged.
        log "ERROR" "Request data: ${data:0:500}"
        return 1
    fi

    echo "$response"
}
|
||||
|
||||
# EXIT handler: if a turn run is still tracked in $CURRENT_TURN_ID when the
# script ends, PATCH it with an end time and an error marker.
cleanup_pending_turn() {
    # Nothing is pending once create_trace has cleared the tracked id.
    [ -n "$CURRENT_TURN_ID" ] || return 0

    debug "Cleanup: completing pending turn run $CURRENT_TURN_ID"

    local finished_at
    finished_at=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    local patch_body
    patch_body=$(jq -n --arg time "$finished_at" '{
        outputs: {messages: []},
        end_time: $time,
        error: "Incomplete: script exited early"
    }')

    # Best effort only: the script is exiting, so API errors are ignored.
    api_call "PATCH" "/runs/$CURRENT_TURN_ID" "$patch_body" > /dev/null 2>&1 || true
    log "WARN" "Completed pending turn run $CURRENT_TURN_ID due to early exit"
}

# Set trap to cleanup on exit (EXIT covers normal exit, errors, and interrupts)
trap cleanup_pending_turn EXIT
|
||||
|
||||
# Print the persisted hook state as JSON.
# Reads $STATE_FILE. Prints "{}" when the file is missing, empty, unreadable,
# or does not contain a JSON object, so that callers can always feed the
# result to jq. (A corrupt file therefore behaves like a fresh start.)
load_state() {
    # -s is false for both a missing and a zero-length file.
    if [ ! -s "$STATE_FILE" ]; then
        echo "{}"
        return
    fi

    local contents
    contents=$(cat "$STATE_FILE" 2> /dev/null) || contents=""

    if echo "$contents" | jq -e 'type == "object"' > /dev/null 2>&1; then
        echo "$contents"
    else
        echo "{}"
    fi
}
|
||||
|
||||
# Persist the hook state to $STATE_FILE.
# Arguments:
#   $1 - state as JSON text
# The state is written to a temporary file next to $STATE_FILE and then renamed
# over it, so an interrupted write never leaves a truncated state file behind.
# printf is used instead of echo so the text is written verbatim even if it
# looks like an echo option.
# Returns: 0 on success, 1 if the temporary file could not be written.
save_state() {
    local state="$1"
    local tmp_file="${STATE_FILE}.tmp.$$"

    if printf '%s\n' "$state" > "$tmp_file"; then
        mv -f "$tmp_file" "$STATE_FILE"
    else
        rm -f "$tmp_file"
        return 1
    fi
}
|
||||
|
||||
# Print the content field of a transcript record as compact JSON.
# Arguments:
#   $1 - one transcript record (JSON text)
# Records with a "message" key yield .message.content, other objects yield
# .content, and non-object values yield null.
get_content() {
    local record="$1"
    local content_filter='if type == "object" and has("message") then .message.content elif type == "object" then .content else null end'
    echo "$record" | jq -c "$content_filter"
}
|
||||
|
||||
# Print "true" when the record's content is an array containing at least one
# object block of type "tool_result"; print "false" otherwise.
# Arguments:
#   $1 - one transcript record (JSON text)
is_tool_result() {
    local record="$1"
    local body
    body=$(get_content "$record")

    local verdict="false"
    if echo "$body" | jq -e 'if type == "array" then any(.[]; type == "object" and .type == "tool_result") else false end' > /dev/null 2>&1; then
        verdict="true"
    fi
    echo "$verdict"
}
|
||||
|
||||
# Print a message's content as a JSON array of content blocks.
# Arguments:
#   $1 - one transcript record (JSON text)
# String content becomes a single text block. Array content is mapped block by
# block (text and thinking kept, tool_use renamed to tool_call, bare strings
# wrapped as text, anything else passed through). Any other content, and an
# empty array, yields a single empty text block.
format_content() {
    local msg="$1"
    local body
    body=$(get_content "$msg")

    # Classify once; unparsable content leaves the kind empty and falls
    # through to the default branch.
    local kind
    kind=$(echo "$body" | jq -r 'type' 2> /dev/null) || kind=""

    case "$kind" in
        string)
            echo "$body" | jq '[{"type": "text", "text": .}]'
            ;;
        array)
            echo "$body" | jq '[
                .[] |
                if type == "object" then
                    if .type == "text" then
                        {"type": "text", "text": .text}
                    elif .type == "thinking" then
                        {"type": "thinking", "thinking": .thinking}
                    elif .type == "tool_use" then
                        {"type": "tool_call", "name": .name, "args": .input, "id": .id}
                    else
                        .
                    end
                elif type == "string" then
                    {"type": "text", "text": .}
                else
                    .
                end
            ] | if length == 0 then [{"type": "text", "text": ""}] else . end'
            ;;
        *)
            echo '[{"type": "text", "text": ""}]'
            ;;
    esac
}
|
||||
|
||||
# Print the tool_use blocks of a message as a compact JSON array.
# Arguments:
#   $1 - one transcript record (JSON text)
# Prints "[]" when the content is not an array.
get_tool_uses() {
    local msg="$1"
    local body
    body=$(get_content "$msg")

    if echo "$body" | jq -e 'type == "array"' > /dev/null 2>&1; then
        echo "$body" | jq -c '[.[] | select(type == "object" and .type == "tool_use")]'
    else
        echo "[]"
    fi
}
|
||||
|
||||
# Print the usage object of the last part that has one, as compact JSON.
# Arguments:
#   $1 - JSON array of assistant message parts
# The last non-null .message.usage wins (the original comment notes that SSE
# counts are cumulative); prints null when no part carries usage.
get_usage_from_parts() {
    local parts="$1"
    local last_usage_filter='[.[] | .message.usage // null | select(. != null)] | last // null'
    echo "$parts" | jq -c "$last_usage_filter"
}
|
||||
|
||||
# Look up the result of one tool call among the collected tool-result records.
# Arguments:
#   $1 - tool_use id to search for
#   $2 - JSON array of transcript records that carry tool_result blocks
# Output: compact JSON {result: "...", timestamp: "..."} for the first matching
# block. Text blocks of an array result are joined with spaces; a string result
# is used as is; anything else is stringified. The timestamp is the enclosing
# record's .timestamp. With no match: {result: "No result", timestamp: null}.
find_tool_result_with_timestamp() {
    local tool_id="$1"
    local tool_results="$2"

    local lookup_filter='
        first(
            .[] |
            . as $msg |
            (if type == "object" and has("message") then .message.content elif type == "object" then .content else null end) as $content |
            if $content | type == "array" then
                $content[] |
                select(type == "object" and .type == "tool_result" and .tool_use_id == $id) |
                {
                    result: (
                        if .content | type == "array" then
                            [.content[] | select(type == "object" and .type == "text") | .text] | join(" ")
                        elif .content | type == "string" then
                            .content
                        else
                            .content | tostring
                        end
                    ),
                    timestamp: $msg.timestamp
                }
            else
                empty
            end
        ) // {result: "No result", timestamp: null}
    '

    local match
    match=$(echo "$tool_results" | jq -c --arg id "$tool_id" "$lookup_filter")

    echo "$match"
}
|
||||
|
||||
# Merge the parts of one assistant message into a single record.
# Arguments:
#   $1 - JSON array of transcript records that belong to the same message
# Output: the first part, with its content replaced by the concatenated content
# blocks of all parts (adjacent text blocks are fused into one) and, when any
# part carries usage, the last usage stored under _usage.
merge_assistant_parts() {
    local parts="$1"

    # Usage of the last part that has one (null when none does).
    local usage
    usage=$(get_usage_from_parts "$parts")

    local merge_filter='
        .[0][0] as $base |
        (.[0] | map(if type == "object" and has("message") then .message.content elif type == "object" then .content else null end) | map(select(. != null))) as $contents |
        ($contents | map(
            if type == "string" then [{"type":"text","text":.}]
            elif type == "array" then .
            else [.]
            end
        ) | add // []) as $merged_content |
        ($merged_content | reduce .[] as $item (
            {result: [], buffer: null};
            if $item.type == "text" then
                if .buffer then .buffer.text += $item.text
                else .buffer = $item
                end
            else
                (if .buffer then .result += [.buffer] else . end) |
                .buffer = null | .result += [$item]
            end
        ) | if .buffer then .result + [.buffer] else .result end) as $final_content |
        $base |
        if type == "object" and has("message") then
            .message.content = $final_content |
            (if $usage != null then .message._usage = $usage else . end)
        elif type == "object" then
            .content = $final_content |
            (if $usage != null then ._usage = $usage else . end)
        else
            .
        end
    '

    # -s wraps the single input array in an outer array, hence the .[0] prefixes.
    echo "$parts" | jq -s --argjson usage "$usage" "$merge_filter"
}
|
||||
|
||||
# Serialize one run for the multipart upload.
# Arguments:
#   $1 - operation, "post" or "patch"
#   $2 - full run JSON
#   $3 - temp directory that receives the part files for this batch
# Output: curl arguments, one per line ("-F" followed by the form spec). The
# main part (the run without inputs/outputs) is always emitted. The inputs and
# outputs parts are emitted only when present and non-null. Every part
# carries an explicit Content-Length header equal to its file size.
serialize_for_multipart() {
    local operation="$1"
    local run_json="$2"
    local temp_dir="$3"

    local run_id
    run_id=$(echo "$run_json" | jq -r '.id')

    local inputs
    inputs=$(echo "$run_json" | jq -c '.inputs // empty')

    local outputs
    outputs=$(echo "$run_json" | jq -c '.outputs // empty')

    local main_data
    main_data=$(echo "$run_json" | jq -c 'del(.inputs, .outputs)')

    local suffix payload part_name part_file part_size
    for suffix in main inputs outputs; do
        case "$suffix" in
            main)
                payload="$main_data"
                part_name="${operation}.${run_id}"
                ;;
            inputs)
                payload="$inputs"
                part_name="${operation}.${run_id}.inputs"
                ;;
            outputs)
                payload="$outputs"
                part_name="${operation}.${run_id}.outputs"
                ;;
        esac

        # Optional parts are skipped when absent.
        if [ "$suffix" != "main" ]; then
            if [ "$payload" = "null" ] || [ -z "$payload" ]; then
                continue
            fi
        fi

        part_file="$temp_dir/${operation}_${run_id}_${suffix}.json"
        echo "$payload" > "$part_file"
        # A failed size lookup leaves the size empty rather than aborting.
        part_size=$(get_file_size "$part_file") || true

        echo "-F"
        echo "${part_name}=<${part_file};type=application/json;headers=Content-Length:${part_size}"
    done
}
|
||||
|
||||
# Send a batch of runs to the /runs/multipart endpoint.
# Arguments:
#   $1 - operation, "post" or "patch"
#   $2 - JSON array of runs
# Returns: 0 when the batch is empty or the server answers 2xx; 1 when the
#          payload is not valid JSON, the temp directory cannot be created,
#          or the request fails (including when no HTTP status is received).
# The temp directory holding the part files is removed before returning.
send_multipart_batch() {
    local operation="$1"
    local batch_json="$2"
    local numeric='^[0-9]+$'

    # Parse batch size
    local batch_size
    batch_size=$(echo "$batch_json" | jq 'length' 2> /dev/null) || batch_size=""

    # Invalid JSON would otherwise slip past the integer test below.
    if [[ ! "$batch_size" =~ $numeric ]]; then
        log "ERROR" "Batch $operation skipped: payload is not valid JSON"
        return 1
    fi

    if [ "$batch_size" -eq 0 ]; then
        debug "No $operation runs to send"
        return 0
    fi

    # Create temp directory for this batch
    local temp_dir
    if ! temp_dir=$(mktemp -d); then
        log "ERROR" "Batch $operation failed: could not create temp directory"
        return 1
    fi

    # Build multipart curl command
    local curl_args=()
    curl_args+=("-s" "--max-time" "60" "-w" "\n%{http_code}" "-X" "POST")
    curl_args+=("-H" "x-api-key: $API_KEY")

    # Serialize each run and collect its curl -F arguments. Reading line by
    # line keeps every argument intact (no word splitting).
    local run arg
    while IFS= read -r run; do
        while IFS= read -r arg; do
            curl_args+=("$arg")
        done < <(serialize_for_multipart "$operation" "$run" "$temp_dir")
    done < <(echo "$batch_json" | jq -c '.[]')

    curl_args+=("$API_BASE/runs/multipart")

    # Execute curl. `|| true` lets a transport failure reach the status check
    # and the temp-dir cleanup instead of tripping `set -e`.
    local response
    local http_code
    response=$(curl "${curl_args[@]}" 2>&1) || true
    http_code=$(echo "$response" | tail -n1)
    response=$(echo "$response" | sed '$d')

    # Cleanup temp directory
    rm -rf "$temp_dir"

    # A missing or non-numeric status must count as failure; the integer
    # comparisons alone would error out and be mistaken for success.
    if [[ ! "$http_code" =~ $numeric ]] || [ "$http_code" -lt 200 ] || [ "$http_code" -ge 300 ]; then
        log "ERROR" "Batch $operation failed: HTTP $http_code"
        log "ERROR" "Response: $response"
        return 1
    fi

    log "INFO" "Batch $operation succeeded: $batch_size runs"
    return 0
}
|
||||
|
||||
# Convert an ISO-8601 UTC timestamp into the dotted_order timestamp format.
#   2025-12-16T17:44:04.397Z    -> 20251216T174404397000Z
#   2025-12-16T17:44:04Z        -> 20251216T174404000000Z
#   2025-12-16T17:44:04.397123Z -> 20251216T174404397123Z
# The fraction is padded or truncated to exactly six digits. Input in any other
# shape goes through the previous sed-based conversion unchanged.
_iso_to_dotted_timestamp() {
    local iso="$1"
    local compact
    compact=$(echo "$iso" | sed 's/[-:]//g')

    local pattern='^([0-9]{8}T[0-9]{6})(\.([0-9]+))?Z$'
    if [[ "$compact" =~ $pattern ]]; then
        local fraction="${BASH_REMATCH[3]}000000"
        echo "${BASH_REMATCH[1]}${fraction:0:6}Z"
    else
        echo "$iso" | sed 's/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/'
    fi
}

# Create one LangSmith trace for a conversation turn.
# Arguments:
#   $1 - session id (stored as thread_id metadata on the turn run)
#   $2 - turn number (used for the "turn-N" tag and the log line)
#   $3 - user message record (JSON text)
#   $4 - JSON array of merged assistant messages, one per LLM call
#   $5 - JSON array of records carrying the tool results of this turn
# Builds a top-level "chain" run for the turn, an "llm" child run per assistant
# message and a "tool" child run per tool_use block (tools are siblings of the
# LLM runs). All creations go out in one multipart "post" batch and all
# completions in one "patch" batch. Send failures are ignored here because
# send_multipart_batch logs them.
# Globals: reads PROJECT; sets CURRENT_TURN_ID while the turn is in progress
# and clears it once both batches have been attempted.
create_trace() {
    local session_id="$1"
    local turn_num="$2"
    local user_msg="$3"
    local assistant_messages="$4"  # JSON array of assistant messages
    local tool_results="$5"

    # Batch collectors for this trace
    local posts_batch="[]"
    local patches_batch="[]"

    local turn_id
    turn_id=$(uuidgen | tr '[:upper:]' '[:lower:]')

    local user_content
    user_content=$(format_content "$user_msg")

    local now
    now=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    # dotted_order timestamp with microseconds (format: YYYYMMDDTHHMMSSffffffZ)
    local dotted_timestamp
    dotted_timestamp="$(date -u +"%Y%m%dT%H%M%S")$(get_microseconds)Z"

    # For the top-level run: trace_id = run_id
    local turn_dotted_order="${dotted_timestamp}${turn_id}"
    local trace_id="$turn_id"

    local turn_data
    turn_data=$(jq -n \
        --arg id "$turn_id" \
        --arg trace_id "$turn_id" \
        --arg name "Claude Code" \
        --arg project "$PROJECT" \
        --arg session "$session_id" \
        --arg time "$now" \
        --argjson content "$user_content" \
        --arg turn "$turn_num" \
        --arg dotted_order "$turn_dotted_order" \
        '{
            id: $id,
            trace_id: $trace_id,
            name: $name,
            run_type: "chain",
            inputs: {messages: [{role: "user", content: $content}]},
            start_time: $time,
            dotted_order: $dotted_order,
            session_name: $project,
            extra: {metadata: {thread_id: $session}},
            tags: ["claude-code", ("turn-" + $turn)]
        }')

    posts_batch=$(echo "$posts_batch" | jq --argjson data "$turn_data" '. += [$data]')

    # Track this turn for cleanup on early exit
    CURRENT_TURN_ID="$turn_id"

    # Running conversation context: becomes the inputs of each following LLM
    # call and, minus the user message, the outputs of the turn.
    local all_outputs
    all_outputs=$(jq -n --argjson content "$user_content" '[{role: "user", content: $content}]')

    local llm_num=0
    local last_llm_end="$now"

    local assistant_msg msg_timestamp llm_start assistant_id tool_uses assistant_content
    local model_name msg_usage usage_metadata llm_inputs assistant_timestamp
    local assistant_dotted_order assistant_data llm_outputs assistant_end assistant_update
    local tool tool_start tool_id tool_name tool_input tool_use_id result_data result
    local tool_result_timestamp tool_timestamp tool_dotted_order tool_end tool_data
    local tool_update tool_context

    # Process each assistant message (each represents one LLM call)
    while IFS= read -r assistant_msg; do
        llm_num=$((llm_num + 1))

        # Message timestamp drives both ordering and the LLM start time
        msg_timestamp=$(echo "$assistant_msg" | jq -r '.timestamp // ""')

        if [ -n "$msg_timestamp" ]; then
            llm_start="$msg_timestamp"
        elif [ "$llm_num" -eq 1 ]; then
            llm_start="$now"
        else
            llm_start="$last_llm_end"
        fi

        assistant_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
        tool_uses=$(get_tool_uses "$assistant_msg")
        assistant_content=$(format_content "$assistant_msg")

        # Model name with the date suffix stripped,
        # e.g. "claude-sonnet-4-5-20250929" -> "claude-sonnet-4-5"
        model_name=$(echo "$assistant_msg" | jq -r 'if type == "object" and has("message") then .message.model else empty end' | sed 's/-[0-9]\{8\}$//')

        # Usage data preserved by merge_assistant_parts under _usage
        msg_usage=$(echo "$assistant_msg" | jq 'if type == "object" and has("message") then .message._usage // null elif type == "object" then ._usage // null else null end')

        # Build usage_metadata for LangSmith (input_tokens includes cache tokens)
        if [ "$msg_usage" != "null" ] && [ -n "$msg_usage" ]; then
            usage_metadata=$(echo "$msg_usage" | jq '{
                input_tokens: ((.input_tokens // 0) + (.cache_creation_input_tokens // 0) + (.cache_read_input_tokens // 0)),
                output_tokens: (.output_tokens // 0),
                input_token_details: {
                    cache_read: (.cache_read_input_tokens // 0),
                    cache_creation: (.cache_creation_input_tokens // 0)
                }
            }')
        else
            usage_metadata="null"
        fi

        # Inputs for this LLM call (includes accumulated context)
        llm_inputs=$(jq -n --argjson outputs "$all_outputs" '{messages: $outputs}')

        # dotted_order for the assistant run (child of the turn run)
        if [ -n "$msg_timestamp" ]; then
            assistant_timestamp=$(_iso_to_dotted_timestamp "$msg_timestamp")
        else
            # Fallback to current time if the message has no timestamp
            assistant_timestamp="$(date -u +"%Y%m%dT%H%M%S")$(get_microseconds)Z"
        fi
        assistant_dotted_order="${turn_dotted_order}.${assistant_timestamp}${assistant_id}"

        assistant_data=$(jq -n \
            --arg id "$assistant_id" \
            --arg trace_id "$trace_id" \
            --arg parent "$turn_id" \
            --arg name "Claude" \
            --arg project "$PROJECT" \
            --arg time "$llm_start" \
            --argjson inputs "$llm_inputs" \
            --arg dotted_order "$assistant_dotted_order" \
            --arg model "$model_name" \
            '{
                id: $id,
                trace_id: $trace_id,
                parent_run_id: $parent,
                name: $name,
                run_type: "llm",
                inputs: $inputs,
                start_time: $time,
                dotted_order: $dotted_order,
                session_name: $project,
                extra: {metadata: {ls_provider: "anthropic", ls_model_name: $model}},
                tags: [$model]
            }')

        posts_batch=$(echo "$posts_batch" | jq --argjson data "$assistant_data" '. += [$data]')

        # Outputs for this LLM call
        llm_outputs=$(jq -n --argjson content "$assistant_content" '[{role: "assistant", content: $content}]')

        # Tool messages produced by this LLM call; appended to the context
        # after the assistant output so the next LLM call sees them in order.
        tool_context="[]"

        # Create tool runs as siblings of the assistant run
        if [ "$(echo "$tool_uses" | jq 'length')" -gt 0 ]; then
            # The transcript has no separate LLM end timestamp, so the first
            # tool starts at the LLM start time.
            tool_start="$llm_start"

            while IFS= read -r tool; do
                tool_id=$(uuidgen | tr '[:upper:]' '[:lower:]')
                tool_name=$(echo "$tool" | jq -r '.name // "tool"')
                tool_input=$(echo "$tool" | jq '.input // {}')
                tool_use_id=$(echo "$tool" | jq -r '.id // ""')

                # Look the result up once and reuse it for both the tool run
                # and the accumulated context.
                result_data=$(find_tool_result_with_timestamp "$tool_use_id" "$tool_results")
                result=$(echo "$result_data" | jq -r '.result')
                tool_result_timestamp=$(echo "$result_data" | jq -r '.timestamp // ""')

                # The tool result timestamp from the transcript drives ordering
                # and the end time; fall back to the current time without it.
                if [ -n "$tool_result_timestamp" ]; then
                    tool_timestamp=$(_iso_to_dotted_timestamp "$tool_result_timestamp")
                    tool_end="$tool_result_timestamp"
                else
                    tool_timestamp="$(date -u +"%Y%m%dT%H%M%S")$(get_microseconds)Z"
                    tool_end=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
                fi

                tool_dotted_order="${turn_dotted_order}.${tool_timestamp}${tool_id}"

                tool_data=$(jq -n \
                    --arg id "$tool_id" \
                    --arg trace_id "$trace_id" \
                    --arg parent "$turn_id" \
                    --arg name "$tool_name" \
                    --arg project "$PROJECT" \
                    --arg time "$tool_start" \
                    --argjson input "$tool_input" \
                    --arg dotted_order "$tool_dotted_order" \
                    '{
                        id: $id,
                        trace_id: $trace_id,
                        parent_run_id: $parent,
                        name: $name,
                        run_type: "tool",
                        inputs: {input: $input},
                        start_time: $time,
                        dotted_order: $dotted_order,
                        session_name: $project,
                        tags: ["tool"]
                    }')

                posts_batch=$(echo "$posts_batch" | jq --argjson data "$tool_data" '. += [$data]')

                # printf (not echo) so no trailing newline is added to the
                # recorded tool output.
                tool_update=$(printf '%s' "$result" | jq -Rs \
                    --arg time "$tool_end" \
                    --arg id "$tool_id" \
                    --arg trace_id "$trace_id" \
                    --arg parent "$turn_id" \
                    --arg dotted_order "$tool_dotted_order" \
                    '{
                        id: $id,
                        trace_id: $trace_id,
                        parent_run_id: $parent,
                        dotted_order: $dotted_order,
                        outputs: {output: .},
                        end_time: $time
                    }')

                patches_batch=$(echo "$patches_batch" | jq --argjson data "$tool_update" '. += [$data]')

                tool_context=$(echo "$tool_context" | jq \
                    --arg id "$tool_use_id" \
                    --arg result "$result" \
                    '. += [{role: "tool", tool_call_id: $id, content: [{type: "text", text: $result}]}]')

                # Next tool starts after this one ends
                tool_start="$tool_end"
            done < <(echo "$tool_uses" | jq -c '.[]')

            # Assistant completes after all tools finish
            assistant_end="$tool_start"
        else
            # No tools, assistant completes immediately
            assistant_end=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
        fi

        # Now complete the assistant run
        assistant_update=$(jq -n \
            --arg time "$assistant_end" \
            --arg id "$assistant_id" \
            --arg trace_id "$trace_id" \
            --arg parent "$turn_id" \
            --arg dotted_order "$assistant_dotted_order" \
            --argjson outputs "$llm_outputs" \
            --argjson usage_metadata "$usage_metadata" \
            '{
                id: $id,
                trace_id: $trace_id,
                parent_run_id: $parent,
                dotted_order: $dotted_order,
                outputs: ({messages: $outputs} + (if $usage_metadata != null then {usage_metadata: $usage_metadata} else {} end)),
                end_time: $time
            }')

        patches_batch=$(echo "$patches_batch" | jq --argjson data "$assistant_update" '. += [$data]')

        # Save end time for next LLM start
        last_llm_end="$assistant_end"

        # Extend the context: assistant output first, then its tool results
        all_outputs=$(echo "$all_outputs" | jq --argjson new "$llm_outputs" '. += $new')
        all_outputs=$(echo "$all_outputs" | jq --argjson new "$tool_context" '. += $new')
    done < <(echo "$assistant_messages" | jq -c '.[]')

    # Turn outputs are everything accumulated except the user messages
    local turn_outputs
    turn_outputs=$(echo "$all_outputs" | jq '[.[] | select(.role != "user")]')

    # Use the last LLM's end time as the turn end time
    local turn_end="$last_llm_end"

    local turn_update
    turn_update=$(jq -n \
        --arg time "$turn_end" \
        --arg id "$turn_id" \
        --arg trace_id "$turn_id" \
        --arg dotted_order "$turn_dotted_order" \
        --argjson outputs "$turn_outputs" \
        '{
            id: $id,
            trace_id: $trace_id,
            dotted_order: $dotted_order,
            outputs: {messages: $outputs},
            end_time: $time
        }')

    patches_batch=$(echo "$patches_batch" | jq --argjson data "$turn_update" '. += [$data]')

    # Send both batches
    send_multipart_batch "post" "$posts_batch" || true
    send_multipart_batch "patch" "$patches_batch" || true

    # Clear the tracked turn since it's now complete
    CURRENT_TURN_ID=""

    log "INFO" "Created turn $turn_num: $turn_id with $llm_num LLM call(s)"
}
|
||||
|
||||
# Main function
|
||||
# Entry point of the hook.
#
# Reads the hook payload (JSON) from stdin, selects the transcript lines that
# have not been consumed yet for this session, groups them into turns
# (one user message followed by its assistant message(s) and tool results),
# calls create_trace once per completed turn, and persists progress through
# save_state.
#
# stdin : JSON object; the fields read are .stop_hook_active, .session_id and
#         .transcript_path (a leading "~" is expanded to $HOME).
# state : per-session object {last_line, turn_count, updated}. last_line is
#         the 0-based index of the last transcript line consumed (-1 = none).
# exits : 0 early when stop_hook_active is true, when the session id is empty
#         or the transcript file is missing, or when there is nothing new.
main() {
    # Track execution time
    local script_start
    script_start=$(date +%s)

    # Read hook input
    local hook_input
    hook_input=$(cat)

    # Check stop_hook_active flag
    if echo "$hook_input" | jq -e '.stop_hook_active == true' > /dev/null 2>&1; then
        debug "stop_hook_active=true, skipping"
        exit 0
    fi

    # Extract session info
    local session_id
    session_id=$(echo "$hook_input" | jq -r '.session_id // ""')

    local transcript_path
    transcript_path=$(echo "$hook_input" | jq -r '.transcript_path // ""' | sed "s|^~|$HOME|")

    if [ -z "$session_id" ] || [ ! -f "$transcript_path" ]; then
        log "WARN" "Invalid input: session=$session_id, transcript=$transcript_path"
        exit 0
    fi

    log "INFO" "Processing session $session_id"

    # Load state
    local state
    state=$(load_state)

    local last_line
    last_line=$(echo "$state" | jq -r --arg sid "$session_id" '.[$sid].last_line // -1')

    local turn_count
    turn_count=$(echo "$state" | jq -r --arg sid "$session_id" '.[$sid].turn_count // 0')

    # Parse new messages.
    # Every physical line after last_line is selected. Blank lines are
    # deliberately NOT filtered out here: new_last_line below advances once per
    # line read, so dropping lines at this point would leave the saved offset
    # behind the real file position and already-traced messages would be
    # re-read on the next run.
    local new_messages
    new_messages=$(awk -v start="$last_line" 'NR > start + 1' "$transcript_path")

    if [ -z "$new_messages" ]; then
        debug "No new messages"
        exit 0
    fi

    # Count only non-blank lines for the log; awk prints a bare integer
    # (wc -l pads its output with spaces on some platforms).
    local msg_count
    msg_count=$(printf '%s\n' "$new_messages" | awk 'NF { n++ } END { print n + 0 }')
    log "INFO" "Found $msg_count new messages"

    # Group into turns
    local current_user=""
    local current_assistants="[]"       # Array of (merged) assistant messages
    local current_msg_id=""             # Current assistant message ID
    local current_assistant_parts="[]"  # Parts of current assistant message
    local current_tool_results="[]"
    local turns=0
    local new_last_line=$last_line

    while IFS= read -r line; do
        new_last_line=$((new_last_line + 1))

        # Blank / whitespace-only lines advance the offset but carry no message
        case "$line" in
            *[![:space:]]*) ;;
            *) continue ;;
        esac

        local role
        role=$(echo "$line" | jq -r 'if type == "object" and has("message") then .message.role elif type == "object" then .role else "unknown" end')

        if [ "$role" = "user" ]; then
            if [ "$(is_tool_result "$line")" = "true" ]; then
                # Add to tool results
                current_tool_results=$(echo "$current_tool_results" | jq --argjson msg "$line" '. += [$msg]')
            else
                # New turn - finalize any pending assistant message.
                # Pending parts are detected by the parts array alone so that
                # parts collected from messages without an ID are not lost.
                if [ "$(echo "$current_assistant_parts" | jq 'length')" -gt 0 ]; then
                    # Merge parts and add to assistants array
                    local merged
                    merged=$(merge_assistant_parts "$current_assistant_parts")
                    current_assistants=$(echo "$current_assistants" | jq --argjson msg "$merged" '. += [$msg]')
                    current_assistant_parts="[]"
                    current_msg_id=""
                fi

                # Create trace for previous turn
                if [ -n "$current_user" ] && [ "$(echo "$current_assistants" | jq 'length')" -gt 0 ]; then
                    turns=$((turns + 1))
                    local turn_num=$((turn_count + turns))
                    create_trace "$session_id" "$turn_num" "$current_user" "$current_assistants" "$current_tool_results" || true
                fi

                # Start new turn
                current_user="$line"
                current_assistants="[]"
                current_assistant_parts="[]"
                current_msg_id=""
                current_tool_results="[]"
            fi
        elif [ "$role" = "assistant" ]; then
            # Get message ID. `// ""` matters: with -r a missing id would
            # otherwise print the literal string "null" and the
            # "no message ID" case below could never be taken.
            local msg_id
            msg_id=$(echo "$line" | jq -r 'if type == "object" and has("message") then (.message.id // "") else "" end')

            if [ -z "$msg_id" ] || [ "$msg_id" = "$current_msg_id" ]; then
                # No message ID (treated as continuation of the current
                # message) or same message ID: add to current parts
                current_assistant_parts=$(echo "$current_assistant_parts" | jq --argjson msg "$line" '. += [$msg]')
            else
                # New message ID - finalize previous message if any
                if [ "$(echo "$current_assistant_parts" | jq 'length')" -gt 0 ]; then
                    # Merge parts and add to assistants array
                    local merged
                    merged=$(merge_assistant_parts "$current_assistant_parts")
                    current_assistants=$(echo "$current_assistants" | jq --argjson msg "$merged" '. += [$msg]')
                fi

                # Start new assistant message
                current_msg_id="$msg_id"
                current_assistant_parts=$(jq -n --argjson msg "$line" '[$msg]')
            fi
        fi
    done <<< "$new_messages"

    # Process final turn - finalize any pending assistant message
    if [ "$(echo "$current_assistant_parts" | jq 'length')" -gt 0 ]; then
        local merged
        merged=$(merge_assistant_parts "$current_assistant_parts")
        current_assistants=$(echo "$current_assistants" | jq --argjson msg "$merged" '. += [$msg]')
    fi

    if [ -n "$current_user" ] && [ "$(echo "$current_assistants" | jq 'length')" -gt 0 ]; then
        turns=$((turns + 1))
        local turn_num=$((turn_count + turns))
        create_trace "$session_id" "$turn_num" "$current_user" "$current_assistants" "$current_tool_results" || true
    fi

    # Update state
    local updated
    updated=$(date -u +"%Y-%m-%dT%H:%M:%SZ")

    state=$(echo "$state" | jq \
        --arg sid "$session_id" \
        --arg line "$new_last_line" \
        --arg count "$((turn_count + turns))" \
        --arg time "$updated" \
        '.[$sid] = {last_line: ($line | tonumber), turn_count: ($count | tonumber), updated: $time}')

    save_state "$state"

    # Log execution time
    local script_end
    script_end=$(date +%s)
    local duration=$((script_end - script_start))

    log "INFO" "Processed $turns turns in ${duration}s"
    if [ "$duration" -gt 180 ]; then
        log "WARN" "Hook took ${duration}s (>3min), consider optimizing"
    fi
}
|
||||
|
||||
# Run main
|
||||
main
|
||||
|
||||
exit 0
|
||||
@@ -0,0 +1,331 @@
|
||||
# Implementation Plan: Subagent Tracing in LangSmith
|
||||
|
||||
## Overview
|
||||
Add comprehensive subagent tracing to `stop_hook.sh` to capture Task tool executions and their agent transcript conversations as nested runs in LangSmith.
|
||||
|
||||
## User Requirements
|
||||
- Trace ALL Task tool invocations (all subagent_types: Explore, Plan, etc.)
|
||||
- Create individual child runs for each agent message (user/assistant/tool within agent)
|
||||
- Agent files stored in same directory as main transcript: `agent-{agentId}.jsonl`
|
||||
- Correlate using `agentId` from `toolUseResult` field in main transcript
|
||||
|
||||
## Current Architecture
|
||||
|
||||
### Main Transcript Structure
|
||||
- Location: Passed as `transcript_path` in hook input (e.g., `cc_transcript.jsonl`)
|
||||
- Contains: User messages, assistant messages with tool_use, tool_result messages
|
||||
- Task tools have special `toolUseResult.agentId` field in tool_result messages
|
||||
|
||||
### Agent Transcript Structure
|
||||
- Location: Same directory as main transcript, named `agent-{agentId}.jsonl`
|
||||
- Format: One JSON object per line, same schema as main transcript
|
||||
- Contains: Agent's internal conversation (user prompts, assistant responses, tool calls)
|
||||
|
||||
### Current Tool Processing (stop_hook.sh lines 599-695)
|
||||
- Iterates through tool_uses from assistant messages
|
||||
- Creates tool runs as siblings of assistant (both children of turn)
|
||||
- Matches tool results using `find_tool_result_with_timestamp()`
|
||||
- Batches POST/PATCH operations with `send_multipart_batch()`
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
### 1. Add Detection Functions (Insert after line 263)
|
||||
|
||||
**New functions:**
|
||||
```bash
|
||||
# Check if tool is a Task tool
|
||||
is_task_tool() {
    # Succeeds (exit status 0) only when the tool_use JSON passed as $1 has
    # .name equal to "Task"; a missing name is treated as "".
    local tool="$1"
    # Declared and assigned separately so jq's exit status is not masked by
    # `local`.
    local tool_name
    tool_name=$(echo "$tool" | jq -r '.name // ""')
    [ "$tool_name" = "Task" ]
}
|
||||
|
||||
# Extract agentId from tool result
|
||||
get_agent_id_from_result() {
    # Print the agentId recorded on the tool result that answers a tool use.
    #
    # $1  tool_use_id to look for
    # $2  JSON array of tool-result messages
    #
    # Prints .toolUseResult.agentId of the first array element whose content
    # (.message.content, falling back to .content) is an array containing a
    # "tool_result" item with a matching .tool_use_id; prints "" when there is
    # no such element.
    local tool_use_id="$1"
    local tool_results="$2"

    # Elements whose toolUseResult is not an object are skipped: indexing a
    # non-object with .agentId raises in jq and would abort the whole lookup
    # before a later, matching result is reached.
    echo "$tool_results" | jq -r --arg id "$tool_use_id" '
        first(
            .[]
            | objects
            | (.toolUseResult | objects | .agentId | select(. != null)) as $agent_id
            | (.message.content // .content) as $content
            | select(
                if ($content | type) == "array" then
                    any($content[] | objects;
                        .type == "tool_result" and .tool_use_id == $id)
                else false end
              )
            | $agent_id
        ) // ""
    '
}
|
||||
|
||||
# Get path to agent transcript file
|
||||
get_agent_transcript_path() {
    # Print the path of an agent transcript: agent-<agent_id>.jsonl in the
    # same directory as the main transcript.
    #
    # $1  path of the main transcript
    # $2  agent id
    local transcript_path="$1"
    local agent_id="$2"
    # Declared and assigned separately so dirname's exit status is not masked
    # by `local`.
    local transcript_dir
    transcript_dir=$(dirname "$transcript_path")
    echo "${transcript_dir}/agent-${agent_id}.jsonl"
}
|
||||
```
|
||||
|
||||
**Purpose:** Identify Task tools and locate corresponding agent files
|
||||
|
||||
### 2. Add Agent Processing Function (Insert after detection functions)
|
||||
|
||||
**New function:** `process_agent_transcript()`
|
||||
|
||||
**Parameters:**
|
||||
- `parent_tool_id`: Task tool's run ID (parent for agent messages)
|
||||
- `agent_id`: Agent identifier (e.g., "558bc970")
|
||||
- `main_transcript_path`: Path to main transcript for deriving agent file path
|
||||
- `tool_result_timestamp`: Timestamp from tool_result for ordering
|
||||
- `parent_dotted_order`: Task tool's dotted_order for hierarchy
|
||||
- `trace_id`: Trace ID for all runs in this trace
|
||||
- `posts_batch_ref`: Variable name containing posts batch array
|
||||
- `patches_batch_ref`: Variable name containing patches batch array
|
||||
|
||||
**Logic:**
|
||||
1. Derive agent file path from main transcript path + agent_id
|
||||
2. Check if agent file exists (graceful exit if not)
|
||||
3. Read agent file line by line
|
||||
4. For each agent message:
|
||||
- Extract role (assistant/user), timestamp, content
|
||||
- Skip tool_result messages (they're already processed)
|
||||
- Create run with proper dotted_order: `{parent_tool_order}.{msg_timestamp}{msg_uuid}`
|
||||
- For assistant messages: Extract model, usage, tool_uses
|
||||
- For assistant with tool_uses: Create child tool runs
|
||||
- Add to posts_batch and patches_batch using indirect variable refs
|
||||
|
||||
**Key features:**
|
||||
- Handles nested tool calls within agent
|
||||
- Preserves timestamps from agent transcript
|
||||
- Maintains proper hierarchy via dotted_order
|
||||
- Efficient: line-by-line processing, no full file load
|
||||
|
||||
### 3. Integrate into Tool Processing Loop (Modify lines 599-695)
|
||||
|
||||
**Insert point:** Line 690 (after Task tool POST, before tool PATCH)
|
||||
|
||||
**Integration code:**
|
||||
```bash
|
||||
# After creating the tool run (POST)...
|
||||
|
||||
# Check if this is a Task tool
|
||||
if is_task_tool "$tool"; then
|
||||
debug "Detected Task tool: $tool_name"
|
||||
|
||||
# Extract agentId from tool result
|
||||
local agent_id
|
||||
agent_id=$(get_agent_id_from_result "$tool_use_id" "$tool_results")
|
||||
|
||||
if [ -n "$agent_id" ]; then
|
||||
debug "Found agentId: $agent_id for tool $tool_use_id"
|
||||
|
||||
# Process agent transcript
|
||||
process_agent_transcript \
|
||||
"$tool_id" \
|
||||
"$agent_id" \
|
||||
"$transcript_path" \
|
||||
"$tool_result_timestamp" \
|
||||
"$tool_dotted_order" \
|
||||
"$trace_id" \
|
||||
"posts_batch" \
|
||||
"patches_batch"
|
||||
else
|
||||
debug "No agentId found for Task tool $tool_use_id"
|
||||
fi
|
||||
fi
|
||||
|
||||
# Then create tool completion (PATCH)...
|
||||
```
|
||||
|
||||
**Rationale:**
|
||||
- Task tool run must exist before agent messages can reference it as parent
|
||||
- Agent processing happens between tool creation and completion
|
||||
- All runs batched together for efficient API submission
|
||||
|
||||
### 4. Store Transcript Path (Modify line 805)
|
||||
|
||||
**Current:**
|
||||
```bash
|
||||
local transcript_path
|
||||
transcript_path=$(echo "$hook_input" | jq -r '.transcript_path // ""' | sed "s|^~|$HOME|")
|
||||
```
|
||||
|
||||
**Required:** Ensure `transcript_path` variable is accessible in `create_trace()` function scope
|
||||
|
||||
**Solution:** Pass `transcript_path` as parameter to `create_trace()` or make it a global variable accessible throughout the script
|
||||
|
||||
## LangSmith Run Hierarchy
|
||||
|
||||
**Before (current):**
|
||||
```
|
||||
Turn (Claude Code chain)
|
||||
├── Assistant (llm)
|
||||
├── Read (tool)
|
||||
├── Assistant (llm)
|
||||
└── Edit (tool)
|
||||
```
|
||||
|
||||
**After (with agents):**
|
||||
```
|
||||
Turn (Claude Code chain)
|
||||
├── Assistant (llm)
|
||||
├── Task (tool)
|
||||
│ ├── Agent: claude-haiku-4-5 (llm)
|
||||
│ ├── Glob (tool)
|
||||
│ ├── Read (tool)
|
||||
│ ├── Agent: claude-haiku-4-5 (llm)
|
||||
│ └── Bash (tool)
|
||||
├── Assistant (llm)
|
||||
└── Edit (tool)
|
||||
```
|
||||
|
||||
**Key relationships:**
|
||||
- Turn run = parent for both main assistant AND all tools (including Task)
|
||||
- Task tool run = parent for all agent messages and agent tools
|
||||
- Agent assistant runs = children of Task tool
|
||||
- Agent tool calls = siblings of agent assistant runs (both children of Task tool)
|
||||
- All share same trace_id for unified trace view
|
||||
|
||||
**Important:** Tools are siblings of assistants, not nested under them. This matches Claude Code's execution model where tool calls happen between assistant messages.
|
||||
|
||||
## Dotted Order Management
|
||||
|
||||
**Format:** `YYYYMMDDTHHMMSSffffffZ{uuid}`
|
||||
|
||||
**Hierarchy encoding:**
|
||||
- Turn: `20251216T174404397000Z{turn_uuid}`
|
||||
- Task tool: `{turn_order}.{tool_timestamp}{tool_uuid}`
|
||||
- Agent message: `{tool_order}.{agent_msg_timestamp}{agent_msg_uuid}`
|
||||
- Agent tool: `{tool_order}.{agent_tool_timestamp}{agent_tool_uuid}` (child of the Task tool, sibling of the agent messages)
|
||||
|
||||
**Example:**
|
||||
```
|
||||
Turn: 20251216T174404397000Za1b2c3d4
|
||||
Task tool: 20251216T174404397000Za1b2c3d4.20251216T174455000000Zi9j0k1l2
|
||||
Agent msg: 20251216T174404397000Za1b2c3d4.20251216T174455000000Zi9j0k1l2.20251216T174409317000Zm3n4o5p6
|
||||
Agent tool: 20251216T174404397000Za1b2c3d4.20251216T174455000000Zi9j0k1l2.20251216T174410733000Zq7r8s9t0
|
||||
```
|
||||
|
||||
LangSmith sorts runs lexicographically by dotted_order, ensuring proper visual hierarchy.
|
||||
|
||||
## Error Handling
|
||||
|
||||
**Missing agent file:**
|
||||
- Check: `[ ! -f "$agent_file" ]`
|
||||
- Action: Log debug message, return gracefully
|
||||
- Impact: Main trace completes normally, just without agent details
|
||||
|
||||
**Empty agent transcript:**
|
||||
- Check: `[ -z "$agent_messages" ]`
|
||||
- Action: Log debug message, return gracefully
|
||||
|
||||
**Invalid JSON in agent file:**
|
||||
- Mitigation: Use `jq` with `2>/dev/null` and `|| echo ""` fallbacks
|
||||
- Parse errors don't crash hook
|
||||
|
||||
**Large agent transcripts (100+ messages):**
|
||||
- Solution: Line-by-line processing with `while read`
|
||||
- Memory efficient, no timeout issues expected
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
**Current:** 10 turns × 5 tools = ~100 operations → 2 API calls (POST + PATCH batches) → ~2-5s
|
||||
|
||||
**With agents:** 10 turns × 1 Task × 20 agent messages = +400 operations → Same 2 API calls → ~5-10s
|
||||
|
||||
**Optimization:**
|
||||
- Agent runs added to existing batches (no extra API calls)
|
||||
- Multipart batch endpoint handles large payloads efficiently
|
||||
- Line-by-line processing prevents memory issues
|
||||
|
||||
## Testing Strategy
|
||||
|
||||
**Test cases:**
|
||||
1. Single Task tool with 5 agent messages → Verify 1 Task + 5 child runs
|
||||
2. Multiple Task tools in same turn → Verify independent agent hierarchies
|
||||
3. Missing agent file → Verify graceful degradation
|
||||
4. Agent with tool calls → Verify agent tool runs appear under the Task tool, as siblings of the agent assistant runs
|
||||
5. Large agent (50+ messages) → Verify performance <10s
|
||||
|
||||
**Validation:**
|
||||
- Agent runs appear as children of Task in LangSmith UI
|
||||
- Timestamps accurate, dotted_order correct
|
||||
- Usage metadata captured for agent LLM calls
|
||||
- Tags distinguish agent runs ("agent", "subagent", "agent-tool")
|
||||
|
||||
## Critical Files
|
||||
|
||||
**Primary:**
|
||||
- `/Users/tanushreesharma/tracing-claude-code/stop_hook.sh` - Main implementation file
|
||||
- Line 263: Insert detection functions (~30 lines)
|
||||
- After 263: Insert processing function (~250 lines)
|
||||
- Line 690: Insert integration code (~20 lines)
|
||||
- Line 805: Ensure transcript_path accessible
|
||||
|
||||
**Reference:**
|
||||
- `cc_transcript.jsonl` - Example main transcript with Task tools
|
||||
- `agent-*.jsonl` - Example agent transcripts
|
||||
- `$HOME/.claude/state/hook.log` - Debug output for troubleshooting
|
||||
|
||||
## Rollout Plan
|
||||
|
||||
**Phase 1: Core Implementation**
|
||||
- Add detection and processing functions
|
||||
- Integrate into tool loop
|
||||
- Test with simple Task tool (single agent, few messages)
|
||||
|
||||
**Phase 2: Validation**
|
||||
- Test with multiple Task tools
|
||||
- Test with large agent transcripts
|
||||
- Verify LangSmith UI displays correctly
|
||||
|
||||
**Phase 3: Production**
|
||||
- Enable in production environment
|
||||
- Monitor logs for errors
|
||||
- Collect user feedback
|
||||
|
||||
## Success Criteria
|
||||
|
||||
✓ All Task tool invocations traced with agent details
|
||||
✓ Agent messages appear as proper child runs in LangSmith
|
||||
✓ Correct hierarchy and ordering maintained
|
||||
✓ No performance degradation (hook completes in <10s)
|
||||
✓ Graceful handling of missing/invalid agent files
|
||||
✓ Clear debug logging for troubleshooting
|
||||
|
||||
## Edge Cases
|
||||
|
||||
**Nested Task tools:** Agent calls Task → creates sub-agent
|
||||
- Handled: Recursive processing via `process_agent_transcript`
|
||||
- Limit: Consider depth limit (max 3 levels) if performance issues
|
||||
|
||||
**Concurrent agents:** Multiple Task tools in same turn
|
||||
- Handled: Each agent processed independently in loop
|
||||
- No conflicts (unique agentId, separate files)
|
||||
|
||||
**Agent file not yet written:** Hook runs before agent file created
|
||||
- Handled: File check returns gracefully
|
||||
- Next hook execution will pick it up if tool_result present
|
||||
|
||||
## Implementation Estimate
|
||||
|
||||
**Code size:**
|
||||
- Detection functions: ~30 lines
|
||||
- Processing function: ~250 lines
|
||||
- Integration code: ~20 lines
|
||||
- **Total new code: ~300 lines**
|
||||
|
||||
**Effort:**
|
||||
- Implementation: 4-6 hours
|
||||
- Testing: 2-3 hours
|
||||
- Documentation: 1 hour
|
||||
- **Total: ~1 day**
|
||||
|
||||
**Risk level:** Medium
|
||||
- Touching production hook script
|
||||
- Complex nested structure
|
||||
- Multiple edge cases to handle
|
||||
- Mitigated by: Graceful error handling, extensive testing, debug logging
|
||||
Executable
+27
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash

# Test script to manually trigger the stop hook with cc_transcript.jsonl
#
# Run it from the directory that contains stop_hook.sh (the hook is invoked
# as ./stop_hook.sh). SESSION_ID and TRANSCRIPT_PATH can be overridden from
# the environment; the defaults below are the recorded example session.

# Set the session ID from the transcript
SESSION_ID="${SESSION_ID:-6bb19f49-d296-485d-8eb8-c5cbb8a9b80d}"
TRANSCRIPT_PATH="${TRANSCRIPT_PATH:-$HOME/tracing-claude-code/cc_transcript.jsonl}"

# Fail loudly on a bad setup: given a missing transcript the hook itself only
# logs a warning and exits 0, which would make this run look successful.
if [ ! -f "$TRANSCRIPT_PATH" ]; then
    echo "Transcript not found: $TRANSCRIPT_PATH" >&2
    exit 1
fi

if [ ! -f ./stop_hook.sh ]; then
    echo "stop_hook.sh not found in $(pwd); run this script from its directory" >&2
    exit 1
fi

# Create hook input
HOOK_INPUT=$(jq -n \
    --arg sid "$SESSION_ID" \
    --arg path "$TRANSCRIPT_PATH" \
    '{
        session_id: $sid,
        transcript_path: $path,
        stop_hook_active: false
    }')

echo "Testing hook with cc_transcript.jsonl..."
echo "Session ID: $SESSION_ID"
echo "Transcript: $TRANSCRIPT_PATH"
echo

# Call the hook
echo "$HOOK_INPUT" | bash ./stop_hook.sh

echo
echo "Done! Check ~/.claude/state/hook.log for details"
|
||||
+262
@@ -0,0 +1,262 @@
|
||||
# Test Suite for stop_hook.sh
|
||||
|
||||
Comprehensive pytest-based test suite for `stop_hook.sh`, which implements tracing from Claude Code -> LangSmith.
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Install test dependencies
|
||||
.venv/bin/pip install -r tests/requirements-test.txt
|
||||
|
||||
# Run all unit tests (no API key needed)
|
||||
.venv/bin/pytest tests/unit/ -v
|
||||
|
||||
# Run with coverage
|
||||
.venv/bin/pytest tests/unit/ --cov=tests --cov-report=html
|
||||
|
||||
# View coverage report
|
||||
open htmlcov/index.html
|
||||
```
|
||||
|
||||
## Test Results
|
||||
|
||||
Current Status: **279 passing unit tests** covering comprehensive functionality ✅
|
||||
|
||||
### Test Coverage
|
||||
|
||||
#### Core Functions
|
||||
- **Message Parsing** (6 tests): ✅ get_content, is_tool_result, get_tool_uses
|
||||
- **Content Formatting** (13 tests): ✅ format_content, merge_assistant_parts, get_usage_from_parts
|
||||
- **Utilities** (6 tests): ✅ get_microseconds, get_file_size
|
||||
- **Cost Tracking** (7 tests): ✅ usage_metadata calculation, cache token tracking
|
||||
- **Trace Ordering** (16 tests): ✅ dotted_order format, chronological sorting, parent-child relationships
|
||||
- **Model Name Formatting** (15 tests): ✅ Date suffix stripping for cleaner model names
|
||||
- **State Management** (5 tests): ✅ load_state, save_state with isolated environment
|
||||
|
||||
#### API & Network Operations (35 tests)
|
||||
- **API Call Function** (6 tests): ✅ HTTP request handling, headers, timeout configuration
|
||||
- **API Error Handling** (3 tests): ✅ POST/PATCH structure, timeout handling
|
||||
- **Multipart Batch Sending** (9 tests): ✅ Batch uploads, temp file management, endpoint usage
|
||||
- **Cleanup on Exit** (7 tests): ✅ Pending turn cleanup, error handling, trap configuration
|
||||
- **API Key Handling** (3 tests): ✅ Environment variable fallback, validation
|
||||
- **HTTP Response Handling** (4 tests): ✅ Success codes, error logging, response body handling
|
||||
- **Project Configuration** (3 tests): ✅ Project name, API base URL configuration
|
||||
|
||||
#### Main Entry Point & Workflow (54 tests)
|
||||
- **Hook Input Parsing** (6 tests): ✅ session_id, transcript_path extraction and validation
|
||||
- **Stop Hook Active Flag** (2 tests): ✅ Recursive execution prevention
|
||||
- **Incremental Processing** (6 tests): ✅ last_line tracking, awk-based skipping
|
||||
- **Turn Grouping** (8 tests): ✅ User/assistant/tool message grouping logic
|
||||
- **SSE Streaming Merge** (6 tests): ✅ Message ID tracking, part accumulation
|
||||
- **State Updates** (4 tests): ✅ Session-specific state persistence
|
||||
- **Execution Time Tracking** (5 tests): ✅ Duration calculation, slow execution warnings
|
||||
- **Tracing Disabled Check** (3 tests): ✅ TRACE_TO_LANGSMITH validation
|
||||
- **Required Commands** (4 tests): ✅ jq, curl, uuidgen availability checks
|
||||
- **Final Turn Processing** (2 tests): ✅ Pending message handling at EOF
|
||||
- **Main Logging** (4 tests): ✅ Session start, message counts, turn tracking
|
||||
- **Main Integration** (4 tests): ✅ End-to-end validation with mocked environment
|
||||
|
||||
#### Timestamp Conversion (21 tests)
|
||||
- **ISO to Dotted Order** (9 tests): ✅ Format conversion, padding, delimiter removal
|
||||
- **Dotted Order Format** (2 tests): ✅ Timestamp format validation
|
||||
- **Chronological Ordering** (3 tests): ✅ Sort order verification across timestamps
|
||||
- **Edge Cases** (5 tests): ✅ Midnight, end-of-day, zero milliseconds, leap years
|
||||
- **Real Transcript Data** (2 tests): ✅ Actual timestamp format from cc_transcript.jsonl
|
||||
|
||||
#### Multipart Serialization (29 tests)
|
||||
- **Serialize Function** (11 tests): ✅ Operation/run_json/temp_dir parameters, file creation
|
||||
- **File Naming** (4 tests): ✅ Main/inputs/outputs file naming conventions
|
||||
- **Data Separation** (4 tests): ✅ Excluding inputs/outputs from main data
|
||||
- **Integration Tests** (6 tests): ✅ POST/PATCH operations, file existence validation
|
||||
- **Curl Format** (4 tests): ✅ -F arguments, Content-Length headers, part naming
|
||||
|
||||
#### Trace Creation (65 tests)
|
||||
- **Create Trace Function** (6 tests): ✅ Parameter acceptance and structure
|
||||
- **Turn Run Creation** (8 tests): ✅ Chain type, UUID generation, dotted_order, tags
|
||||
- **Assistant Run Creation** (8 tests): ✅ LLM type, parent relationships, model metadata
|
||||
- **Tool Run Creation** (7 tests): ✅ Tool type, inputs, parent relationships
|
||||
- **Tool Result Finding** (5 tests): ✅ Result lookup by ID, timestamp extraction
|
||||
- **Usage Metadata** (6 tests): ✅ Token counts, cache tracking, input/output details
|
||||
- **Dotted Order Hierarchy** (3 tests): ✅ Parent-child dotted_order relationships
|
||||
- **Outputs Accumulation** (4 tests): ✅ Message accumulation across LLM calls
|
||||
- **Batch Processing** (10 tests): ✅ POST/PATCH batch creation and submission
|
||||
- **Current Turn Tracking** (2 tests): ✅ CURRENT_TURN_ID for cleanup
|
||||
- **Multiple LLM Calls** (4 tests): ✅ Iteration, numbering, context accumulation
|
||||
- **Logging** (2 tests): ✅ Turn creation, LLM call logging
|
||||
|
||||
## Test Structure
|
||||
|
||||
```
|
||||
tests/
|
||||
├── conftest.py # Pytest fixtures
|
||||
├── pytest.ini # Pytest configuration
|
||||
├── requirements-test.txt # Test dependencies
|
||||
├── unit/ # Unit tests (no external dependencies)
|
||||
│ ├── test_message_parsing.py # Content extraction (6 tests)
|
||||
│ ├── test_content_formatting.py # LangSmith format (13 tests)
|
||||
│ ├── test_state_management.py # State persistence (5 tests)
|
||||
│ ├── test_utilities.py # Cross-platform utils (6 tests)
|
||||
│ ├── test_cost_tracking.py # Token usage & cost (7 tests)
|
||||
│ ├── test_trace_ordering.py # Timestamp ordering (16 tests)
|
||||
│ ├── test_model_name_formatting.py # Model name cleaning (15 tests)
|
||||
│ ├── test_api_calls.py # API operations & batch sending (35 tests)
|
||||
│ ├── test_main_entry.py # Main workflow & entry point (54 tests)
|
||||
│ ├── test_timestamp_conversion.py # ISO to dotted_order conversion (21 tests)
|
||||
│ ├── test_multipart_serialization.py # Multipart file handling (29 tests)
|
||||
│ └── test_trace_creation.py # Trace structure & hierarchy (65 tests)
|
||||
├── helpers/ # Test utilities
|
||||
│ ├── bash_runner.py # Execute bash functions in isolation
|
||||
│ ├── langsmith_client.py # LangSmith API helpers
|
||||
│ ├── transcript_parser.py # JSONL test data generation
|
||||
│ └── state_manager.py # State file management
|
||||
└── test_data/ # Test fixtures
|
||||
├── minimal_transcript.jsonl
|
||||
├── multi_turn.jsonl
|
||||
├── with_tools.jsonl
|
||||
└── streaming_sse.jsonl
|
||||
```
|
||||
|
||||
## Key Features
|
||||
|
||||
### 1. BashRunner - Test Bash Functions in Isolation
|
||||
|
||||
```python
|
||||
from tests.helpers.bash_runner import BashRunner
|
||||
|
||||
runner = BashRunner()
|
||||
|
||||
# Call any bash function from stop_hook.sh
|
||||
result = runner.call_function("get_content", '{"message": {"content": "hello"}}')
|
||||
print(result) # "hello"
|
||||
```
|
||||
|
||||
### 2. TranscriptBuilder - Generate Test Data
|
||||
|
||||
```python
|
||||
from tests.helpers.transcript_parser import TranscriptBuilder
|
||||
|
||||
builder = TranscriptBuilder(Path("test.jsonl"))
|
||||
builder.add_user_message("Hello")
|
||||
builder.add_assistant_message("Hi there!")
|
||||
builder, tool_id = builder.add_tool_use("Read", {"file_path": "/test.txt"})
|
||||
builder.add_tool_result(tool_id, "File content")
|
||||
builder.build()
|
||||
```
|
||||
|
||||
### 3. LangSmith Client - Verify Traces
|
||||
|
||||
```python
|
||||
from tests.helpers.langsmith_client import LangSmithTestClient
|
||||
|
||||
client = LangSmithTestClient()
|
||||
|
||||
# Fetch traces
|
||||
traces = client.fetch_traces(limit=10)
|
||||
|
||||
# Get child runs
|
||||
children = client.get_child_runs(parent_run_id)
|
||||
```
|
||||
|
||||
## Running Tests
|
||||
|
||||
### Unit Tests Only (Default)
|
||||
|
||||
```bash
|
||||
# Run all unit tests
|
||||
.venv/bin/pytest tests/unit/ -v
|
||||
|
||||
# Run specific test file
|
||||
.venv/bin/pytest tests/unit/test_message_parsing.py -v
|
||||
|
||||
# Run specific test
|
||||
.venv/bin/pytest tests/unit/test_message_parsing.py::TestGetContent::test_get_content_from_message_wrapper -v
|
||||
```
|
||||
|
||||
### With Coverage
|
||||
|
||||
```bash
|
||||
# Generate coverage report
|
||||
.venv/bin/pytest tests/unit/ --cov=tests --cov-report=html --cov-report=term-missing
|
||||
|
||||
# View HTML report
|
||||
open htmlcov/index.html
|
||||
```
|
||||
|
||||
### Integration Tests (Future)
|
||||
|
||||
Integration tests require `CC_LANGSMITH_API_KEY`:
|
||||
|
||||
```bash
|
||||
# Run integration tests
|
||||
CC_LANGSMITH_API_KEY="your_key" .venv/bin/pytest tests/integration/ -v -m integration
|
||||
```
|
||||
|
||||
## Test Fixtures
|
||||
|
||||
### Sample Data Fixtures (conftest.py)
|
||||
|
||||
- `sample_user_message` - Example user message
|
||||
- `sample_assistant_message` - Assistant message with tool use
|
||||
- `sample_tool_result` - Tool result message
|
||||
- `sample_streaming_parts` - SSE streaming parts
|
||||
|
||||
### Helper Fixtures
|
||||
|
||||
- `bash_executor` - BashRunner instance
|
||||
- `langsmith_client` - LangSmith API client
|
||||
- `state_manager` - State file manager
|
||||
- `transcript_builder` - Transcript generator
|
||||
- `temp_state_file` - Isolated state file
|
||||
- `temp_transcript` - Temporary transcript path
|
||||
|
||||
### Example Usage
|
||||
|
||||
```python
|
||||
def test_example(bash_executor, sample_assistant_message):
|
||||
msg = json.dumps(sample_assistant_message)
|
||||
result = bash_executor.call_function("get_content", msg)
|
||||
content = json.loads(result)
|
||||
assert len(content) == 3
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Tests Failing with "Function not found"
|
||||
|
||||
The bash_runner removes the early exit check from stop_hook.sh. If functions are not found, ensure:
|
||||
1. stop_hook.sh is in the correct location
|
||||
2. The sed pattern matches the early exit block
|
||||
|
||||
### State Management Tests Using Real State File
|
||||
|
||||
The tests set the `STATE_FILE` environment variable to a temporary file, but stop_hook.sh hardcodes its state file path and ignores that variable, so the real state file gets used. To fix:
|
||||
- Modify stop_hook.sh line 47 to: `STATE_FILE="${STATE_FILE:-$HOME/.claude/state/langsmith_state.json}"`
|
||||
- Or: Run tests in isolation and clean up afterwards
|
||||
|
||||
### Integration Tests Require API Key
|
||||
|
||||
Integration tests need a valid LangSmith API key:
|
||||
|
||||
```bash
|
||||
export CC_LANGSMITH_API_KEY="lsv2_pt_..."
|
||||
.venv/bin/pytest tests/integration/ -v -m integration
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
When adding new functions to stop_hook.sh:
|
||||
|
||||
1. Add corresponding unit tests
|
||||
2. Use BashRunner to test in isolation
|
||||
3. Add sample fixtures if needed
|
||||
4. Ensure 80%+ test coverage
|
||||
5. Run tests before committing
|
||||
|
||||
Example:
|
||||
|
||||
```python
|
||||
def test_new_function(bash_executor):
|
||||
"""Test description"""
|
||||
result = bash_executor.call_function("new_function", "arg1", "arg2")
|
||||
assert result == "expected_value"
|
||||
```
|
||||
@@ -0,0 +1,215 @@
|
||||
"""
|
||||
Pytest configuration and shared fixtures for stop_hook.sh tests.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.helpers.bash_runner import BashRunner
|
||||
from tests.helpers.state_manager import StateManager
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Test Environment Fixtures
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def temp_state_file(tmp_path):
    """
    Give each test its own state file location.

    The path lives inside the test's ``tmp_path``; the file itself is not
    created here.

    Yields:
        Path to ``langsmith_state.json`` under ``tmp_path``
    """
    isolated_path = tmp_path.joinpath("langsmith_state.json")
    # No explicit teardown after the yield: the file lives under tmp_path,
    # which pytest manages.
    yield isolated_path
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def isolated_env(monkeypatch, temp_state_file, tmp_path):
    """
    Pin the environment variables the tests rely on to per-test values.

    Autouse: applied to every test without being requested. Everything is
    set through ``monkeypatch``, so the changes are undone after each test.
    """
    env_overrides = {
        # State and log files go to per-test temporary locations
        "STATE_FILE": str(temp_state_file),
        "LOG_FILE": str(tmp_path / "hook.log"),
        # Tracing is off by default; individual tests can re-enable it
        "TRACE_TO_LANGSMITH": "false",
        # Debug logging off
        "CC_LANGSMITH_DEBUG": "false",
        # Dedicated test project
        "CC_LANGSMITH_PROJECT": "cc-test",
    }
    for var_name, var_value in env_overrides.items():
        monkeypatch.setenv(var_name, var_value)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Helper Class Fixtures
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def bash_executor():
    """
    Provide BashRunner for executing bash functions in isolation.

    The script path is derived from this file's location rather than a
    machine-specific absolute path, so the fixture works from any checkout.

    Returns:
        BashRunner instance pointed at stop_hook.sh
    """
    # Local import keeps this fixture self-contained.
    from pathlib import Path

    # NOTE(review): assumes this conftest.py lives in tests/ directly under
    # the repository root, next to stop_hook.sh - confirm.
    script_path = Path(__file__).resolve().parent.parent / "stop_hook.sh"
    return BashRunner(str(script_path))
|
||||
|
||||
|
||||
@pytest.fixture
def state_manager(temp_state_file):
    """
    Provide a StateManager that operates on the test's isolated state file.

    Returns:
        StateManager instance bound to ``temp_state_file``
    """
    manager = StateManager(state_file=temp_state_file)
    return manager
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Sample Data Fixtures
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def sample_assistant_message():
    """
    Build one assistant transcript entry with thinking, text and tool-use blocks.

    Returns:
        Dictionary with assistant message structure
    """
    content_blocks = [
        {"type": "thinking", "thinking": "Let me analyze this..."},
        {"type": "text", "text": "Here's my response"},
        {
            "type": "tool_use",
            "id": "tool_test_abc",
            "name": "Read",
            "input": {"file_path": "/test/file.txt"},
        },
    ]
    token_usage = {
        "input_tokens": 100,
        "output_tokens": 50,
        "cache_read_input_tokens": 1000,
    }
    message = {
        "id": "msg_test123",
        "role": "assistant",
        "model": "claude-sonnet-4-5-20250929",
        "content": content_blocks,
        "usage": token_usage,
    }
    return {
        "type": "assistant",
        "message": message,
        "timestamp": "2025-01-01T00:00:01Z",
    }
|
||||
|
||||
|
||||
@pytest.fixture
def sample_tool_result():
    """
    Build one user transcript entry that carries a single tool result.

    The ``tool_use_id`` is ``tool_test_abc``, the same ID used by the
    tool-use block in ``sample_assistant_message``.

    Returns:
        Dictionary with tool result structure
    """
    tool_result_block = {
        "type": "tool_result",
        "tool_use_id": "tool_test_abc",
        "content": "File contents: hello world",
    }
    return {
        "type": "user",
        "role": "user",
        "content": [tool_result_block],
        "timestamp": "2025-01-01T00:00:02Z",
    }
|
||||
|
||||
|
||||
@pytest.fixture
def sample_streaming_parts():
    """
    Build three assistant entries that share one message ID (SSE simulation).

    Returns:
        List of message dictionaries with same ID
    """
    msg_id = "msg_stream_123"
    # (text chunk, output_tokens, timestamp); output_tokens is cumulative
    # across the parts, input_tokens stays at 10.
    chunks = (
        ("Hello ", 2, "2025-01-01T00:00:00.000Z"),
        ("world", 4, "2025-01-01T00:00:00.100Z"),
        ("!", 5, "2025-01-01T00:00:00.200Z"),
    )
    return [
        {
            "type": "assistant",
            "message": {
                "id": msg_id,
                "role": "assistant",
                "model": "claude-sonnet-4-5-20250929",
                "content": [{"type": "text", "text": text}],
                "usage": {"input_tokens": 10, "output_tokens": output_tokens},
            },
            "timestamp": timestamp,
        }
        for text, output_tokens, timestamp in chunks
    ]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Session ID Fixture
|
||||
# =============================================================================
|
||||
|
||||
@pytest.fixture
def session_id():
    """
    Produce a fresh random session identifier for the requesting test.

    Returns:
        UUID string
    """
    fresh_id = uuid.uuid4()
    return str(fresh_id)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Pytest Configuration (custom marker registration)
|
||||
# =============================================================================
|
||||
|
||||
def pytest_configure(config):
    """Register the suite's custom markers: unit, integration and slow."""
    marker_lines = (
        "unit: mark test as a unit test (no external dependencies)",
        "integration: mark test as an integration test (requires API access)",
        "slow: mark test as slow (takes >5 seconds)",
    )
    for marker_line in marker_lines:
        config.addinivalue_line("markers", marker_line)
|
||||
@@ -0,0 +1,147 @@
|
||||
"""
|
||||
BashRunner - Execute bash functions from stop_hook.sh in isolation for unit testing.
|
||||
|
||||
This helper enables testing individual bash functions without executing the main script.
|
||||
"""
|
||||
|
||||
import os
|
||||
import shlex
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class BashRunner:
    """Execute bash functions from stop_hook.sh in isolation.

    Each call spawns ``bash -c`` with a small wrapper script that sources a
    sed-filtered copy of the hook script and then runs one command, so
    individual functions can be exercised without running the hook's main
    entry point.
    """

    _SCRIPT_NAME = "stop_hook.sh"
    # Path the original default pointed at; kept only as a last-resort fallback.
    _LEGACY_SCRIPT_PATH = "/Users/tanushreesharma/tracing-claude-code/stop_hook.sh"

    # sed range that drops the hook's "exit early if tracing disabled" guard.
    _DROP_EARLY_EXIT = "/^# Exit early if tracing disabled$/,/^fi$/d"
    # sed ranges that drop everything from a "main" line to the end of the file.
    # NOTE(review): call_function cuts at a bare `main` line, while the
    # inspection helpers cut at a line starting with `main()`. Both patterns
    # are preserved exactly as found; confirm against stop_hook.sh whether
    # the difference is intentional.
    _DROP_FROM_MAIN_CALL = "/^main$/,$d"
    _DROP_FROM_MAIN_DEF = "/^main()/,$d"

    CALL_TIMEOUT_SECONDS = 30
    INSPECT_TIMEOUT_SECONDS = 10

    def __init__(self, script_path: Optional[str] = None):
        """
        Bind the runner to a hook script.

        Args:
            script_path: Path to the script. When omitted, the nearest
                ``stop_hook.sh`` in this file's parent directories is used.

        Raises:
            FileNotFoundError: If the resolved script does not exist
        """
        if script_path is None:
            script_path = self._default_script_path()
        self.script_path = script_path
        if not Path(script_path).exists():
            raise FileNotFoundError(f"Script not found: {script_path}")

    @classmethod
    def _default_script_path(cls) -> str:
        """Locate stop_hook.sh by walking up from this file; fall back to the legacy path."""
        for directory in Path(__file__).resolve().parents:
            candidate = directory / cls._SCRIPT_NAME
            if candidate.is_file():
                return str(candidate)
        return cls._LEGACY_SCRIPT_PATH

    def _source_line(self, main_filter: str) -> str:
        """Build the bash line that sources the sed-filtered hook script."""
        return (
            f"source <(sed -e '{self._DROP_EARLY_EXIT}' -e '{main_filter}' "
            f"{shlex.quote(self.script_path)})"
        )

    def call_function(self, func_name: str, *args: str, stdin: Optional[str] = None) -> str:
        """
        Call a bash function with arguments.

        Args:
            func_name: Name of the function to call (inserted into the
                wrapper script verbatim, not shell-quoted)
            *args: Arguments to pass to the function (each is shell-quoted)
            stdin: Optional stdin input for the function

        Returns:
            stdout from function execution, stripped of surrounding whitespace

        Raises:
            RuntimeError: If bash cannot be started, the call times out, or
                the wrapper script exits with a non-zero status
        """
        quoted_args = " ".join(shlex.quote(arg) for arg in args)
        script = "\n".join([
            "set -e",
            "set -o pipefail",
            "",
            "# Source functions from stop_hook.sh (skip main execution and early exit)",
            self._source_line(self._DROP_FROM_MAIN_CALL),
            "",
            "# Call target function",
            f"{func_name} {quoted_args}",
            "",
        ])

        env = {
            **os.environ,
            "TRACE_TO_LANGSMITH": "false",  # Disable hook during testing
            "CC_LANGSMITH_DEBUG": "false",  # Disable debug logging
        }

        # Only the process launch is guarded, so the exit-status error below
        # is raised once instead of being caught and re-wrapped.
        try:
            result = subprocess.run(
                ["bash", "-c", script],
                input=stdin,
                capture_output=True,
                text=True,
                timeout=self.CALL_TIMEOUT_SECONDS,
                env=env,
            )
        except subprocess.TimeoutExpired as exc:
            raise RuntimeError(
                f"Function {func_name} timed out after {self.CALL_TIMEOUT_SECONDS} seconds"
            ) from exc
        except Exception as exc:
            # Every launch failure still surfaces as RuntimeError, as callers expect.
            raise RuntimeError(f"Failed to execute function {func_name}: {exc}") from exc

        if result.returncode != 0:
            raise RuntimeError(
                f"Function {func_name} failed with exit code {result.returncode}\n"
                f"STDOUT: {result.stdout}\n"
                f"STDERR: {result.stderr}\n"
                f"SCRIPT:\n{script}"
            )

        return result.stdout.strip()

    def call_with_stdin(self, func_name: str, stdin: str, *args: str) -> str:
        """
        Call function with stdin input (convenience method).

        Args:
            func_name: Name of the function to call
            stdin: Input to pipe to the function
            *args: Arguments to pass to the function

        Returns:
            stdout from function execution
        """
        return self.call_function(func_name, *args, stdin=stdin)

    def get_function_source(self, func_name: str) -> str:
        """
        Extract the source code of a specific function via ``declare -f``.

        Args:
            func_name: Name of the function

        Returns:
            The function source code as printed by bash, stripped

        Raises:
            RuntimeError: If bash exits with a non-zero status
        """
        script = "\n".join([
            self._source_line(self._DROP_FROM_MAIN_DEF),
            f"declare -f {func_name}",
            "",
        ])

        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            timeout=self.INSPECT_TIMEOUT_SECONDS,
        )

        if result.returncode != 0:
            raise RuntimeError(f"Function {func_name} not found")

        return result.stdout.strip()

    def list_functions(self) -> list[str]:
        """
        List the functions bash reports after sourcing the filtered script.

        Returns:
            List of function names (empty if bash exits with a non-zero status)
        """
        script = "\n".join([
            self._source_line(self._DROP_FROM_MAIN_DEF),
            # `declare -F` prints "declare -f NAME"; the third field is the name.
            "declare -F | awk '{print $3}'",
            "",
        ])

        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            timeout=self.INSPECT_TIMEOUT_SECONDS,
        )

        if result.returncode != 0:
            return []

        return [line.strip() for line in result.stdout.splitlines() if line.strip()]
|
||||
@@ -0,0 +1,167 @@
|
||||
"""
|
||||
State file management utilities for tests.
|
||||
|
||||
Provides helpers for managing langsmith_state.json during testing.
|
||||
"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
|
||||
|
||||
class StateManager:
    """Manage langsmith_state.json for tests.

    The state file is treated as a JSON object keyed by session ID, where
    each value is a per-session dict. The keys read and written by this
    class are ``last_line`` and ``turn_count``.
    """

    def __init__(self, state_file: Path):
        """
        Args:
            state_file: Location of the state file (need not exist yet)
        """
        self.state_file = Path(state_file)

    def load(self) -> dict:
        """
        Load state from file.

        Returns:
            State dictionary. An empty dict is returned when the file is
            missing, unreadable, not valid JSON, or its top-level JSON value
            is not an object.
        """
        try:
            state = json.loads(self.state_file.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError covers a missing or unreadable file; ValueError covers
            # malformed JSON and undecodable bytes.
            return {}
        # A list/null/scalar top level would break every accessor below.
        return state if isinstance(state, dict) else {}

    def save(self, state: dict) -> None:
        """
        Save state to file, creating parent directories as needed.

        Args:
            state: State dictionary to save
        """
        self.state_file.parent.mkdir(parents=True, exist_ok=True)
        self.state_file.write_text(json.dumps(state, indent=2), encoding="utf-8")

    def get_session_state(self, session_id: str) -> dict:
        """
        Get state for a specific session.

        Args:
            session_id: Session ID to look up

        Returns:
            Session state dictionary (empty dict if not found)
        """
        return self.load().get(session_id, {})

    def update_session(self, session_id: str, **kwargs: Any) -> None:
        """
        Update session state with new values, creating the session if absent.

        Args:
            session_id: Session ID to update
            **kwargs: Key-value pairs to update in the session state
        """
        state = self.load()
        state.setdefault(session_id, {}).update(kwargs)
        self.save(state)

    def set_session_state(self, session_id: str, session_state: dict) -> None:
        """
        Set complete session state (replaces existing).

        Args:
            session_id: Session ID
            session_state: New session state dictionary
        """
        state = self.load()
        state[session_id] = session_state
        self.save(state)

    def delete_session(self, session_id: str) -> None:
        """
        Delete a session from state; the file is rewritten only if it was present.

        Args:
            session_id: Session ID to delete
        """
        state = self.load()
        if session_id in state:
            del state[session_id]
            self.save(state)

    def clear(self) -> None:
        """Clear all state (delete the file); a missing file is not an error."""
        self.state_file.unlink(missing_ok=True)

    def exists(self) -> bool:
        """
        Check if state file exists.

        Returns:
            True if file exists
        """
        return self.state_file.exists()

    def get_last_line(self, session_id: str) -> int:
        """
        Get the last processed line number for a session.

        Args:
            session_id: Session ID

        Returns:
            Last line number (0 if not found)
        """
        return self.get_session_state(session_id).get("last_line", 0)

    def get_turn_count(self, session_id: str) -> int:
        """
        Get the turn count for a session.

        Args:
            session_id: Session ID

        Returns:
            Turn count (0 if not found)
        """
        return self.get_session_state(session_id).get("turn_count", 0)

    def set_last_line(self, session_id: str, last_line: int) -> None:
        """
        Set the last processed line number for a session.

        Args:
            session_id: Session ID
            last_line: Line number
        """
        self.update_session(session_id, last_line=last_line)

    def set_turn_count(self, session_id: str, turn_count: int) -> None:
        """
        Set the turn count for a session.

        Args:
            session_id: Session ID
            turn_count: Number of turns
        """
        self.update_session(session_id, turn_count=turn_count)

    def list_sessions(self) -> list[str]:
        """
        Get list of all session IDs in state.

        Returns:
            List of session ID strings
        """
        return list(self.load())

    def __repr__(self) -> str:
        return f"StateManager({self.state_file})"
|
||||
@@ -0,0 +1,17 @@
|
||||
# Testing framework
|
||||
pytest==8.3.4
|
||||
pytest-asyncio==0.24.0
|
||||
pytest-mock==3.14.0
|
||||
pytest-timeout==2.3.1
|
||||
pytest-cov==6.0.0
|
||||
|
||||
# JSONL parsing
|
||||
jsonlines==4.0.0
|
||||
|
||||
# Subprocess handling
|
||||
subprocess-tee==0.4.2
|
||||
|
||||
# Already installed in parent environment:
|
||||
# langsmith==0.4.58
|
||||
# langchain==1.1.3
|
||||
# langchain-core==1.1.3
|
||||
@@ -0,0 +1,2 @@
|
||||
{"type": "user", "role": "user", "content": "Hello", "timestamp": "2025-12-23T00:38:30.199307Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_5a37288a", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Hi there!"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.199503Z"}
|
||||
@@ -0,0 +1,6 @@
|
||||
{"type": "user", "role": "user", "content": "Question 1", "timestamp": "2025-12-23T00:38:30.199818Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_63ecf218", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Answer 1"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.199832Z"}
|
||||
{"type": "user", "role": "user", "content": "Question 2", "timestamp": "2025-12-23T00:38:30.199834Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_d3b3a286", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Answer 2"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.199840Z"}
|
||||
{"type": "user", "role": "user", "content": "Question 3", "timestamp": "2025-12-23T00:38:30.199841Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_6a863ce1", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Answer 3"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.199845Z"}
|
||||
@@ -0,0 +1,5 @@
|
||||
{"type": "user", "role": "user", "content": "Tell me a story", "timestamp": "2025-12-23T00:38:30.200150Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_3cc4f9e4", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "Once "}], "usage": {"input_tokens": 10, "output_tokens": 2}}, "timestamp": "2025-12-23T00:38:30.200157Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_3cc4f9e4", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "upon "}], "usage": {"input_tokens": 10, "output_tokens": 4}}, "timestamp": "2025-12-23T00:38:30.200159Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_3cc4f9e4", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "a "}], "usage": {"input_tokens": 10, "output_tokens": 6}}, "timestamp": "2025-12-23T00:38:30.200161Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_3cc4f9e4", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "time..."}], "usage": {"input_tokens": 10, "output_tokens": 8}}, "timestamp": "2025-12-23T00:38:30.200162Z"}
|
||||
@@ -0,0 +1,4 @@
|
||||
{"type": "user", "role": "user", "content": "Read file test.txt", "timestamp": "2025-12-23T00:38:30.200007Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_ff5a77ea", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "I'll read that file."}, {"type": "tool_use", "id": "tool_0d13cad9", "name": "Read", "input": {"file_path": "/test/test.txt"}}], "usage": {"input_tokens": 10, "output_tokens": 15}}, "timestamp": "2025-12-23T00:38:30.200018Z"}
|
||||
{"type": "user", "role": "user", "content": [{"type": "tool_result", "tool_use_id": "tool_0d13cad9", "content": "File content: hello world"}], "timestamp": "2025-12-23T00:38:30.200021Z"}
|
||||
{"type": "assistant", "message": {"id": "msg_c47e16d2", "role": "assistant", "model": "claude-sonnet-4-5-20250929", "content": [{"type": "text", "text": "The file says: hello world"}], "usage": {"input_tokens": 10, "output_tokens": 5}}, "timestamp": "2025-12-23T00:38:30.200026Z"}
|
||||
@@ -0,0 +1,402 @@
|
||||
"""
|
||||
Unit tests for API call functions in stop_hook.sh.
|
||||
|
||||
Tests:
|
||||
- api_call() - HTTP request handling, error codes, timeouts
|
||||
- send_multipart_batch() - Batch sending via multipart endpoint
|
||||
- cleanup_pending_turn() - Cleanup on early exit
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestApiCallFunction:
    """Static checks on the source text of the api_call() bash function."""

    def test_api_call_constructs_correct_curl_command(self, bash_executor, tmp_path):
        """api_call builds a curl invocation with the expected headers and timeout flag."""
        # No real curl call is made; only the function's source text is inspected.
        api_call_src = bash_executor.get_function_source("api_call")

        expected_fragments = (
            "x-api-key:",
            "Content-Type: application/json",
            "curl",
            "--max-time",
        )
        for fragment in expected_fragments:
            assert fragment in api_call_src

    def test_api_call_handles_method_parameter(self, bash_executor):
        """api_call takes the HTTP method from a variable passed to curl -X."""
        api_call_src = bash_executor.get_function_source("api_call")

        for fragment in ("-X", "method"):
            assert fragment in api_call_src

    def test_api_call_uses_api_base_url(self, bash_executor):
        """api_call references both the API base URL and the endpoint."""
        api_call_src = bash_executor.get_function_source("api_call")

        for fragment in ("API_BASE", "endpoint"):
            assert fragment in api_call_src

    def test_api_call_extracts_http_code(self, bash_executor):
        """api_call captures the HTTP status code from the curl response."""
        api_call_src = bash_executor.get_function_source("api_call")

        for fragment in ("http_code", "%{http_code}"):
            assert fragment in api_call_src

    def test_api_call_returns_error_on_4xx(self, bash_executor):
        """api_call contains the status-range bounds and an error return."""
        api_call_src = bash_executor.get_function_source("api_call")

        for fragment in ("200", "300", "return 1"):
            assert fragment in api_call_src

    def test_api_call_logs_errors(self, bash_executor):
        """api_call logs at ERROR level on failure."""
        api_call_src = bash_executor.get_function_source("api_call")

        for fragment in ("log", "ERROR"):
            assert fragment in api_call_src
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestApiCallErrorHandling:
    """Tests for API call error handling scenarios.

    The api_call source is obtained through the shared ``bash_executor``
    fixture rather than an inline bash script run from a hard-coded checkout
    directory. ``get_function_source`` raises RuntimeError when bash exits
    non-zero, so an extraction failure is reported as such instead of as a
    failed substring assertion against empty output.
    """

    def test_api_call_structure_for_post(self, bash_executor):
        """Test api_call structure for POST requests"""
        source = bash_executor.get_function_source("api_call")

        # Verify it handles data parameter for POST
        assert "-d" in source
        assert "data" in source

    def test_api_call_structure_for_patch(self, bash_executor):
        """Test api_call structure for PATCH requests"""
        source = bash_executor.get_function_source("api_call")

        # PATCH uses same structure as POST with -X PATCH
        assert "method" in source

    def test_api_call_has_timeout(self, bash_executor):
        """Test that api_call has a timeout configured"""
        source = bash_executor.get_function_source("api_call")

        assert "--max-time" in source
        assert "60" in source  # 60 second timeout
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestSendMultipartBatch:
    """Static checks on the source text of the send_multipart_batch() bash function."""

    def test_send_multipart_batch_exists(self, bash_executor):
        """The function is defined and its name appears in its own source."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")
        assert "send_multipart_batch" in batch_src

    def test_send_multipart_batch_handles_empty_batch(self, bash_executor):
        """An empty batch is checked for explicitly."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")

        # A size/length check against zero is expected.
        has_size_check = "batch_size" in batch_src or "length" in batch_src
        assert has_size_check
        assert "0" in batch_src

    def test_send_multipart_batch_creates_temp_dir(self, bash_executor):
        """A temp directory is created for the batch files."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")

        for fragment in ("mktemp -d", "temp_dir"):
            assert fragment in batch_src

    def test_send_multipart_batch_cleans_up_temp_files(self, bash_executor):
        """The temp directory is removed after sending."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")

        assert "rm -rf" in batch_src

    def test_send_multipart_batch_uses_multipart_endpoint(self, bash_executor):
        """The multipart runs endpoint is targeted."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")

        assert "/runs/multipart" in batch_src

    def test_send_multipart_batch_handles_post_operation(self, bash_executor):
        """The 'post' operation is handled."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")

        assert "operation" in batch_src
        # A case-insensitive match also covers an upper-case "POST".
        assert "post" in batch_src.lower()

    def test_send_multipart_batch_handles_patch_operation(self, bash_executor):
        """The 'patch' operation is routed through the operation parameter."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")

        # Patch operations also go to the multipart endpoint; the operation
        # value is handed to serialize_for_multipart for part naming.
        for fragment in ("operation", "serialize_for_multipart"):
            assert fragment in batch_src

    def test_send_multipart_batch_logs_success(self, bash_executor):
        """A successful batch is logged at INFO level."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")
        lowered = batch_src.lower()

        assert "log" in batch_src
        assert "INFO" in batch_src
        assert "succeeded" in lowered or "success" in lowered

    def test_send_multipart_batch_logs_failure(self, bash_executor):
        """A failed batch is logged at ERROR level."""
        batch_src = bash_executor.get_function_source("send_multipart_batch")

        assert "ERROR" in batch_src
        assert "failed" in batch_src.lower()
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestCleanupPendingTurn:
    """Tests for cleanup_pending_turn() function"""

    def test_cleanup_function_exists(self, bash_executor):
        """Test that cleanup_pending_turn function exists"""
        source = bash_executor.get_function_source("cleanup_pending_turn")
        assert "cleanup_pending_turn" in source

    def test_cleanup_checks_current_turn_id(self, bash_executor):
        """Test that cleanup checks if there's a pending turn"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        # Should check CURRENT_TURN_ID
        assert "CURRENT_TURN_ID" in source
        assert "-n" in source  # Test for non-empty

    def test_cleanup_sends_patch_request(self, bash_executor):
        """Test that cleanup patches the pending run"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        # Should call api_call with PATCH
        assert "PATCH" in source
        assert "/runs/" in source

    def test_cleanup_sets_error_message(self, bash_executor):
        """Test that cleanup sets appropriate error message"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        # Should include error message
        assert "error" in source.lower()
        assert "early" in source.lower() or "incomplete" in source.lower()

    def test_cleanup_sets_end_time(self, bash_executor):
        """Test that cleanup sets end_time for the run"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        assert "end_time" in source

    def test_cleanup_is_set_as_trap(self, bash_executor):
        """Test that cleanup_pending_turn is set as EXIT trap"""
        # Read the same script the runner is bound to, not a hard-coded
        # absolute path, so the test works from any checkout location.
        content = Path(bash_executor.script_path).read_text(encoding="utf-8")

        # Should have trap set for cleanup
        assert "trap cleanup_pending_turn EXIT" in content

    def test_cleanup_ignores_errors(self, bash_executor):
        """Test that cleanup ignores errors (since we're exiting anyway)"""
        source = bash_executor.get_function_source("cleanup_pending_turn")

        # Should have || true to ignore errors
        assert "|| true" in source
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestApiKeyHandling:
    """Tests for API key configuration.

    Each test inspects the raw text of the script the ``bash_executor``
    fixture is bound to, so no absolute checkout path is hard-coded here.
    """

    def test_api_key_from_cc_langsmith_api_key(self, bash_executor):
        """Test that CC_LANGSMITH_API_KEY is checked first"""
        content = Path(bash_executor.script_path).read_text(encoding="utf-8")

        assert "CC_LANGSMITH_API_KEY" in content

    def test_api_key_fallback_to_langsmith_api_key(self, bash_executor):
        """Test fallback to LANGSMITH_API_KEY"""
        content = Path(bash_executor.script_path).read_text(encoding="utf-8")

        # Should have fallback syntax
        assert '${CC_LANGSMITH_API_KEY:-$LANGSMITH_API_KEY}' in content

    def test_api_key_validation(self, bash_executor):
        """Test that missing API key is handled"""
        content = Path(bash_executor.script_path).read_text(encoding="utf-8")

        # Should check if API_KEY is empty
        assert '-z "$API_KEY"' in content
        assert "not set" in content.lower() or "ERROR" in content
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestHttpResponseHandling:
    """Tests for HTTP response code handling.

    The api_call source is obtained through the shared ``bash_executor``
    fixture instead of four copies of an inline bash script run from a
    hard-coded checkout directory. ``get_function_source`` raises
    RuntimeError when bash exits non-zero, so extraction failures are not
    silently turned into assertions against empty output.
    """

    def test_success_codes_accepted(self, bash_executor):
        """Test that 2xx codes are treated as success"""
        source = bash_executor.get_function_source("api_call")

        # Check for 200-299 range logic
        assert "200" in source
        assert "300" in source

    def test_4xx_codes_logged_as_error(self, bash_executor):
        """Test that 4xx codes are logged as errors"""
        source = bash_executor.get_function_source("api_call")

        # Should log HTTP code on error
        assert "HTTP" in source
        assert "http_code" in source

    def test_response_body_logged_on_error(self, bash_executor):
        """Test that response body is logged on error"""
        source = bash_executor.get_function_source("api_call")

        # Should log response
        assert "response" in source.lower()

    def test_request_data_logged_on_error(self, bash_executor):
        """Test that request data is logged (truncated) on error"""
        source = bash_executor.get_function_source("api_call")

        # Should log request data (truncated to 500 chars)
        assert "data" in source
        assert "500" in source  # Truncation limit
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestProjectConfiguration:
    """Tests for project configuration.

    Each test inspects the raw text of the script the ``bash_executor``
    fixture is bound to, so no absolute checkout path is hard-coded here.
    """

    def test_project_name_from_env(self, bash_executor):
        """Test that project name comes from CC_LANGSMITH_PROJECT"""
        content = Path(bash_executor.script_path).read_text(encoding="utf-8")

        assert "CC_LANGSMITH_PROJECT" in content

    def test_project_name_default(self, bash_executor):
        """Test that project has default value"""
        content = Path(bash_executor.script_path).read_text(encoding="utf-8")

        # Should have default: "claude-code"
        assert '${CC_LANGSMITH_PROJECT:-claude-code}' in content

    def test_api_base_url(self, bash_executor):
        """Test that API base URL is configured"""
        content = Path(bash_executor.script_path).read_text(encoding="utf-8")

        assert "api.smith.langchain.com" in content
|
||||
@@ -0,0 +1,238 @@
|
||||
"""
|
||||
Unit tests for content formatting functions from stop_hook.sh.
|
||||
|
||||
Tests:
|
||||
- format_content() - Convert content to LangSmith format
|
||||
- merge_assistant_parts() - Merge SSE streaming parts
|
||||
- get_usage_from_parts() - Extract token usage from parts
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestFormatContent:
    """Exercise the ``format_content`` shell function through ``bash_executor``."""

    @staticmethod
    def _format(bash_executor, message):
        """Serialize ``message``, pass it to ``format_content`` and decode the reply."""
        raw = bash_executor.call_function("format_content", json.dumps(message))
        return json.loads(raw)

    @staticmethod
    def _assert_single_empty_text(blocks):
        """Assert that ``blocks`` is exactly one text block with empty text."""
        assert len(blocks) == 1
        assert blocks[0]["type"] == "text"
        assert blocks[0]["text"] == ""

    def test_formats_string_content(self, bash_executor):
        """A plain string is wrapped into a single text block."""
        blocks = self._format(bash_executor, {"content": "hello world"})

        assert isinstance(blocks, list)
        assert len(blocks) == 1
        assert blocks[0]["type"] == "text"
        assert blocks[0]["text"] == "hello world"

    def test_formats_array_content(self, bash_executor):
        """An array mixing thinking, text and tool_use keeps its order and length."""
        message = {
            "content": [
                {"type": "thinking", "thinking": "analyzing..."},
                {"type": "text", "text": "result"},
                {"type": "tool_use", "id": "t1", "name": "Read", "input": {}},
            ]
        }
        blocks = self._format(bash_executor, message)

        assert len(blocks) == 3
        first, second, third = blocks
        assert first["type"] == "thinking"
        assert second["type"] == "text"
        # tool_use should be converted to tool_call
        assert third["type"] == "tool_call"
        assert third["name"] == "Read"

    def test_converts_tool_use_to_tool_call(self, bash_executor):
        """A tool_use block becomes tool_call, with ``input`` exposed as ``args``."""
        tool_use = {
            "type": "tool_use",
            "id": "tool_123",
            "name": "Bash",
            "input": {"command": "ls"},
        }
        blocks = self._format(bash_executor, {"content": [tool_use]})

        assert len(blocks) == 1
        converted = blocks[0]
        assert converted["type"] == "tool_call"
        assert converted["id"] == "tool_123"
        assert converted["name"] == "Bash"
        assert converted["args"] == {"command": "ls"}

    def test_handles_empty_content(self, bash_executor):
        """An empty content array yields the default empty text block."""
        self._assert_single_empty_text(self._format(bash_executor, {"content": []}))

    def test_handles_null_content(self, bash_executor):
        """A null content value yields the default empty text block."""
        self._assert_single_empty_text(self._format(bash_executor, {"content": None}))

    def test_handles_missing_content(self, bash_executor):
        """A message without a content field yields the default empty text block."""
        self._assert_single_empty_text(
            self._format(bash_executor, {"message": {"id": "123"}})
        )
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestMergeAssistantParts:
    """Exercise the ``merge_assistant_parts`` shell function through ``bash_executor``."""

    @staticmethod
    def _merge(bash_executor, parts):
        """Serialize ``parts``, pass them to ``merge_assistant_parts`` and decode the reply."""
        raw = bash_executor.call_function("merge_assistant_parts", json.dumps(parts))
        return json.loads(raw)

    def test_merges_multiple_parts_with_same_id(self, bash_executor, sample_streaming_parts):
        """The fixture's streaming parts collapse into one text block with final usage."""
        merged = self._merge(bash_executor, sample_streaming_parts)

        # Check structure
        assert "message" in merged
        message = merged["message"]
        assert "content" in message

        # Check content was merged
        blocks = message["content"]
        assert len(blocks) == 1
        assert blocks[0]["type"] == "text"
        assert blocks[0]["text"] == "Hello world!"

        # Check usage is from last part (cumulative)
        assert "_usage" in message
        assert message["_usage"]["output_tokens"] == 5

    def test_merges_text_blocks_only(self, bash_executor):
        """Two text parts sharing a message id are concatenated into one block."""
        chunks = [
            ("Part 1 ", {"input_tokens": 10, "output_tokens": 2}),
            ("Part 2", {"input_tokens": 10, "output_tokens": 4}),
        ]
        parts = [
            {
                "message": {
                    "id": "msg_1",
                    "content": [{"type": "text", "text": text}],
                    "usage": usage,
                }
            }
            for text, usage in chunks
        ]

        blocks = self._merge(bash_executor, parts)["message"]["content"]

        assert len(blocks) == 1
        assert blocks[0]["text"] == "Part 1 Part 2"

    def test_preserves_non_text_content(self, bash_executor):
        """A tool_use block following a text block is kept as its own entry."""
        part = {
            "message": {
                "id": "msg_1",
                "content": [
                    {"type": "text", "text": "Calling tool"},
                    {"type": "tool_use", "id": "t1", "name": "Read", "input": {}},
                ],
                "usage": {"input_tokens": 10, "output_tokens": 5},
            }
        }

        blocks = self._merge(bash_executor, [part])["message"]["content"]

        assert len(blocks) == 2
        assert blocks[0]["type"] == "text"
        assert blocks[1]["type"] == "tool_use"

    def test_handles_single_part(self, bash_executor):
        """A lone part comes back with its single text block intact."""
        part = {
            "message": {
                "id": "msg_1",
                "content": [{"type": "text", "text": "Single part"}],
                "usage": {"input_tokens": 10, "output_tokens": 5},
            }
        }

        blocks = self._merge(bash_executor, [part])["message"]["content"]

        assert len(blocks) == 1
        assert blocks[0]["text"] == "Single part"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestGetUsageFromParts:
    """Exercise the ``get_usage_from_parts`` shell function through ``bash_executor``."""

    def test_extracts_usage_from_last_part(self, bash_executor, sample_streaming_parts):
        """Usage for the fixture's streaming parts reports 10 input and 5 output tokens."""
        raw = bash_executor.call_function(
            "get_usage_from_parts", json.dumps(sample_streaming_parts)
        )
        usage = json.loads(raw)

        # Should get usage from last part (cumulative)
        assert usage["input_tokens"] == 10
        assert usage["output_tokens"] == 5

    def test_extracts_usage_with_cache_tokens(self, bash_executor):
        """Cache read and cache creation counters are returned alongside the basics."""
        expected = {
            "input_tokens": 100,
            "output_tokens": 50,
            "cache_read_input_tokens": 1000,
            "cache_creation_input_tokens": 200,
        }
        parts = [{"message": {"usage": dict(expected)}}]

        raw = bash_executor.call_function("get_usage_from_parts", json.dumps(parts))
        usage = json.loads(raw)

        assert usage["input_tokens"] == 100
        assert usage["output_tokens"] == 50
        assert usage["cache_read_input_tokens"] == 1000
        assert usage["cache_creation_input_tokens"] == 200

    def test_handles_missing_usage(self, bash_executor):
        """Parts without a usage field produce ``null`` or an empty object."""
        parts = [{"message": {"content": [{"type": "text", "text": "hi"}]}}]

        raw = bash_executor.call_function("get_usage_from_parts", json.dumps(parts))

        # Should return null or empty object
        assert raw in ("null", "{}")
|
||||
@@ -0,0 +1,202 @@
|
||||
"""
|
||||
Unit tests for cost tracking and usage metadata from stop_hook.sh.
|
||||
|
||||
These tests verify that token usage is correctly tracked for cost monitoring:
|
||||
- Total input tokens (including cache tokens)
|
||||
- Output tokens
|
||||
- Cache token breakdowns (creation vs read)
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestUsageMetadata:
    """Tests for the usage_metadata calculation (cost tracking).

    Each test pipes a usage dict through the jq filter below and checks the
    totals. According to the original comment, the filter mirrors the jq
    logic in ``stop_hook.sh``; the tests evaluate this copy, not the script's
    own expression.
    """

    # Total input = plain input + cache creation + cache read; the two cache
    # counters are also reported separately under input_token_details.
    _JQ_FILTER = """{
  input_tokens: ((.input_tokens // 0) + (.cache_creation_input_tokens // 0) + (.cache_read_input_tokens // 0)),
  output_tokens: (.output_tokens // 0),
  input_token_details: {
    cache_creation: (.cache_creation_input_tokens // 0),
    cache_read: (.cache_read_input_tokens // 0)
  }
}"""

    @classmethod
    def _usage_metadata(cls, bash_executor, usage):
        """Run ``usage`` through the jq filter and return the decoded result.

        Args:
            bash_executor: fixture exposing ``call_function``.
            usage: raw usage dict as found in a transcript message.

        Returns:
            The dict produced by jq.
        """
        script = (
            "\nset -e\n"
            "source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)\n"
            "\n"
            f"echo '{json.dumps(usage)}' | jq '{cls._JQ_FILTER}'\n"
        )
        # Plain method call; the previous __self__/__func__ round trip resolved
        # to exactly this invocation.
        output = bash_executor.call_function("bash", "-c", script)
        return json.loads(output)

    def test_calculates_total_input_tokens_with_cache(self, bash_executor):
        """Total input tokens include both cache counters."""
        usage = {
            "input_tokens": 100,
            "output_tokens": 50,
            "cache_creation_input_tokens": 500,
            "cache_read_input_tokens": 2000,
        }

        metadata = self._usage_metadata(bash_executor, usage)

        # Total input = 100 + 500 + 2000 = 2600
        assert metadata["input_tokens"] == 2600
        assert metadata["output_tokens"] == 50
        assert metadata["input_token_details"]["cache_creation"] == 500
        assert metadata["input_token_details"]["cache_read"] == 2000

    def test_handles_missing_cache_tokens(self, bash_executor):
        """Missing cache counters are treated as zero."""
        usage = {
            "input_tokens": 100,
            "output_tokens": 50,
            # No cache tokens
        }

        metadata = self._usage_metadata(bash_executor, usage)

        # Total input = 100 + 0 + 0 = 100
        assert metadata["input_tokens"] == 100
        assert metadata["output_tokens"] == 50
        assert metadata["input_token_details"]["cache_creation"] == 0
        assert metadata["input_token_details"]["cache_read"] == 0

    def test_realistic_usage_scenario(self, bash_executor):
        """Usage values described by the original as taken from cc_transcript.jsonl."""
        usage = {
            "input_tokens": 9,
            "cache_creation_input_tokens": 630,
            "cache_read_input_tokens": 18664,
            "output_tokens": 8,
        }

        metadata = self._usage_metadata(bash_executor, usage)

        # Total input = 9 + 630 + 18664 = 19303
        assert metadata["input_tokens"] == 19303
        assert metadata["output_tokens"] == 8
        assert metadata["input_token_details"]["cache_creation"] == 630
        assert metadata["input_token_details"]["cache_read"] == 18664
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestCostImplications:
    """Arithmetic checks on hand-built usage dicts.

    NOTE(review): these tests only inspect local dict literals and never call
    anything from stop_hook.sh.
    """

    def test_cache_read_reduces_cost(self):
        """Two usage dicts with the same total input, one of them mostly cache reads."""
        cached = {
            "input_tokens": 10,
            "cache_read_input_tokens": 10000,
            "output_tokens": 50,
        }
        uncached = {
            "input_tokens": 10010,  # Same total but all regular
            "output_tokens": 50,
        }

        # Both describe 10010 input tokens; only the first splits out cache reads.
        cached_total = cached["input_tokens"] + cached.get("cache_read_input_tokens", 0)
        assert cached_total == 10010
        assert uncached["input_tokens"] == 10010

        # The cache-read figure stays available as its own entry.
        assert cached.get("cache_read_input_tokens") == 10000

    def test_cache_creation_tracked_for_write_cost(self):
        """Cache creation tokens add to the total while remaining a separate entry."""
        usage = {
            "input_tokens": 100,
            "cache_creation_input_tokens": 1000,
            "output_tokens": 50,
        }

        combined = usage["input_tokens"] + usage["cache_creation_input_tokens"]
        assert combined == 1100

        # The cache-creation figure stays available as its own entry.
        assert usage["cache_creation_input_tokens"] == 1000
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestUsageAggregation:
    """Usage extraction and merging checked against the streaming-parts fixture."""

    def test_tracks_usage_per_assistant_message(self, bash_executor, sample_streaming_parts):
        """``get_usage_from_parts`` returns both counters, with 5 output tokens."""
        raw = bash_executor.call_function(
            "get_usage_from_parts", json.dumps(sample_streaming_parts)
        )
        usage = json.loads(raw)

        # Verify usage is extracted (cumulative from streaming)
        for counter in ("input_tokens", "output_tokens"):
            assert counter in usage
        assert usage["output_tokens"] == 5  # Last part has cumulative count

    def test_usage_preserved_through_merge(self, bash_executor, sample_streaming_parts):
        """``merge_assistant_parts`` carries usage over in the ``_usage`` field."""
        raw = bash_executor.call_function(
            "merge_assistant_parts", json.dumps(sample_streaming_parts)
        )
        message = json.loads(raw)["message"]

        # Usage should be in _usage field after merge
        assert "_usage" in message
        assert message["_usage"]["output_tokens"] == 5
        assert message["_usage"]["input_tokens"] == 10
|
||||
@@ -0,0 +1,557 @@
|
||||
"""
|
||||
Unit tests for main() entry point in stop_hook.sh.
|
||||
|
||||
Tests:
|
||||
- Hook input parsing (session_id, transcript_path)
|
||||
- stop_hook_active flag handling
|
||||
- Incremental processing (last_line tracking)
|
||||
- Turn grouping logic
|
||||
- Message ID tracking for SSE streaming
|
||||
- State updates
|
||||
- Execution time tracking
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestHookInputParsing:
|
||||
"""Tests for parsing hook input JSON"""
|
||||
|
||||
def test_extracts_session_id(self):
|
||||
"""Test that session_id is extracted from hook input"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "session_id" in content
|
||||
assert '.session_id' in content # jq extraction
|
||||
|
||||
def test_extracts_transcript_path(self):
|
||||
"""Test that transcript_path is extracted from hook input"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "transcript_path" in content
|
||||
assert '.transcript_path' in content # jq extraction
|
||||
|
||||
def test_expands_tilde_in_transcript_path(self):
|
||||
"""Test that ~ is expanded to $HOME in transcript_path"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should use sed to replace ~
|
||||
assert 's|^~|$HOME|' in content
|
||||
|
||||
def test_validates_session_id_not_empty(self):
|
||||
"""Test that empty session_id is handled"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert '-z "$session_id"' in content
|
||||
|
||||
def test_validates_transcript_file_exists(self):
|
||||
"""Test that missing transcript file is handled"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert '! -f "$transcript_path"' in content
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestStopHookActiveFlag:
|
||||
"""Tests for stop_hook_active flag handling"""
|
||||
|
||||
def test_checks_stop_hook_active_flag(self):
|
||||
"""Test that stop_hook_active flag is checked"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "stop_hook_active" in content
|
||||
|
||||
def test_exits_when_stop_hook_active_is_true(self):
|
||||
"""Test that script exits when stop_hook_active is true"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert '.stop_hook_active == true' in content
|
||||
assert "exit 0" in content
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestIncrementalProcessing:
|
||||
"""Tests for incremental message processing via last_line tracking"""
|
||||
|
||||
def test_loads_state_for_last_line(self):
|
||||
"""Test that state is loaded to get last_line"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "load_state" in content
|
||||
assert "last_line" in content
|
||||
|
||||
def test_uses_awk_to_skip_processed_lines(self):
|
||||
"""Test that awk is used to skip already processed lines"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should use awk with NR > start
|
||||
assert "awk" in content
|
||||
assert "NR >" in content
|
||||
|
||||
def test_tracks_new_last_line(self):
|
||||
"""Test that new_last_line is tracked during processing"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "new_last_line" in content
|
||||
|
||||
def test_updates_state_with_new_last_line(self):
|
||||
"""Test that state is updated with new last_line"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "save_state" in content
|
||||
|
||||
def test_exits_early_if_no_new_messages(self):
|
||||
"""Test that script exits if no new messages"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "No new messages" in content or "exit 0" in content
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestTurnGrouping:
|
||||
"""Tests for grouping messages into turns"""
|
||||
|
||||
def test_tracks_current_user_message(self):
|
||||
"""Test that current user message is tracked"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "current_user" in content
|
||||
|
||||
def test_tracks_current_assistants_array(self):
|
||||
"""Test that current assistant messages are tracked as array"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "current_assistants" in content
|
||||
assert '"[]"' in content or "='[]'" in content
|
||||
|
||||
def test_tracks_current_tool_results(self):
|
||||
"""Test that current tool results are tracked"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "current_tool_results" in content
|
||||
|
||||
def test_identifies_user_role(self):
|
||||
"""Test that user role is identified"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should check for role == "user"
|
||||
assert '"user"' in content
|
||||
assert "role" in content
|
||||
|
||||
def test_identifies_assistant_role(self):
|
||||
"""Test that assistant role is identified"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert '"assistant"' in content
|
||||
|
||||
def test_new_user_starts_new_turn(self):
|
||||
"""Test that new user message starts a new turn"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# When user message is found (not tool result), should start new turn
|
||||
assert "current_user" in content
|
||||
assert 'current_user="$line"' in content
|
||||
|
||||
def test_tool_result_added_to_current_turn(self):
|
||||
"""Test that tool result is added to current turn"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "is_tool_result" in content
|
||||
assert "current_tool_results" in content
|
||||
|
||||
def test_creates_trace_when_turn_complete(self):
|
||||
"""Test that create_trace is called when turn is complete"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "create_trace" in content
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestSSEStreamingMerge:
|
||||
"""Tests for merging SSE streaming message parts"""
|
||||
|
||||
def test_tracks_current_msg_id(self):
|
||||
"""Test that current message ID is tracked for SSE parts"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "current_msg_id" in content
|
||||
|
||||
def test_tracks_current_assistant_parts(self):
|
||||
"""Test that assistant parts are tracked for merging"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "current_assistant_parts" in content
|
||||
|
||||
def test_same_msg_id_adds_to_parts(self):
|
||||
"""Test that same message ID adds to current parts"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should compare msg_id to current_msg_id
|
||||
assert '$msg_id" = "$current_msg_id"' in content or 'msg_id = "$current_msg_id"' in content
|
||||
|
||||
def test_different_msg_id_starts_new_message(self):
|
||||
"""Test that different message ID starts a new message"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should set current_msg_id to new msg_id
|
||||
assert 'current_msg_id="$msg_id"' in content
|
||||
|
||||
def test_merges_parts_before_new_message(self):
|
||||
"""Test that parts are merged before starting new message"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "merge_assistant_parts" in content
|
||||
|
||||
def test_extracts_message_id_from_line(self):
|
||||
"""Test that message ID is extracted from each line"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should extract .message.id via jq
|
||||
assert ".message.id" in content
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestStateUpdates:
|
||||
"""Tests for state file updates after processing"""
|
||||
|
||||
def test_updates_last_line_in_state(self):
|
||||
"""Test that last_line is updated in state"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "last_line" in content
|
||||
assert "new_last_line" in content
|
||||
|
||||
def test_updates_turn_count_in_state(self):
|
||||
"""Test that turn_count is updated in state"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "turn_count" in content
|
||||
|
||||
def test_updates_timestamp_in_state(self):
|
||||
"""Test that updated timestamp is set in state"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "updated" in content
|
||||
|
||||
def test_state_is_session_specific(self):
|
||||
"""Test that state is keyed by session_id"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should use session_id as key
|
||||
assert ".[$sid]" in content or '[$sid]' in content
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestExecutionTimeTracking:
|
||||
"""Tests for execution time tracking and warnings"""
|
||||
|
||||
def test_tracks_script_start_time(self):
|
||||
"""Test that script start time is recorded"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "script_start" in content
|
||||
|
||||
def test_tracks_script_end_time(self):
|
||||
"""Test that script end time is recorded"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "script_end" in content
|
||||
|
||||
def test_calculates_duration(self):
|
||||
"""Test that duration is calculated"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "duration" in content
|
||||
|
||||
def test_logs_execution_time(self):
|
||||
"""Test that execution time is logged"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should log processing time
|
||||
assert "duration" in content
|
||||
assert "log" in content
|
||||
|
||||
def test_warns_on_slow_execution(self):
|
||||
"""Test that warning is logged for slow execution (>3min)"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should warn if > 180 seconds
|
||||
assert "180" in content
|
||||
assert "WARN" in content
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestTracingDisabledCheck:
|
||||
"""Tests for early exit when tracing is disabled"""
|
||||
|
||||
def test_checks_trace_to_langsmith_env(self):
|
||||
"""Test that TRACE_TO_LANGSMITH is checked"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "TRACE_TO_LANGSMITH" in content
|
||||
|
||||
def test_case_insensitive_check(self):
|
||||
"""Test that check is case insensitive"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should use tr to lowercase
|
||||
assert "tr '[:upper:]' '[:lower:]'" in content
|
||||
|
||||
def test_exits_early_when_disabled(self):
|
||||
"""Test that script exits when tracing disabled"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should have early exit
|
||||
assert '!= "true"' in content
|
||||
assert "exit 0" in content
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestRequiredCommandChecks:
|
||||
"""Tests for required command availability checks"""
|
||||
|
||||
def test_checks_jq_available(self):
|
||||
"""Test that jq availability is checked"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "jq" in content
|
||||
assert "command -v" in content
|
||||
|
||||
def test_checks_curl_available(self):
|
||||
"""Test that curl availability is checked"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "curl" in content
|
||||
|
||||
def test_checks_uuidgen_available(self):
|
||||
"""Test that uuidgen availability is checked"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
assert "uuidgen" in content
|
||||
|
||||
def test_exits_gracefully_if_command_missing(self):
|
||||
"""Test that script exits gracefully if required command missing"""
|
||||
with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
|
||||
content = f.read()
|
||||
|
||||
# Should exit 0 (not error) if command missing
|
||||
assert "exit 0" in content
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestFinalTurnProcessing:
    """Tests for processing the final turn at end of transcript.

    These are static checks: the text of stop_hook.sh is searched for the
    identifiers and comments that mark the post-loop handling.
    """

    # NOTE(review): absolute path from the original author's checkout.
    @staticmethod
    def _hook_source():
        """Return the complete text of stop_hook.sh."""
        with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as f:
            return f.read()

    def test_processes_pending_assistant_parts(self):
        """Test that pending assistant parts are merged at end"""
        content = self._hook_source()

        # Should check for pending parts after loop
        assert "current_msg_id" in content
        assert "merge_assistant_parts" in content

    def test_processes_final_turn(self):
        """Test that final turn is processed after loop"""
        content = self._hook_source()

        marker = "# Process final turn"
        start = content.find(marker)
        # str.find() returns -1 when the marker is absent; slicing from -1
        # would quietly inspect only the last character of the file, so make
        # the missing marker an explicit, readable failure instead.
        assert start != -1, f"{marker!r} not found in stop_hook.sh"

        # Look for a create_trace call from the marker to the end of the file
        main_section = content[start:]
        assert "create_trace" in main_section
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestLoggingInMain:
    """Static checks for the log messages expected in stop_hook.sh.

    Every test reads the script text and asserts that a message fragment is
    present; nothing is executed.
    """

    # NOTE(review): absolute path from the original author's checkout.
    @staticmethod
    def _hook_source():
        """Return the complete text of stop_hook.sh."""
        with open("/Users/tanushreesharma/tracing-claude-code/stop_hook.sh", "r") as script:
            return script.read()

    def test_logs_session_start(self):
        """The session-start message fragment is present."""
        text = self._hook_source()
        assert "Processing session" in text

    def test_logs_message_count(self):
        """The new-message-count fragment is present."""
        text = self._hook_source()
        assert "new messages" in text

    def test_logs_turns_processed(self):
        """The word used for the turns-processed count is present."""
        text = self._hook_source()
        assert "turns" in text

    def test_logs_invalid_input_warning(self):
        """Both the WARN level and the invalid-input text are present."""
        text = self._hook_source()
        for fragment in ("WARN", "Invalid input"):
            assert fragment in text
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestMainIntegration:
    """Integration tests for main() with mocked environment.

    Each test runs stop_hook.sh in a subprocess with a JSON hook payload on
    stdin and checks that the script exits with status 0.
    """

    # NOTE(review): checkout location on the original author's machine; the
    # tests can only run where this directory exists.
    _REPO_DIR = "/Users/tanushreesharma/tracing-claude-code"

    def _run_hook(self, payload, log_dir, **env_overrides):
        """Run stop_hook.sh with *payload* as JSON on stdin.

        Args:
            payload: dict serialized to JSON and fed to the script's stdin.
            log_dir: directory in which LOG_FILE (hook.log) is placed.
            **env_overrides: extra environment variables for the script.

        Returns:
            The subprocess.CompletedProcess of the script run.
        """
        import os  # local: this file's top-level import block is not visible here

        env = {**os.environ, "LOG_FILE": f"{log_dir}/hook.log", **env_overrides}
        # The payload goes through stdin and the settings through env= rather
        # than being interpolated into a shell string, so quotes in the JSON
        # or in the temporary path cannot break the command.
        return subprocess.run(
            ["bash", "stop_hook.sh"],
            input=json.dumps(payload) + "\n",
            capture_output=True,
            text=True,
            cwd=self._REPO_DIR,
            env=env,
        )

    @staticmethod
    def _write_transcript(tmp_path):
        """Create a one-line transcript file and return its path."""
        transcript = tmp_path / "transcript.jsonl"
        transcript.write_text('{"type": "user", "content": "hello"}\n')
        return transcript

    def test_main_exits_when_tracing_disabled(self, tmp_path, monkeypatch):
        """Test that main exits early when TRACE_TO_LANGSMITH is not true"""
        # monkeypatch is unused; it is kept so the test signature is unchanged.
        transcript = self._write_transcript(tmp_path)

        result = self._run_hook(
            {"session_id": "test-session", "transcript_path": str(transcript)},
            tmp_path,
            TRACE_TO_LANGSMITH="false",
        )

        # Should exit 0 (gracefully)
        assert result.returncode == 0, result.stderr

    def test_main_exits_when_missing_session_id(self, tmp_path):
        """Test that main exits when session_id is empty"""
        transcript = self._write_transcript(tmp_path)

        result = self._run_hook(
            {"session_id": "", "transcript_path": str(transcript)},
            tmp_path,
            TRACE_TO_LANGSMITH="true",
            CC_LANGSMITH_API_KEY="test-key",
        )

        # Should exit 0 (gracefully)
        assert result.returncode == 0, result.stderr

    def test_main_exits_when_transcript_missing(self, tmp_path):
        """Test that main exits when transcript file doesn't exist"""
        result = self._run_hook(
            {
                "session_id": "test-session",
                "transcript_path": str(tmp_path / "nonexistent.jsonl"),
            },
            tmp_path,
            TRACE_TO_LANGSMITH="true",
            CC_LANGSMITH_API_KEY="test-key",
        )

        # Should exit 0 (gracefully)
        assert result.returncode == 0, result.stderr

    def test_main_exits_when_stop_hook_active(self, tmp_path):
        """Test that main exits when stop_hook_active is true"""
        transcript = self._write_transcript(tmp_path)

        result = self._run_hook(
            {
                "session_id": "test-session",
                "transcript_path": str(transcript),
                "stop_hook_active": True,
            },
            tmp_path,
            TRACE_TO_LANGSMITH="true",
            CC_LANGSMITH_API_KEY="test-key",
        )

        # Should exit 0
        assert result.returncode == 0, result.stderr
|
||||
@@ -0,0 +1,195 @@
|
||||
"""
|
||||
Unit tests for message parsing functions from stop_hook.sh.
|
||||
|
||||
Tests:
|
||||
- get_content() - Extract content from messages
|
||||
- is_tool_result() - Identify tool result messages
|
||||
- get_tool_uses() - Extract tool_use blocks
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestGetContent:
    """Tests for the get_content() shell function.

    Messages are serialized to JSON and handed to the function through the
    ``bash_executor`` fixture; the function's output is compared as text or
    parsed back from JSON.
    """

    def test_get_content_from_message_wrapper(self, bash_executor):
        """Content nested as {message: {content: ...}} is returned."""
        payload = {"message": {"content": "hello"}}
        output = bash_executor.call_function("get_content", json.dumps(payload))
        assert output == '"hello"'

    def test_get_content_from_direct_format(self, bash_executor):
        """Content at the top level as {content: ...} is returned."""
        payload = {"content": "world"}
        output = bash_executor.call_function("get_content", json.dumps(payload))
        assert output == '"world"'

    def test_get_content_with_array(self, bash_executor):
        """Array content comes back as a JSON list with its blocks intact."""
        payload = {"content": [{"type": "text", "text": "hi"}]}
        output = bash_executor.call_function("get_content", json.dumps(payload))

        blocks = json.loads(output)
        assert isinstance(blocks, list)
        first = blocks[0]
        assert first["type"] == "text"
        assert first["text"] == "hi"

    def test_get_content_returns_null_for_invalid(self, bash_executor):
        """An empty message object yields the literal text ``null``."""
        output = bash_executor.call_function("get_content", json.dumps({}))
        assert output == "null"

    def test_get_content_with_nested_message(self, bash_executor, sample_assistant_message):
        """The sample assistant message yields its three content blocks."""
        encoded = json.dumps(sample_assistant_message)
        blocks = json.loads(bash_executor.call_function("get_content", encoded))

        assert isinstance(blocks, list)
        assert len(blocks) == 3  # thinking + text + tool_use
        assert blocks[0]["type"] == "thinking"
        assert blocks[1]["type"] == "text"
        assert blocks[2]["type"] == "tool_use"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestIsToolResult:
    """Tests for the is_tool_result() shell function.

    The function's output is compared against the strings ``"true"`` and
    ``"false"``.
    """

    def test_identifies_tool_result_message(self, bash_executor):
        """A user message carrying a tool_result block is recognised."""
        tool_block = {
            "type": "tool_result",
            "tool_use_id": "abc",
            "content": "result",
        }
        payload = {"role": "user", "content": [tool_block]}
        verdict = bash_executor.call_function("is_tool_result", json.dumps(payload))
        assert verdict == "true"

    def test_identifies_tool_result_from_sample(self, bash_executor, sample_tool_result):
        """The sample tool-result fixture is recognised."""
        encoded = json.dumps(sample_tool_result)
        verdict = bash_executor.call_function("is_tool_result", encoded)
        assert verdict == "true"

    def test_rejects_non_tool_result(self, bash_executor):
        """A plain user message is not a tool result."""
        payload = {"role": "user", "content": "hello"}
        verdict = bash_executor.call_function("is_tool_result", json.dumps(payload))
        assert verdict == "false"

    def test_rejects_assistant_message(self, bash_executor, sample_assistant_message):
        """An assistant message is rejected even though it has a tool_use."""
        encoded = json.dumps(sample_assistant_message)
        verdict = bash_executor.call_function("is_tool_result", encoded)
        assert verdict == "false"

    def test_handles_string_content(self, bash_executor):
        """String (non-array) content is handled and reported as false."""
        payload = {"role": "user", "content": "not an array"}
        verdict = bash_executor.call_function("is_tool_result", json.dumps(payload))
        assert verdict == "false"

    def test_handles_empty_content_array(self, bash_executor):
        """An empty content array is handled and reported as false."""
        payload = {"role": "user", "content": []}
        verdict = bash_executor.call_function("is_tool_result", json.dumps(payload))
        assert verdict == "false"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestGetToolUses:
    """Tests for the get_tool_uses() shell function.

    The function's output is parsed as JSON and expected to be a list of
    tool_use blocks (empty when the message has none).
    """

    @staticmethod
    def _tool_uses(bash_executor, message):
        """Serialize *message*, run get_tool_uses on it, and parse the output."""
        raw = bash_executor.call_function("get_tool_uses", json.dumps(message))
        return json.loads(raw)

    def test_extracts_tool_uses_from_content(self, bash_executor, sample_assistant_message):
        """The single tool_use block of the sample assistant message is returned."""
        found = self._tool_uses(bash_executor, sample_assistant_message)

        assert isinstance(found, list)
        assert len(found) == 1
        only = found[0]
        assert only["type"] == "tool_use"
        assert only["name"] == "Read"
        assert only["id"] == "tool_test_abc"
        assert "input" in only

    def test_extracts_multiple_tool_uses(self, bash_executor):
        """Two tool_use blocks are returned in their original order."""
        read_call = {
            "type": "tool_use",
            "id": "tool_1",
            "name": "Read",
            "input": {"file": "a.txt"},
        }
        write_call = {
            "type": "tool_use",
            "id": "tool_2",
            "name": "Write",
            "input": {"file": "b.txt"},
        }
        message = {
            "message": {
                "content": [
                    {"type": "text", "text": "I'll use two tools"},
                    read_call,
                    write_call,
                ]
            }
        }
        found = self._tool_uses(bash_executor, message)

        assert len(found) == 2
        assert found[0]["name"] == "Read"
        assert found[1]["name"] == "Write"

    def test_returns_empty_for_no_tools(self, bash_executor):
        """A message with only text content yields an empty list."""
        message = {"message": {"content": [{"type": "text", "text": "no tools"}]}}
        assert self._tool_uses(bash_executor, message) == []

    def test_handles_string_content(self, bash_executor):
        """String (non-array) content yields an empty list."""
        message = {"content": "string content"}
        assert self._tool_uses(bash_executor, message) == []

    def test_handles_missing_content(self, bash_executor):
        """A message without a content field yields an empty list."""
        message = {"message": {"id": "123"}}
        assert self._tool_uses(bash_executor, message) == []

    def test_handles_mixed_content_types(self, bash_executor):
        """Only the tool_use block is picked out of mixed content."""
        message = {
            "content": [
                {"type": "thinking", "thinking": "analyzing"},
                {"type": "text", "text": "result"},
                {"type": "tool_use", "id": "t1", "name": "Bash", "input": {}},
                {"type": "text", "text": "more text"},
            ]
        }
        found = self._tool_uses(bash_executor, message)

        assert len(found) == 1
        assert found[0]["name"] == "Bash"
|
||||
@@ -0,0 +1,208 @@
|
||||
"""
|
||||
Unit tests for model name formatting from stop_hook.sh.
|
||||
|
||||
Tests verify that model names have date suffixes stripped:
|
||||
- claude-sonnet-4-5-20250929 -> claude-sonnet-4-5
|
||||
- claude-opus-4-5-20251101 -> claude-opus-4-5
|
||||
- claude-haiku-4-20241114 -> claude-haiku-4
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestModelNameFormatting:
    """Tests for model name date suffix stripping.

    Most tests exercise a Python re-statement of the rule (drop a trailing
    ``-`` followed by exactly eight digits); ``test_sed_command_strips_date``
    runs the sed expression itself.
    """

    @staticmethod
    def _strip_date_suffix(model):
        """Return *model* without a trailing ``-<8 digits>`` segment, if any."""
        head, dash, tail = model.rpartition("-")
        if dash and tail.isdigit() and len(tail) == 8:
            return head
        return model

    def test_strips_date_from_sonnet_model(self, bash_executor):
        """Test that date suffix is stripped from claude-sonnet model"""
        # bash_executor is unused; it is kept so the test signature is unchanged.
        model_stripped = self._strip_date_suffix("claude-sonnet-4-5-20250929")

        assert model_stripped == "claude-sonnet-4-5"

    def test_strips_date_from_opus_model(self):
        """Test that date suffix is stripped from claude-opus model"""
        model_stripped = self._strip_date_suffix("claude-opus-4-5-20251101")

        assert model_stripped == "claude-opus-4-5"

    def test_strips_date_from_haiku_model(self):
        """Test that date suffix is stripped from claude-haiku model"""
        model_stripped = self._strip_date_suffix("claude-haiku-4-20241114")

        assert model_stripped == "claude-haiku-4"

    def test_handles_model_without_date_suffix(self):
        """Test that models without date suffix remain unchanged"""
        model_stripped = self._strip_date_suffix("gpt-4")

        assert model_stripped == "gpt-4"

    def test_sed_command_strips_date(self):
        """Test the sed pattern s/-[0-9]\\{8\\}$// that removes -YYYYMMDD from the end"""
        import subprocess  # local: not part of this file's top-level imports

        models = [
            ("claude-sonnet-4-5-20250929", "claude-sonnet-4-5"),
            ("claude-opus-4-5-20251101", "claude-opus-4-5"),
            ("claude-haiku-4-20241114", "claude-haiku-4"),
            ("claude-sonnet-4-5", "claude-sonnet-4-5"),  # No date
            ("gpt-4", "gpt-4"),  # Different format
        ]

        for model_in, expected_out in models:
            # Feed the name on stdin and invoke sed directly so no shell
            # quoting is involved in building the command.
            result = subprocess.run(
                ["sed", r"s/-[0-9]\{8\}$//"],
                input=model_in + "\n",
                capture_output=True,
                text=True,
            )
            assert result.returncode == 0, result.stderr
            output = result.stdout.strip()
            assert output == expected_out, f"Expected '{model_in}' -> '{expected_out}', got '{output}'"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestModelNameInMetadata:
    """Tests for model name in LangSmith metadata.

    The stripped name is derived from the full name with a Python
    re-statement of the date-suffix rule, then placed in the metadata/tags
    structures the comments below describe.
    """

    @staticmethod
    def _strip_date_suffix(model):
        """Return *model* without a trailing ``-<8 digits>`` segment, if any."""
        head, dash, tail = model.rpartition("-")
        if dash and tail.isdigit() and len(tail) == 8:
            return head
        return model

    def test_model_name_in_ls_provider_metadata(self):
        """Test that ls_model_name uses stripped model name"""
        # Per the original comment, stop_hook.sh (around line 572) builds:
        # extra: {metadata: {ls_provider: "anthropic", ls_model_name: $model}}
        model_full = "claude-sonnet-4-5-20250929"
        # Derive the value instead of hard-coding it, so the assertions below
        # actually depend on the stripping rule.
        model_stripped = self._strip_date_suffix(model_full)

        metadata = {
            "ls_provider": "anthropic",
            "ls_model_name": model_stripped,
        }

        assert metadata["ls_model_name"] == "claude-sonnet-4-5"
        assert "-20250929" not in metadata["ls_model_name"]

    def test_model_name_in_tags(self):
        """Test that model name in tags is also stripped"""
        # Per the original comment, stop_hook.sh (around line 573) builds:
        # tags: [$model]
        model_stripped = self._strip_date_suffix("claude-sonnet-4-5-20250929")
        tags = [model_stripped]

        assert tags[0] == "claude-sonnet-4-5"
        assert not any("202" in tag for tag in tags), "Tags should not contain date"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestRealWorldModelNames:
    """Date-suffix stripping checked against model names seen in transcripts."""

    @staticmethod
    def _without_date(name):
        """Drop a trailing ``-<8 digits>`` segment from *name*, if present."""
        head, dash, tail = name.rpartition("-")
        if dash and tail.isdigit() and len(tail) == 8:
            return head
        return name

    def test_strips_sonnet_45_date(self):
        """The Sonnet 4.5 name loses its date and keeps four hyphen-separated parts."""
        # Name as recorded in cc_transcript.jsonl: "claude-sonnet-4-5-20250929"
        shortened = self._without_date("claude-sonnet-4-5-20250929")

        assert shortened == "claude-sonnet-4-5"
        assert len(shortened.split("-")) == 4  # claude-sonnet-4-5 has 4 parts

    def test_date_format_validation(self):
        """Only an all-digit, eight-character final segment is removed."""
        # An 8-digit date is removed
        assert "claude-sonnet-4-5-20250929".rsplit("-", 1)[0] == "claude-sonnet-4-5"

        # A non-numeric suffix stays
        assert self._without_date("claude-sonnet-4-5-beta") == "claude-sonnet-4-5-beta"

        # A short number stays
        assert self._without_date("gpt-4") == "gpt-4"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestEdgeCases:
    """Edge cases for the date-suffix stripping rule."""

    @staticmethod
    def _without_date(name):
        """Drop a trailing ``-<8 digits>`` segment from *name*, if present."""
        head, dash, tail = name.rpartition("-")
        if dash and tail.isdigit() and len(tail) == 8:
            return head
        return name

    def test_multiple_dates_only_strips_last(self):
        """With two date-like segments, only the final one is removed."""
        # Hypothetical name: model-20240101-20250929
        shortened = self._without_date("model-20240101-20250929")

        assert shortened == "model-20240101"

    def test_empty_model_name(self):
        """An empty name stays empty."""
        shortened = self._without_date("")

        assert shortened == ""

    def test_model_name_without_hyphens(self):
        """A name with no hyphen is returned unchanged."""
        shortened = self._without_date("gpt4")

        assert shortened == "gpt4"

    def test_preserves_version_numbers(self):
        """Version digits inside the name survive; only the date goes."""
        # claude-3-5-sonnet carries version 3.5, which must remain
        shortened = self._without_date("claude-3-5-sonnet-20241022")

        assert shortened == "claude-3-5-sonnet"
        assert "3-5" in shortened  # Version preserved
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestIntegrationWithSampleData:
    """Date-suffix stripping applied to fixture data and known model names."""

    @staticmethod
    def _without_date(name):
        """Drop a trailing ``-<8 digits>`` segment from *name*, if present."""
        head, dash, tail = name.rpartition("-")
        if dash and tail.isdigit() and len(tail) == 8:
            return head
        return name

    def test_model_name_extraction_from_sample_assistant(self, sample_assistant_message):
        """The fixture's model name is the dated form and strips cleanly."""
        dated = sample_assistant_message["message"]["model"]

        # The fixture carries the full, dated name
        assert dated == "claude-sonnet-4-5-20250929"

        shortened = self._without_date(dated)

        # The date is gone from the shortened form
        assert shortened == "claude-sonnet-4-5"
        assert "20250929" not in shortened

    def test_all_claude_45_variants(self):
        """Each listed dated model name maps to its undated form."""
        expectations = {
            "claude-sonnet-4-5-20250929": "claude-sonnet-4-5",
            "claude-opus-4-5-20251101": "claude-opus-4-5",
            "claude-haiku-4-20241114": "claude-haiku-4",
        }

        for full_name in expectations:
            expected = expectations[full_name]
            stripped = self._without_date(full_name)
            assert stripped == expected, f"Failed for {full_name}: got {stripped}, expected {expected}"
|
||||
@@ -0,0 +1,480 @@
|
||||
"""
|
||||
Unit tests for multipart serialization in stop_hook.sh.
|
||||
|
||||
Tests:
|
||||
- serialize_for_multipart() - Serialize run data for multipart upload
|
||||
- File creation with Content-Length headers
|
||||
- Inputs/outputs extraction and serialization
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestSerializeForMultipart:
    """Static checks on the source of serialize_for_multipart().

    Every test fetches the function's source text through the
    ``bash_executor`` fixture and asserts that expected identifiers or
    fragments appear in it; the function is not run here.
    """

    @staticmethod
    def _source(bash_executor):
        """Return the source text of serialize_for_multipart."""
        return bash_executor.get_function_source("serialize_for_multipart")

    def test_function_exists(self, bash_executor):
        """The returned source mentions the function's own name."""
        assert "serialize_for_multipart" in self._source(bash_executor)

    def test_accepts_operation_parameter(self, bash_executor):
        """The source refers to an operation (post/patch) value."""
        assert "operation" in self._source(bash_executor)

    def test_accepts_run_json_parameter(self, bash_executor):
        """The source refers to run_json."""
        assert "run_json" in self._source(bash_executor)

    def test_accepts_temp_dir_parameter(self, bash_executor):
        """The source refers to temp_dir."""
        assert "temp_dir" in self._source(bash_executor)

    def test_extracts_run_id(self, bash_executor):
        """The source names run_id and contains an ``.id`` accessor."""
        body = self._source(bash_executor)
        assert "run_id" in body
        assert ".id" in body  # jq extraction

    def test_extracts_inputs(self, bash_executor):
        """The source names inputs and contains an ``.inputs`` accessor."""
        body = self._source(bash_executor)
        assert "inputs" in body
        assert ".inputs" in body  # jq extraction

    def test_extracts_outputs(self, bash_executor):
        """The source names outputs and contains an ``.outputs`` accessor."""
        body = self._source(bash_executor)
        assert "outputs" in body
        assert ".outputs" in body  # jq extraction

    def test_creates_main_data_file(self, bash_executor):
        """The source refers to main_file and the ``_main.json`` suffix."""
        body = self._source(bash_executor)
        assert "main_file" in body
        assert "_main.json" in body

    def test_uses_get_file_size(self, bash_executor):
        """The source calls on get_file_size."""
        assert "get_file_size" in self._source(bash_executor)

    def test_outputs_curl_f_arguments(self, bash_executor):
        """The source emits a quoted ``-F`` flag."""
        body = self._source(bash_executor)
        quoted_flag = '"-F"' in body
        echoed_flag = "echo \"-F\"" in body
        assert quoted_flag or echoed_flag

    def test_includes_content_length_header(self, bash_executor):
        """The source mentions the Content-Length header."""
        assert "Content-Length" in self._source(bash_executor)
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestMultipartFileFormat:
    """Static checks on file and part naming inside serialize_for_multipart().

    The function's source text is searched for the variable references and
    filename suffixes that make up the names.
    """

    @staticmethod
    def _source(bash_executor):
        """Return the source text of serialize_for_multipart."""
        return bash_executor.get_function_source("serialize_for_multipart")

    def test_main_file_naming_convention(self, bash_executor):
        """Main file name: {operation}_{run_id}_main.json"""
        body = self._source(bash_executor)

        # Both variables must be referenced, in braced or bare form
        uses_operation = "${operation}" in body or "$operation" in body
        uses_run_id = "${run_id}" in body or "$run_id" in body
        assert uses_operation
        assert uses_run_id
        assert "_main.json" in body

    def test_inputs_file_naming_convention(self, bash_executor):
        """Inputs file name: {operation}_{run_id}_inputs.json"""
        assert "_inputs.json" in self._source(bash_executor)

    def test_outputs_file_naming_convention(self, bash_executor):
        """Outputs file name: {operation}_{run_id}_outputs.json"""
        assert "_outputs.json" in self._source(bash_executor)

    def test_multipart_part_naming(self, bash_executor):
        """Part name: {operation}.{run_id} (post.uuid or patch.uuid)"""
        body = self._source(bash_executor)
        braced = "${operation}.${run_id}" in body
        bare = "$operation.$run_id" in body
        assert braced or bare
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestMultipartDataSeparation:
    """Static checks that serialize_for_multipart() splits inputs/outputs out.

    Only the function's source text is inspected.
    """

    @staticmethod
    def _source(bash_executor):
        """Return the source text of serialize_for_multipart."""
        return bash_executor.get_function_source("serialize_for_multipart")

    def test_main_data_excludes_inputs(self, bash_executor):
        """The source contains a jq ``del(.inputs`` expression."""
        assert "del(.inputs" in self._source(bash_executor)

    def test_main_data_excludes_outputs(self, bash_executor):
        """The source contains a jq ``del(`` and references ``.outputs``."""
        body = self._source(bash_executor)
        assert "del(" in body
        assert ".outputs" in body

    def test_inputs_only_created_if_present(self, bash_executor):
        """The source mentions null and uses a ``-n`` (non-empty) test."""
        body = self._source(bash_executor)

        mentions_null = '"null"' in body or "null" in body
        assert mentions_null
        assert "-n" in body  # Test for non-empty

    def test_outputs_only_created_if_present(self, bash_executor):
        """The source refers to outputs."""
        assert "outputs" in self._source(bash_executor)
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestSerializeForMultipartIntegration:
|
||||
"""Integration tests for serialize_for_multipart with actual data"""
|
||||
|
||||
def test_serialize_post_run(self, tmp_path):
|
||||
"""Test serializing a POST run"""
|
||||
run_data = {
|
||||
"id": "test-run-123",
|
||||
"name": "Test Run",
|
||||
"run_type": "llm",
|
||||
"inputs": {"messages": [{"role": "user", "content": "Hello"}]},
|
||||
"start_time": "2025-01-01T00:00:00Z"
|
||||
}
|
||||
|
||||
script = f"""
|
||||
set -e
|
||||
source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
|
||||
|
||||
temp_dir="{tmp_path}"
|
||||
run_json='{json.dumps(run_data)}'
|
||||
|
||||
serialize_for_multipart "post" "$run_json" "$temp_dir"
|
||||
"""
|
||||
|
||||
result = subprocess.run(
|
||||
["bash", "-c", script],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd="/Users/tanushreesharma/tracing-claude-code"
|
||||
)
|
||||
|
||||
# Check output contains -F arguments
|
||||
output = result.stdout
|
||||
assert "-F" in output
|
||||
assert "post.test-run-123" in output
|
||||
|
||||
# Check that main file was created
|
||||
main_file = tmp_path / "post_test-run-123_main.json"
|
||||
assert main_file.exists()
|
||||
|
||||
# Check that inputs file was created
|
||||
inputs_file = tmp_path / "post_test-run-123_inputs.json"
|
||||
assert inputs_file.exists()
|
||||
|
||||
def test_serialize_patch_run(self, tmp_path):
|
||||
"""Test serializing a PATCH run"""
|
||||
run_data = {
|
||||
"id": "test-run-456",
|
||||
"outputs": {"messages": [{"role": "assistant", "content": "Hi"}]},
|
||||
"end_time": "2025-01-01T00:00:01Z"
|
||||
}
|
||||
|
||||
script = f"""
|
||||
set -e
|
||||
source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
|
||||
|
||||
temp_dir="{tmp_path}"
|
||||
run_json='{json.dumps(run_data)}'
|
||||
|
||||
serialize_for_multipart "patch" "$run_json" "$temp_dir"
|
||||
"""
|
||||
|
||||
result = subprocess.run(
|
||||
["bash", "-c", script],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd="/Users/tanushreesharma/tracing-claude-code"
|
||||
)
|
||||
|
||||
output = result.stdout
|
||||
assert "-F" in output
|
||||
assert "patch.test-run-456" in output
|
||||
|
||||
# Check that outputs file was created
|
||||
outputs_file = tmp_path / "patch_test-run-456_outputs.json"
|
||||
assert outputs_file.exists()
|
||||
|
||||
def test_serialize_run_without_inputs(self, tmp_path):
|
||||
"""Test serializing a run without inputs"""
|
||||
run_data = {
|
||||
"id": "test-run-789",
|
||||
"name": "Test Run",
|
||||
"run_type": "llm",
|
||||
"start_time": "2025-01-01T00:00:00Z"
|
||||
}
|
||||
|
||||
script = f"""
|
||||
set -e
|
||||
source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
|
||||
|
||||
temp_dir="{tmp_path}"
|
||||
run_json='{json.dumps(run_data)}'
|
||||
|
||||
serialize_for_multipart "post" "$run_json" "$temp_dir"
|
||||
"""
|
||||
|
||||
result = subprocess.run(
|
||||
["bash", "-c", script],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd="/Users/tanushreesharma/tracing-claude-code"
|
||||
)
|
||||
|
||||
# Main file should exist
|
||||
main_file = tmp_path / "post_test-run-789_main.json"
|
||||
assert main_file.exists()
|
||||
|
||||
# Inputs file should NOT exist (no inputs)
|
||||
inputs_file = tmp_path / "post_test-run-789_inputs.json"
|
||||
assert not inputs_file.exists()
|
||||
|
||||
def test_main_file_excludes_inputs_outputs(self, tmp_path):
|
||||
"""Test that main file doesn't contain inputs/outputs"""
|
||||
run_data = {
|
||||
"id": "test-run-abc",
|
||||
"name": "Test Run",
|
||||
"run_type": "llm",
|
||||
"inputs": {"messages": []},
|
||||
"outputs": {"messages": []},
|
||||
"start_time": "2025-01-01T00:00:00Z"
|
||||
}
|
||||
|
||||
script = f"""
|
||||
set -e
|
||||
source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
|
||||
|
||||
temp_dir="{tmp_path}"
|
||||
run_json='{json.dumps(run_data)}'
|
||||
|
||||
serialize_for_multipart "post" "$run_json" "$temp_dir"
|
||||
"""
|
||||
|
||||
subprocess.run(
|
||||
["bash", "-c", script],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd="/Users/tanushreesharma/tracing-claude-code"
|
||||
)
|
||||
|
||||
# Read main file and verify it doesn't have inputs/outputs
|
||||
main_file = tmp_path / "post_test-run-abc_main.json"
|
||||
main_content = json.loads(main_file.read_text())
|
||||
|
||||
assert "inputs" not in main_content
|
||||
assert "outputs" not in main_content
|
||||
assert main_content["id"] == "test-run-abc"
|
||||
assert main_content["name"] == "Test Run"
|
||||
|
||||
def test_content_length_header_is_accurate(self, tmp_path):
    """Test that Content-Length header matches actual file size.

    Every stdout line from ``serialize_for_multipart`` that mentions
    ``Content-Length:`` is parsed, and the claimed size is compared with the
    on-disk size of the main/inputs part file named on that same line.
    """
    from pathlib import Path

    run_data = {
        "id": "test-run-size",
        "name": "Size Test",
        "run_type": "llm",
        "inputs": {"data": "test" * 100},  # Some data
        "start_time": "2025-01-01T00:00:00Z"
    }

    script = f"""
    set -e
    source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)

    temp_dir="{tmp_path}"
    run_json='{json.dumps(run_data)}'

    serialize_for_multipart "post" "$run_json" "$temp_dir"
    """

    # Run from the directory holding stop_hook.sh (found by walking up from
    # this file) rather than from a machine-specific absolute path.
    repo_root = next(
        (
            parent
            for parent in Path(__file__).resolve().parents
            if (parent / "stop_hook.sh").is_file()
        ),
        Path.cwd(),
    )

    result = subprocess.run(
        ["bash", "-c", script],
        capture_output=True,
        text=True,
        cwd=repo_root,
    )

    output = result.stdout

    # Marker substring -> file whose size the header must match. Order
    # mirrors the original if/elif chain (main first, then inputs).
    part_files = {
        "_main.json": tmp_path / "post_test-run-size_main.json",
        "_inputs.json": tmp_path / "post_test-run-size_inputs.json",
    }

    verified = 0
    # Extract Content-Length from output
    for line in output.split("\n"):
        if "Content-Length:" not in line:
            continue

        # Parse the size
        claimed_size = int(line.split("Content-Length:")[1].strip())

        # Find the corresponding file and check its actual size
        for marker, part_file in part_files.items():
            if marker in line:
                assert claimed_size == part_file.stat().st_size
                verified += 1
                break

    # Without this guard the test passes even when no header was emitted.
    assert verified, (
        "no Content-Length header was verified; "
        f"stdout: {output!r} stderr: {result.stderr!r}"
    )
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestMultipartCurlFormat:
    """Tests for curl -F argument format"""

    @staticmethod
    def _repo_root():
        """Return the directory that contains stop_hook.sh.

        Walks up from this test file; falls back to the current working
        directory when the script is not found in any parent.
        """
        from pathlib import Path

        for parent in Path(__file__).resolve().parents:
            if (parent / "stop_hook.sh").is_file():
                return parent
        return Path.cwd()

    def _serialize(self, operation, run_data, tmp_path):
        """Run serialize_for_multipart in bash and return its stdout.

        Args:
            operation: First argument passed to the bash function
                ("post" or "patch" in these tests).
            run_data: Run dict, JSON-encoded and passed as the second argument.
            tmp_path: Directory passed as the third argument (temp_dir).
        """
        script = f"""
        set -e
        source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)

        temp_dir="{tmp_path}"
        run_json='{json.dumps(run_data)}'

        serialize_for_multipart "{operation}" "$run_json" "$temp_dir"
        """

        # stop_hook.sh is sourced by relative path, so cwd must be the
        # directory that holds it; a hard-coded home path is not portable.
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            cwd=self._repo_root(),
        )
        return result.stdout

    def test_curl_f_format_with_file_reference(self, tmp_path):
        """Test that -F uses file reference with <"""
        run_data = {
            "id": "test-curl-format",
            "name": "Test",
            "run_type": "llm",
            "start_time": "2025-01-01T00:00:00Z"
        }

        output = self._serialize("post", run_data, tmp_path)

        # Should use < for file reference
        assert "<" in output or "@" in output  # curl uses < or @ for files

    def test_curl_f_includes_content_type(self, tmp_path):
        """Test that -F includes application/json content type"""
        run_data = {
            "id": "test-content-type",
            "name": "Test",
            "run_type": "llm",
            "start_time": "2025-01-01T00:00:00Z"
        }

        output = self._serialize("post", run_data, tmp_path)

        # Should include content type
        assert "application/json" in output

    def test_inputs_part_naming(self, tmp_path):
        """Test that inputs part is named correctly: {operation}.{run_id}.inputs"""
        run_data = {
            "id": "test-inputs-name",
            "inputs": {"test": "data"},
            "start_time": "2025-01-01T00:00:00Z"
        }

        output = self._serialize("post", run_data, tmp_path)

        # Should have inputs part named post.{id}.inputs
        assert "post.test-inputs-name.inputs" in output

    def test_outputs_part_naming(self, tmp_path):
        """Test that outputs part is named correctly: {operation}.{run_id}.outputs"""
        run_data = {
            "id": "test-outputs-name",
            "outputs": {"test": "data"},
            "end_time": "2025-01-01T00:00:00Z"
        }

        output = self._serialize("patch", run_data, tmp_path)

        # Should have outputs part named patch.{id}.outputs
        assert "patch.test-outputs-name.outputs" in output
|
||||
@@ -0,0 +1,107 @@
|
||||
"""
|
||||
Unit tests for state management functions from stop_hook.sh.
|
||||
|
||||
Tests:
|
||||
- load_state() - Read langsmith_state.json
|
||||
- save_state() - Write state file
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestStateManagement:
    """Tests for load_state() and save_state() functions"""

    def test_load_state_returns_empty_for_missing_file(self, bash_executor, temp_state_file):
        """Test loading state when file doesn't exist"""
        # temp_state_file doesn't exist yet
        result = bash_executor.call_function("load_state")
        loaded = json.loads(result)
        assert loaded == {}

    def test_save_and_load_state(self, bash_executor, temp_state_file, state_manager):
        """Test round-trip state persistence"""
        state = {
            "session_123": {
                "last_line": 42,
                "turn_count": 5,
                "updated": "2025-01-01T00:00:00Z"
            }
        }

        # Save state using state_manager (creates the file)
        state_manager.save(state)

        # Load using bash function
        result = bash_executor.call_function("load_state")
        loaded = json.loads(result)

        assert loaded == state
        assert loaded["session_123"]["last_line"] == 42
        assert loaded["session_123"]["turn_count"] == 5

    def test_state_tracks_multiple_sessions(self, bash_executor, state_manager):
        """Test state management for multiple sessions"""
        state = {
            "session_1": {"last_line": 10, "turn_count": 1},
            "session_2": {"last_line": 20, "turn_count": 2}
        }

        state_manager.save(state)

        result = bash_executor.call_function("load_state")
        loaded = json.loads(result)

        assert "session_1" in loaded
        assert "session_2" in loaded
        assert loaded["session_1"]["last_line"] == 10
        assert loaded["session_2"]["last_line"] == 20

    def test_save_state_creates_directory(self, bash_executor, tmp_path):
        """Test that save_state creates parent directory if needed"""
        import os

        # Use a nested path that doesn't exist
        nested_state_file = tmp_path / "nested" / "dir" / "state.json"

        state = {"test": {"value": 123}}

        # Manually set STATE_FILE env var for this test
        old_state_file = os.environ.get("STATE_FILE")
        os.environ["STATE_FILE"] = str(nested_state_file)

        try:
            bash_executor.call_function("save_state", json.dumps(state))

            # Verify file was created
            assert nested_state_file.exists()

            # Verify content
            loaded_content = json.loads(nested_state_file.read_text())
            assert loaded_content == state
        finally:
            # Always put the environment back exactly as it was. If the
            # variable was unset before, remove it, so the temporary path
            # does not leak into tests that run afterwards.
            if old_state_file is None:
                os.environ.pop("STATE_FILE", None)
            else:
                os.environ["STATE_FILE"] = old_state_file

    def test_save_state_with_complex_data(self, bash_executor, state_manager):
        """Test saving complex state data"""
        state = {
            "session_abc": {
                "last_line": 100,
                "turn_count": 25,
                "updated": "2025-01-01T12:34:56Z",
                "metadata": {
                    "model": "claude-sonnet-4-5-20250929",
                    "total_tokens": 5000
                }
            }
        }

        state_manager.save(state)

        result = bash_executor.call_function("load_state")
        loaded = json.loads(result)

        assert loaded == state
        assert loaded["session_abc"]["metadata"]["model"] == "claude-sonnet-4-5-20250929"
|
||||
@@ -0,0 +1,379 @@
|
||||
"""
|
||||
Unit tests for timestamp conversion in stop_hook.sh.
|
||||
|
||||
Tests the ISO timestamp to dotted_order format conversion:
|
||||
- ISO format: 2025-12-16T17:44:04.397Z
|
||||
- dotted_order format: 20251216T174404397000Z
|
||||
|
||||
This conversion is critical for proper trace ordering in LangSmith.
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestISOToDottedOrderConversion:
    """Tests for ISO timestamp to dotted_order conversion using sed"""

    # sed program under test; the original test notes attribute it to
    # stop_hook.sh. Kept as a raw string so the backslashes reach sed as-is.
    _SED_PROGRAM = r"s/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/"

    def _convert(self, iso_timestamp):
        """Pipe *iso_timestamp* through the sed program and return stripped stdout."""
        pipeline = f"echo '{iso_timestamp}' | sed '{self._SED_PROGRAM}'"
        completed = subprocess.run(
            ["bash", "-c", pipeline],
            capture_output=True,
            text=True
        )
        return completed.stdout.strip()

    def test_converts_iso_to_dotted_order_format(self):
        """A millisecond ISO timestamp converts to the full dotted_order string."""
        assert self._convert("2025-12-16T17:44:04.397Z") == "20251216T174404397000Z"

    def test_converts_single_digit_milliseconds(self):
        """A one-digit fraction (.1Z) gets three zeros appended."""
        # .1 -> 1000Z: the fraction is followed by "000", not padded to 6 digits
        converted = self._convert("2025-12-16T17:44:04.1Z")
        assert "T1744041000Z" in converted

    def test_converts_two_digit_milliseconds(self):
        """A two-digit fraction (.12Z) gets three zeros appended."""
        converted = self._convert("2025-12-16T17:44:04.12Z")
        assert "T17440412000Z" in converted

    def test_converts_full_milliseconds(self):
        """A three-digit fraction converts to the exact expected string."""
        assert self._convert("2025-12-16T17:44:04.123Z") == "20251216T174404123000Z"

    def test_removes_dashes_from_date(self):
        """The date portion comes out as 20251216, with no dashes anywhere."""
        converted = self._convert("2025-12-16T17:44:04.000Z")
        assert converted.startswith("20251216T")
        assert "-" not in converted

    def test_removes_colons_from_time(self):
        """The time portion comes out as 174404, with no colons anywhere."""
        converted = self._convert("2025-12-16T17:44:04.000Z")
        assert "T174404" in converted
        assert ":" not in converted

    def test_preserves_z_suffix(self):
        """The converted value still ends in Z."""
        assert self._convert("2025-12-16T17:44:04.123Z").endswith("Z")

    def test_pads_milliseconds_to_microseconds(self):
        """397 milliseconds appear as 397000 in the output."""
        converted = self._convert("2025-12-16T17:44:04.397Z")
        assert "397000Z" in converted
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestDottedOrderTimestampFormat:
    """Tests for generating dotted_order timestamps"""

    @staticmethod
    def _repo_root():
        """Return the directory that contains stop_hook.sh.

        Walks up from this test file; falls back to the current working
        directory when the script is not found in any parent.
        """
        from pathlib import Path

        for parent in Path(__file__).resolve().parents:
            if (parent / "stop_hook.sh").is_file():
                return parent
        return Path.cwd()

    def test_dotted_timestamp_format(self, bash_executor):
        """Test that dotted timestamp has correct format"""
        # Generate a timestamp using the same logic as stop_hook.sh
        script = """
        set -e
        source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)

        dotted_timestamp=$(date -u +"%Y%m%dT%H%M%S")
        microseconds=$(get_microseconds)
        dotted_timestamp="${dotted_timestamp}${microseconds}Z"
        echo "$dotted_timestamp"
        """

        # stop_hook.sh is sourced by relative path, so run from the directory
        # that holds it instead of a machine-specific absolute path.
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True,
            cwd=self._repo_root(),
        )

        output = result.stdout.strip()

        # Format should be: YYYYMMDDTHHMMSSffffffZ (22 chars)
        # YYYYMMDD (8) + T (1) + HHMMSS (6) + ffffff (6) + Z (1) = 22
        assert len(output) == 22
        assert output[8] == "T"  # T separator
        assert output[-1] == "Z"  # Z suffix
        assert output[:8].isdigit()  # Date digits
        assert output[9:21].isdigit()  # Time + microseconds (HHMMSS + ffffff)

    def test_dotted_timestamp_year_month_day(self, bash_executor):
        """Test that date portion is correct format"""
        from datetime import timezone

        script = """
        dotted_timestamp=$(date -u +"%Y%m%dT%H%M%S")
        echo "${dotted_timestamp:0:8}"
        """

        # Sample the UTC date on both sides of the subprocess call so a run
        # that straddles midnight still has a valid expected value.
        utc_date_before = datetime.now(timezone.utc).strftime("%Y%m%d")
        result = subprocess.run(
            ["bash", "-c", script],
            capture_output=True,
            text=True
        )
        utc_date_after = datetime.now(timezone.utc).strftime("%Y%m%d")

        output = result.stdout.strip()

        # Should be YYYYMMDD
        assert len(output) == 8
        month = int(output[4:6])
        day = int(output[6:8])

        assert 1 <= month <= 12
        assert 1 <= day <= 31
        # Compare with the real current UTC date instead of a fixed
        # 2020-2030 window, which would start failing in 2031.
        assert output in (utc_date_before, utc_date_after)
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestTimestampChronologicalOrdering:
    """Tests verifying timestamps sort chronologically"""

    # sed program applied to each ISO timestamp (raw string: backslashes are literal).
    _SED_PROGRAM = r"s/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/"

    def _convert_all(self, timestamps):
        """Convert each ISO timestamp through sed, preserving input order."""
        converted = []
        for stamp in timestamps:
            pipeline = f"echo '{stamp}' | sed '{self._SED_PROGRAM}'"
            completed = subprocess.run(
                ["bash", "-c", pipeline],
                capture_output=True,
                text=True
            )
            converted.append(completed.stdout.strip())
        return converted

    def test_earlier_timestamp_sorts_first(self):
        """Ascending ISO timestamps yield already-sorted dotted_orders."""
        converted = self._convert_all([
            "2025-12-16T17:44:04.100Z",
            "2025-12-16T17:44:04.200Z",
            "2025-12-16T17:44:04.300Z",
        ])

        # Input was chronological, so sorting must be a no-op
        assert converted == sorted(converted)

    def test_different_seconds_sort_correctly(self):
        """A later second sorts after an earlier one despite a smaller fraction."""
        converted = self._convert_all([
            "2025-12-16T17:44:05.000Z",  # Later
            "2025-12-16T17:44:04.999Z",  # Earlier (despite higher ms)
        ])

        earliest, latest = sorted(converted)
        # The 04.999 should come before 05.000
        assert "174404" in earliest
        assert "174405" in latest

    def test_different_dates_sort_correctly(self):
        """The last instant of one day sorts before midnight of the next."""
        converted = self._convert_all([
            "2025-12-17T00:00:00.000Z",
            "2025-12-16T23:59:59.999Z",
        ])

        earliest, latest = sorted(converted)
        # Dec 16 should come before Dec 17
        assert "20251216" in earliest
        assert "20251217" in latest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestTimestampEdgeCases:
    """Tests for edge cases in timestamp handling"""

    # sed program applied to each ISO timestamp (raw string: backslashes are literal).
    _SED_PROGRAM = r"s/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/"

    def _convert(self, iso_timestamp):
        """Pipe *iso_timestamp* through the sed program and return stripped stdout."""
        pipeline = f"echo '{iso_timestamp}' | sed '{self._SED_PROGRAM}'"
        completed = subprocess.run(
            ["bash", "-c", pipeline],
            capture_output=True,
            text=True
        )
        return completed.stdout.strip()

    def test_handles_midnight_timestamp(self):
        """Midnight (00:00:00.000) converts to an all-zero time portion."""
        assert self._convert("2025-12-16T00:00:00.000Z") == "20251216T000000000000Z"

    def test_handles_end_of_day_timestamp(self):
        """23:59:59.999 converts to the exact expected string."""
        assert self._convert("2025-12-16T23:59:59.999Z") == "20251216T235959999000Z"

    def test_handles_zero_milliseconds(self):
        """A .000 fraction shows up as 000000 before the Z."""
        converted = self._convert("2025-12-16T12:30:45.000Z")
        assert "000000Z" in converted  # 000 padded to 000000

    def test_handles_leap_year_date(self):
        """Feb 29 of a leap year keeps its date digits."""
        converted = self._convert("2024-02-29T12:00:00.500Z")
        assert converted.startswith("20240229T")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestTimestampWithRealTranscriptData:
    """Tests using real timestamp formats from cc_transcript.jsonl"""

    # sed program applied to each ISO timestamp (raw string: backslashes are literal).
    _SED_PROGRAM = r"s/[-:]//g; s/\.\([0-9]*\)Z$/\1000Z/; s/T\([0-9]*\)\([0-9]\{3\}\)000Z$/T\1\2000Z/"

    def _convert(self, iso_timestamp):
        """Pipe *iso_timestamp* through the sed program and return stripped stdout."""
        pipeline = f"echo '{iso_timestamp}' | sed '{self._SED_PROGRAM}'"
        completed = subprocess.run(
            ["bash", "-c", pipeline],
            capture_output=True,
            text=True
        )
        return completed.stdout.strip()

    def test_converts_real_transcript_timestamp(self):
        """A timestamp in the transcript's format converts exactly."""
        # Example from line 2: "timestamp":"2024-12-06T06:42:11.556Z"
        assert self._convert("2024-12-06T06:42:11.556Z") == "20241206T064211556000Z"

    def test_multiple_transcript_timestamps_maintain_order(self):
        """A chronological sequence stays ordered and unique after conversion."""
        # Simulated sequence of timestamps from a transcript
        sequence = [
            "2024-12-06T06:42:11.556Z",  # User message
            "2024-12-06T06:42:12.100Z",  # Assistant response
            "2024-12-06T06:42:12.500Z",  # Tool result
            "2024-12-06T06:42:13.200Z",  # Final response
        ]

        converted = [self._convert(stamp) for stamp in sequence]

        # Verify they're in chronological order
        assert converted == sorted(converted)

        # Verify each is unique
        assert len(set(converted)) == len(converted)
|
||||
@@ -0,0 +1,541 @@
|
||||
"""
|
||||
Unit tests for trace creation in stop_hook.sh.
|
||||
|
||||
Tests:
|
||||
- create_trace() - Main trace creation logic
|
||||
- Turn run structure
|
||||
- LLM (assistant) run structure
|
||||
- Tool run structure
|
||||
- Parent-child relationships
|
||||
- Usage metadata
|
||||
- Dotted order hierarchy
|
||||
"""
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestCreateTraceFunction:
    """Tests for create_trace() function existence and structure"""

    def test_function_exists(self, bash_executor):
        """The extracted source of create_trace mentions the function name."""
        assert "create_trace" in bash_executor.get_function_source("create_trace")

    def test_accepts_session_id_parameter(self, bash_executor):
        """The create_trace source mentions session_id."""
        assert "session_id" in bash_executor.get_function_source("create_trace")

    def test_accepts_turn_num_parameter(self, bash_executor):
        """The create_trace source mentions turn_num."""
        assert "turn_num" in bash_executor.get_function_source("create_trace")

    def test_accepts_user_msg_parameter(self, bash_executor):
        """The create_trace source mentions user_msg."""
        assert "user_msg" in bash_executor.get_function_source("create_trace")

    def test_accepts_assistant_messages_parameter(self, bash_executor):
        """The create_trace source mentions assistant_messages."""
        assert "assistant_messages" in bash_executor.get_function_source("create_trace")

    def test_accepts_tool_results_parameter(self, bash_executor):
        """The create_trace source mentions tool_results."""
        assert "tool_results" in bash_executor.get_function_source("create_trace")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestTurnRunCreation:
    """Tests for turn (top-level chain) run creation"""

    def test_creates_turn_run_with_chain_type(self, bash_executor):
        """The create_trace source contains "chain" and run_type."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ('"chain"', "run_type"):
            assert needle in body

    def test_turn_run_has_unique_id(self, bash_executor):
        """The create_trace source uses uuidgen and a turn_id variable."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("uuidgen", "turn_id"):
            assert needle in body

    def test_turn_run_name_is_claude_code(self, bash_executor):
        """The create_trace source contains the quoted name "Claude Code"."""
        assert '"Claude Code"' in bash_executor.get_function_source("create_trace")

    def test_turn_run_has_dotted_order(self, bash_executor):
        """The create_trace source mentions dotted_order and turn_dotted_order."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("dotted_order", "turn_dotted_order"):
            assert needle in body

    def test_turn_run_trace_id_equals_run_id(self, bash_executor):
        """The create_trace source mentions trace_id."""
        # trace_id: $turn_id (same as run id)
        assert "trace_id" in bash_executor.get_function_source("create_trace")

    def test_turn_run_has_session_name(self, bash_executor):
        """The create_trace source mentions session_name and a project reference."""
        body = bash_executor.get_function_source("create_trace")
        assert "session_name" in body
        assert "project" in body.lower() or "PROJECT" in body

    def test_turn_run_has_thread_id_metadata(self, bash_executor):
        """The create_trace source mentions thread_id and session."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("thread_id", "session"):
            assert needle in body

    def test_turn_run_has_tags(self, bash_executor):
        """The create_trace source contains the "claude-code" tag and a turn- prefix."""
        body = bash_executor.get_function_source("create_trace")
        assert '"claude-code"' in body
        assert "turn-" in body  # turn-N tag
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestAssistantRunCreation:
    """Tests for assistant (LLM) run creation"""

    def test_creates_llm_run_type(self, bash_executor):
        """The create_trace source contains the quoted run type "llm"."""
        assert '"llm"' in bash_executor.get_function_source("create_trace")

    def test_assistant_run_has_unique_id(self, bash_executor):
        """The create_trace source mentions assistant_id and uuidgen."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("assistant_id", "uuidgen"):
            assert needle in body

    def test_assistant_run_name_is_claude(self, bash_executor):
        """The create_trace source contains the quoted name "Claude"."""
        assert '"Claude"' in bash_executor.get_function_source("create_trace")

    def test_assistant_run_has_parent_run_id(self, bash_executor):
        """The create_trace source mentions parent_run_id."""
        assert "parent_run_id" in bash_executor.get_function_source("create_trace")

    def test_assistant_run_has_trace_id(self, bash_executor):
        """The create_trace source mentions trace_id."""
        assert "trace_id" in bash_executor.get_function_source("create_trace")

    def test_assistant_run_has_model_in_metadata(self, bash_executor):
        """The create_trace source mentions ls_model_name, ls_provider and anthropic."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("ls_model_name", "ls_provider", "anthropic"):
            assert needle in body

    def test_assistant_run_has_model_in_tags(self, bash_executor):
        """The create_trace source mentions tags and model."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("tags", "model"):
            assert needle in body

    def test_assistant_run_has_dotted_order(self, bash_executor):
        """The create_trace source mentions assistant_dotted_order."""
        assert "assistant_dotted_order" in bash_executor.get_function_source("create_trace")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestToolRunCreation:
    """Tests for tool run creation"""

    def test_creates_tool_run_type(self, bash_executor):
        """The create_trace source contains the quoted run type "tool"."""
        assert '"tool"' in bash_executor.get_function_source("create_trace")

    def test_tool_run_has_unique_id(self, bash_executor):
        """The create_trace source mentions tool_id."""
        assert "tool_id" in bash_executor.get_function_source("create_trace")

    def test_tool_run_has_tool_name(self, bash_executor):
        """The create_trace source mentions tool_name."""
        assert "tool_name" in bash_executor.get_function_source("create_trace")

    def test_tool_run_has_parent_as_turn(self, bash_executor):
        """The create_trace source mentions parent_run_id and turn_id."""
        body = bash_executor.get_function_source("create_trace")
        # Tools are children of turn, not assistant
        for needle in ("parent_run_id", "turn_id"):
            assert needle in body

    def test_tool_run_has_input(self, bash_executor):
        """The create_trace source mentions tool_input and input."""
        body = bash_executor.get_function_source("create_trace")
        for needle in ("tool_input", "input"):
            assert needle in body

    def test_tool_run_has_dotted_order(self, bash_executor):
        """The create_trace source mentions tool_dotted_order."""
        assert "tool_dotted_order" in bash_executor.get_function_source("create_trace")

    def test_tool_run_has_tool_tag(self, bash_executor):
        """The create_trace source contains the quoted "tool" tag."""
        assert '"tool"' in bash_executor.get_function_source("create_trace")
|
||||
|
||||
|
||||
@pytest.mark.unit
|
||||
class TestFindToolResultWithTimestamp:
|
||||
"""Tests for find_tool_result_with_timestamp() function"""
|
||||
|
||||
def test_function_exists(self, bash_executor):
    """The extracted source of find_tool_result_with_timestamp mentions its own name."""
    assert "find_tool_result_with_timestamp" in bash_executor.get_function_source(
        "find_tool_result_with_timestamp"
    )
|
||||
|
||||
def test_accepts_tool_id_parameter(self, bash_executor):
    """The find_tool_result_with_timestamp source mentions tool_id."""
    assert "tool_id" in bash_executor.get_function_source(
        "find_tool_result_with_timestamp"
    )
|
||||
|
||||
def test_accepts_tool_results_parameter(self, bash_executor):
    """The find_tool_result_with_timestamp source mentions tool_results."""
    assert "tool_results" in bash_executor.get_function_source(
        "find_tool_result_with_timestamp"
    )
|
||||
|
||||
def test_returns_result_and_timestamp(self, bash_executor):
|
||||
"""Test that function returns both result and timestamp"""
|
||||
source = bash_executor.get_function_source("find_tool_result_with_timestamp")
|
||||
assert "result" in source
|
||||
assert "timestamp" in source
|
||||
|
||||
def test_finds_tool_result_by_id(self):
|
||||
"""Test finding tool result by tool_use_id"""
|
||||
tool_results = [
|
||||
{
|
||||
"type": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "tool_abc",
|
||||
"content": "Found result"
|
||||
}
|
||||
],
|
||||
"timestamp": "2025-01-01T00:00:00Z"
|
||||
}
|
||||
]
|
||||
|
||||
script = f"""
|
||||
set -e
|
||||
source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
|
||||
|
||||
tool_results='{json.dumps(tool_results)}'
|
||||
find_tool_result_with_timestamp "tool_abc" "$tool_results"
|
||||
"""
|
||||
|
||||
result = subprocess.run(
|
||||
["bash", "-c", script],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd="/Users/tanushreesharma/tracing-claude-code"
|
||||
)
|
||||
|
||||
output = json.loads(result.stdout.strip())
|
||||
assert output["result"] == "Found result"
|
||||
assert output["timestamp"] == "2025-01-01T00:00:00Z"
|
||||
|
||||
def test_returns_no_result_for_missing_tool(self):
|
||||
"""Test that missing tool returns 'No result'"""
|
||||
tool_results = [
|
||||
{
|
||||
"type": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "tool_abc",
|
||||
"content": "Some result"
|
||||
}
|
||||
],
|
||||
"timestamp": "2025-01-01T00:00:00Z"
|
||||
}
|
||||
]
|
||||
|
||||
script = f"""
|
||||
set -e
|
||||
source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
|
||||
|
||||
tool_results='{json.dumps(tool_results)}'
|
||||
find_tool_result_with_timestamp "tool_xyz" "$tool_results"
|
||||
"""
|
||||
|
||||
result = subprocess.run(
|
||||
["bash", "-c", script],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd="/Users/tanushreesharma/tracing-claude-code"
|
||||
)
|
||||
|
||||
output = json.loads(result.stdout.strip())
|
||||
assert output["result"] == "No result"
|
||||
|
||||
def test_handles_array_content_in_tool_result(self):
|
||||
"""Test handling of array content in tool result"""
|
||||
tool_results = [
|
||||
{
|
||||
"type": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "tool_result",
|
||||
"tool_use_id": "tool_array",
|
||||
"content": [
|
||||
{"type": "text", "text": "Part 1"},
|
||||
{"type": "text", "text": "Part 2"}
|
||||
]
|
||||
}
|
||||
],
|
||||
"timestamp": "2025-01-01T00:00:00Z"
|
||||
}
|
||||
]
|
||||
|
||||
script = f"""
|
||||
set -e
|
||||
source <(sed -e '/^# Exit early if tracing disabled$/,/^fi$/d' -e '/^main$/,$d' stop_hook.sh)
|
||||
|
||||
tool_results='{json.dumps(tool_results)}'
|
||||
find_tool_result_with_timestamp "tool_array" "$tool_results"
|
||||
"""
|
||||
|
||||
result = subprocess.run(
|
||||
["bash", "-c", script],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
cwd="/Users/tanushreesharma/tracing-claude-code"
|
||||
)
|
||||
|
||||
output = json.loads(result.stdout.strip())
|
||||
# Should concatenate text parts
|
||||
assert "Part 1" in output["result"]
|
||||
assert "Part 2" in output["result"]
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestUsageMetadata:
    """Textual checks that create_trace's source mentions usage metadata fields.

    Every test fetches the source of the ``create_trace`` shell function via
    the ``bash_executor`` fixture and asserts that specific identifiers occur
    in it; nothing is executed.
    """

    def test_usage_metadata_included_in_assistant_run(self, bash_executor):
        """``usage_metadata`` must appear in create_trace."""
        assert "usage_metadata" in bash_executor.get_function_source("create_trace")

    def test_usage_metadata_has_input_tokens(self, bash_executor):
        """``input_tokens`` must appear in create_trace."""
        assert "input_tokens" in bash_executor.get_function_source("create_trace")

    def test_usage_metadata_has_output_tokens(self, bash_executor):
        """``output_tokens`` must appear in create_trace."""
        assert "output_tokens" in bash_executor.get_function_source("create_trace")

    def test_usage_metadata_has_token_details(self, bash_executor):
        """``input_token_details`` and both cache field names must appear."""
        body = bash_executor.get_function_source("create_trace")
        for field in ("input_token_details", "cache_read", "cache_creation"):
            assert field in body

    def test_usage_includes_cache_tokens_in_total(self, bash_executor):
        """Both cache token counters must be referenced by create_trace."""
        body = bash_executor.get_function_source("create_trace")
        # Should add cache tokens to input_tokens
        for counter in ("cache_creation_input_tokens", "cache_read_input_tokens"):
            assert counter in body
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestDottedOrderHierarchy:
    """Textual checks on how create_trace builds dotted_order values.

    Each test looks for the variable name and the shell expansion that is
    expected to build it inside the source of ``create_trace``.
    """

    def test_turn_dotted_order_is_root(self, bash_executor):
        """The turn's dotted_order is built from timestamp + turn id (no dots)."""
        body = bash_executor.get_function_source("create_trace")
        # Turn dotted order: timestamp + turn_id
        for fragment in ("turn_dotted_order", "${dotted_timestamp}${turn_id}"):
            assert fragment in body

    def test_assistant_dotted_order_includes_turn(self, bash_executor):
        """The assistant's dotted_order is prefixed by the turn's dotted_order."""
        body = bash_executor.get_function_source("create_trace")
        # Assistant: turn_dotted_order.assistant_timestamp+id
        for fragment in ("assistant_dotted_order", "${turn_dotted_order}."):
            assert fragment in body

    def test_tool_dotted_order_includes_turn(self, bash_executor):
        """The tool's dotted_order is prefixed by the turn's dotted_order."""
        body = bash_executor.get_function_source("create_trace")
        # Tool: turn_dotted_order.tool_timestamp+id
        for fragment in ("tool_dotted_order", "${turn_dotted_order}."):
            assert fragment in body
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestOutputsAccumulation:
    """Textual checks that create_trace accumulates outputs across LLM calls.

    All tests assert on substrings of the ``create_trace`` source returned by
    the ``bash_executor`` fixture.
    """

    def test_all_outputs_initialized_with_user_message(self, bash_executor):
        """``all_outputs`` and ``user`` must both occur in create_trace."""
        body = bash_executor.get_function_source("create_trace")
        for fragment in ("all_outputs", "user"):
            assert fragment in body

    def test_llm_outputs_added_to_all_outputs(self, bash_executor):
        """``llm_outputs`` must occur in create_trace."""
        assert "llm_outputs" in bash_executor.get_function_source("create_trace")

    def test_tool_results_added_to_all_outputs(self, bash_executor):
        """The ``"tool"`` literal and ``tool_call_id`` must occur in create_trace."""
        body = bash_executor.get_function_source("create_trace")
        # Tool results should be added with role: tool
        for fragment in ('"tool"', "tool_call_id"):
            assert fragment in body

    def test_turn_outputs_filters_user_messages(self, bash_executor):
        """The jq filter that drops user-role messages must occur in create_trace."""
        # Final outputs should exclude user messages
        user_filter = 'select(.role != "user")'
        assert user_filter in bash_executor.get_function_source("create_trace")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestBatchProcessing:
    """Textual checks that create_trace batches its run posts and patches.

    Each test asserts that particular variable names or call fragments occur
    in the source of ``create_trace``; no batch is actually sent.
    """

    def test_posts_batch_initialized(self, bash_executor):
        """``posts_batch`` must occur in create_trace."""
        assert "posts_batch" in bash_executor.get_function_source("create_trace")

    def test_patches_batch_initialized(self, bash_executor):
        """``patches_batch`` must occur in create_trace."""
        assert "patches_batch" in bash_executor.get_function_source("create_trace")

    def test_turn_added_to_posts_batch(self, bash_executor):
        """``turn_data`` and ``posts_batch`` must both occur in create_trace."""
        body = bash_executor.get_function_source("create_trace")
        # Should add turn_data to posts_batch
        for fragment in ("turn_data", "posts_batch"):
            assert fragment in body

    def test_assistant_added_to_posts_batch(self, bash_executor):
        """``assistant_data`` must occur in create_trace."""
        assert "assistant_data" in bash_executor.get_function_source("create_trace")

    def test_tool_added_to_posts_batch(self, bash_executor):
        """``tool_data`` must occur in create_trace."""
        assert "tool_data" in bash_executor.get_function_source("create_trace")

    def test_assistant_update_added_to_patches_batch(self, bash_executor):
        """``assistant_update`` and ``patches_batch`` must both occur."""
        body = bash_executor.get_function_source("create_trace")
        for fragment in ("assistant_update", "patches_batch"):
            assert fragment in body

    def test_tool_update_added_to_patches_batch(self, bash_executor):
        """``tool_update`` must occur in create_trace."""
        assert "tool_update" in bash_executor.get_function_source("create_trace")

    def test_turn_update_added_to_patches_batch(self, bash_executor):
        """``turn_update`` must occur in create_trace."""
        assert "turn_update" in bash_executor.get_function_source("create_trace")

    def test_send_multipart_batch_called_for_posts(self, bash_executor):
        """create_trace must invoke send_multipart_batch with ``"post"``."""
        post_call = 'send_multipart_batch "post"'
        assert post_call in bash_executor.get_function_source("create_trace")

    def test_send_multipart_batch_called_for_patches(self, bash_executor):
        """create_trace must invoke send_multipart_batch with ``"patch"``."""
        patch_call = 'send_multipart_batch "patch"'
        assert patch_call in bash_executor.get_function_source("create_trace")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestCurrentTurnTracking:
    """Textual checks on CURRENT_TURN_ID handling inside create_trace."""

    def test_current_turn_id_set_after_turn_creation(self, bash_executor):
        """create_trace must assign ``$turn_id`` to CURRENT_TURN_ID."""
        body = bash_executor.get_function_source("create_trace")
        for fragment in ("CURRENT_TURN_ID", 'CURRENT_TURN_ID="$turn_id"'):
            assert fragment in body

    def test_current_turn_id_cleared_after_completion(self, bash_executor):
        """create_trace must reset CURRENT_TURN_ID to the empty string."""
        reset_statement = 'CURRENT_TURN_ID=""'
        assert reset_statement in bash_executor.get_function_source("create_trace")
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestMultipleLLMCalls:
    """Textual checks that create_trace handles several LLM calls per turn."""

    def test_iterates_over_assistant_messages(self, bash_executor):
        """create_trace must contain a loop keyword and mention ``assistant_msg``."""
        body = bash_executor.get_function_source("create_trace")
        # Should loop through assistant_messages
        assert any(keyword in body for keyword in ("while", "for"))
        assert "assistant_msg" in body

    def test_llm_num_counter(self, bash_executor):
        """``llm_num`` must occur in create_trace."""
        assert "llm_num" in bash_executor.get_function_source("create_trace")

    def test_last_llm_end_tracked(self, bash_executor):
        """``last_llm_end`` must occur in create_trace."""
        assert "last_llm_end" in bash_executor.get_function_source("create_trace")

    def test_llm_inputs_include_accumulated_context(self, bash_executor):
        """``llm_inputs`` and ``all_outputs`` must both occur in create_trace."""
        body = bash_executor.get_function_source("create_trace")
        for fragment in ("llm_inputs", "all_outputs"):
            assert fragment in body
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestLogging:
    """Textual checks that create_trace contains logging statements."""

    def test_logs_turn_creation(self, bash_executor):
        """create_trace must mention ``log``, ``INFO`` and (case-insensitively) ``turn``."""
        body = bash_executor.get_function_source("create_trace")
        for fragment in ("log", "INFO"):
            assert fragment in body
        assert "turn" in body.lower()

    def test_logs_llm_call_count(self, bash_executor):
        """create_trace must mention ``llm_num`` and the text ``LLM call``."""
        body = bash_executor.get_function_source("create_trace")
        for fragment in ("llm_num", "LLM call"):
            assert fragment in body
|
||||
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
Unit tests for trace ordering and dotted_order generation from stop_hook.sh.
|
||||
|
||||
These tests verify that traces are correctly ordered in LangSmith:
|
||||
- dotted_order format (YYYYMMDDTHHMMSSffffffZuuid)
|
||||
- Parent-child relationships via dotted_order
|
||||
- Timestamp precision (microseconds)
|
||||
- Chronological ordering
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestDottedOrderFormat:
    """Tests for dotted_order timestamp format.

    These tests operate on literal example strings only; they document the
    expected layout of a dotted_order segment:

        YYYYMMDDTHHMMSS (15 chars) + ffffff (6) + 'Z' (1) + UUID (36) = 58
    """

    def test_dotted_order_format_structure(self):
        """Test dotted_order follows correct format: YYYYMMDDTHHMMSSffffffZuuid"""
        # Mirrors the construction in stop_hook.sh:
        # dotted_order="${dotted_timestamp}${turn_id}"
        # Format: 20251216T174404397000Zuuid
        dotted_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        # 21-char timestamp + 'Z' + 36-char UUID
        assert len(dotted_order) == 58  # 21 + 1 + 36

        # partition() returns an empty separator when 'Z' is absent, so this
        # check inspects the string rather than a constant.
        timestamp_part, separator, uuid_part = dotted_order.partition('Z')
        assert separator == 'Z'

        # Verify timestamp format: YYYYMMDD + 'T' + HHMMSS + microseconds
        assert len(timestamp_part) == 21
        assert re.match(r'^\d{8}T\d{12}$', timestamp_part), \
            f"Timestamp {timestamp_part} doesn't match YYYYMMDDTHHMMSSmmmmmm"

        # Verify UUID format
        assert re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$', uuid_part), \
            f"UUID {uuid_part} doesn't match UUID format"

    def test_child_dotted_order_includes_parent(self):
        """Test child dotted_order includes parent's dotted_order as prefix"""
        # Mirrors the construction in stop_hook.sh:
        # assistant_dotted_order="${turn_dotted_order}.${assistant_timestamp}${assistant_id}"
        parent_dotted_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        child_dotted_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7.20251216T174405123456Za8024e23-5b82-47fd-970e-f6a5ba3f5097"

        # Child must start with parent's dotted_order
        assert child_dotted_order.startswith(parent_dotted_order)

        # Child must have a dot separator
        assert '.' in child_dotted_order

        # After parent, should be: .timestamp + UUID
        child_suffix = child_dotted_order[len(parent_dotted_order):]
        assert child_suffix.startswith('.')

        # Verify child suffix format: .YYYYMMDDTHHMMSSffffffZuuid
        child_part = child_suffix[1:]  # Remove leading dot
        assert len(child_part) == 58  # Same format as parent
        # Check the segment's shape too, not only its length.
        assert re.match(
            r'^\d{8}T\d{12}Z[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$',
            child_part,
        ), f"Child segment {child_part} doesn't match timestamp+Z+UUID"

    def test_grandchild_dotted_order_hierarchy(self):
        """Test grandchild dotted_order maintains full hierarchy"""
        # Format: parent.child.grandchild
        parent = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        child = f"{parent}.20251216T174405123456Za8024e23-5b82-47fd-970e-f6a5ba3f5097"
        grandchild = f"{child}.20251216T174406789012Z0ec6b845-18b9-4aa1-8f1b-6ba3f9fdefd6"

        # Verify hierarchy
        assert grandchild.startswith(parent)
        assert grandchild.startswith(child)

        # Count dots to verify depth
        assert parent.count('.') == 0  # Top level
        assert child.count('.') == 1  # One level deep
        assert grandchild.count('.') == 2  # Two levels deep
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestTimestampPrecision:
    """Tests for microsecond precision in timestamps.

    A dotted_order timestamp is ``YYYYMMDDTHHMMSS`` (15 characters, including
    the ``T``) followed by 6 microsecond digits, i.e. the microseconds occupy
    string indices 15..20.
    """

    def test_get_microseconds_provides_six_digits(self, bash_executor):
        """Test microsecond precision for ordering"""
        result = bash_executor.call_function("get_microseconds")

        # Must be exactly 6 digits
        assert len(result) == 6
        assert result.isdigit()

        # Convert to verify range (0-999999)
        microseconds = int(result)
        assert 0 <= microseconds <= 999999

    def test_timestamp_includes_microseconds(self):
        """Test that dotted_order timestamps include microseconds"""
        # Mirrors the construction in stop_hook.sh:
        # dotted_timestamp=$(date -u +"%Y%m%dT%H%M%S")
        # microseconds=$(get_microseconds)
        # dotted_timestamp="${dotted_timestamp}${microseconds}Z"
        dotted_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        # Everything before the 'Z' is the timestamp: 20251216T174404397000
        timestamp = dotted_order.partition("Z")[0]
        assert len(timestamp) == 21

        # The last 6 characters before 'Z' are the microseconds. Slicing
        # [14:20] would start on the final seconds digit because of the 'T'.
        microseconds = timestamp[15:21]
        assert microseconds == "397000"
        assert len(microseconds) == 6
        assert microseconds.isdigit()
        assert int(microseconds) <= 999999

    def test_microseconds_enable_sub_second_ordering(self):
        """Test that microseconds allow ordering of rapid events"""
        # Two events in the same second should have different microseconds
        timestamp1 = "20251216T174404123456"  # .123456 seconds
        timestamp2 = "20251216T174404789012"  # .789012 seconds

        # Same date and time (up to and including the seconds digits)
        assert timestamp1[:15] == timestamp2[:15]

        # Different microseconds enable ordering
        micro1 = int(timestamp1[15:21])
        micro2 = int(timestamp2[15:21])
        assert (micro1, micro2) == (123456, 789012)
        assert micro1 < micro2

        # This ensures events happening in same second are ordered correctly
        assert timestamp1 < timestamp2
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestTraceOrdering:
    """Checks that plain string sorting of dotted_order values is chronological.

    All inputs are literal strings; sorting is Python's default lexicographic
    string ordering.
    """

    def test_dotted_order_sorts_chronologically(self):
        """Root dotted_orders with increasing timestamps sort in that order."""
        # LangSmith uses dotted_order for sorting traces
        # Earlier timestamps should sort before later ones
        first = "20251216T174404000000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        second = "20251216T174405000000Z1234abcd-5678-9012-3456-789012345678"
        third = "20251216T174406000000Za9876543-dcba-fedc-ba98-765432109876"

        shuffled = [third, first, second]

        # After sorting, should be in chronological order
        assert sorted(shuffled) == [first, second, third]

    def test_parent_sorts_before_children(self):
        """A parent sorts ahead of children that extend it with a dotted suffix."""
        root = "20251216T174404000000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        early_child = f"{root}.20251216T174405000000Za8024e23-5b82-47fd-970e-f6a5ba3f5097"
        late_child = f"{root}.20251216T174406000000Z0ec6b845-18b9-4aa1-8f1b-6ba3f9fdefd6"

        shuffled = [late_child, early_child, root]

        # Parent should come first, then children in order
        assert sorted(shuffled) == [root, early_child, late_child]

    def test_sibling_traces_sort_by_timestamp(self):
        """Children of the same parent sort by their own timestamps."""
        root = "20251216T174404000000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        # Two children with different timestamps
        later = f"{root}.20251216T174406000000Zchild2-uuid"
        earlier = f"{root}.20251216T174405000000Zchild1-uuid"

        # Should sort by timestamp (earlier first)
        assert sorted([later, earlier]) == [earlier, later]

    def test_microsecond_precision_affects_ordering(self):
        """Events within one second are ordered by their microsecond digits."""
        root = "20251216T174404000000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        # Events in same second but different microseconds
        at_100 = f"{root}.20251216T174405000100Zevent1"  # .000100
        at_200 = f"{root}.20251216T174405000200Zevent2"  # .000200
        at_300 = f"{root}.20251216T174405000300Zevent3"  # .000300

        shuffled = [at_300, at_100, at_200]

        # Should sort by microseconds
        assert sorted(shuffled) == [at_100, at_200, at_300]
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestTraceIDExtraction:
    """Checks for recovering the trace id from a dotted_order string.

    The trace id is whatever follows the first ``Z`` of the first (root)
    dot-separated segment. All inputs are literal strings.
    """

    def test_extract_trace_id_from_root_dotted_order(self):
        """The text after the first 'Z' of a root dotted_order is the trace id."""
        # Mirrors the shell expansion in stop_hook.sh:
        # trace_id="${turn_dotted_order#*Z}"
        # This extracts everything after the first 'Z'
        root_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7"

        # Extract trace_id (everything after Z)
        _, _, extracted = root_order.partition('Z')

        assert extracted == "0e01bf50-474d-4536-810f-67d3ee7ea3e7"
        assert len(extracted) == 36  # UUID length

    def test_extract_trace_id_from_child_dotted_order(self):
        """For a child, the trace id comes from the first dot-separated segment."""
        # Child: parent.child
        # Trace ID should be from the root (first segment)
        child_order = "20251216T174404397000Z0e01bf50-474d-4536-810f-67d3ee7ea3e7.20251216T174405123456Za8024e23-5b82-47fd-970e-f6a5ba3f5097"

        # Extract first segment (parent)
        root_segment = child_order.partition('.')[0]

        # Extract trace_id from first segment
        extracted = root_segment.partition('Z')[2]

        assert extracted == "0e01bf50-474d-4536-810f-67d3ee7ea3e7"

    def test_all_children_share_parent_trace_id(self):
        """Every node in a tree yields the root's trace id."""
        root = "20251216T174404397000Zroot-trace-id"
        first_child = f"{root}.20251216T174405123456Zchild1-id"
        second_child = f"{root}.20251216T174406789012Zchild2-id"
        descendant = f"{first_child}.20251216T174407000000Zgrandchild-id"

        # Extract trace_id from each: the root directly, the rest via their
        # first dot-separated segment.
        extracted = [root.partition('Z')[2]]
        for node in (first_child, second_child, descendant):
            extracted.append(node.partition('.')[0].partition('Z')[2])

        # All should have the same trace_id (from root)
        for trace_id in extracted:
            assert trace_id == "root-trace-id"
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestRealWorldOrdering:
    """Tests with real-world scenarios from cc_transcript.jsonl.

    The inputs are literal strings modelled on transcript data; no transcript
    file is read.
    """

    @staticmethod
    def _iso_to_dotted_timestamp(iso_timestamp):
        """Convert an ISO-8601 UTC timestamp ending in 'Z' to dotted_order form.

        ``2025-12-16T17:44:04.397Z`` becomes ``20251216T174404397000``: the
        fractional seconds are rendered as six microsecond digits by ``%f``.
        The trailing ``Z`` of the dotted_order format is not appended.
        """
        # fromisoformat() only accepts a literal 'Z' suffix from Python 3.11,
        # so spell the UTC offset out explicitly.
        parsed = datetime.fromisoformat(iso_timestamp.replace('Z', '+00:00'))
        return parsed.strftime("%Y%m%dT%H%M%S%f")

    def test_tool_call_ordering_within_turn(self):
        """Test that within a turn, events are ordered: user → assistant → tool → assistant"""
        # From cc_transcript.jsonl structure:
        # 1. User message (timestamp T1)
        # 2. Assistant with tool_use (timestamp T2)
        # 3. Tool result (timestamp T3)
        # 4. Assistant final response (timestamp T4)
        turn_id = "turn-uuid"
        turn_dotted = f"20251216T174404000000Z{turn_id}"

        # Create dotted_orders for each event
        assistant1 = f"{turn_dotted}.20251216T174405000000Zassistant1"
        tool = f"{turn_dotted}.20251216T174406000000Ztool"
        assistant2 = f"{turn_dotted}.20251216T174407000000Zassistant2"

        # Sort to verify ordering
        events = [assistant2, tool, assistant1, turn_dotted]
        events.sort()

        # Should be in chronological order
        assert events == [turn_dotted, assistant1, tool, assistant2]

    def test_multiple_turns_sort_chronologically(self):
        """Test that multiple turns sort in chronological order"""
        # Simulating multiple user-assistant turns from transcript
        turn1 = "20251216T174404000000Zturn1-uuid"
        turn2 = "20251216T174410000000Zturn2-uuid"
        turn3 = "20251216T174420000000Zturn3-uuid"

        turns = [turn3, turn1, turn2]  # Unsorted
        turns.sort()

        # Should be chronological
        assert turns == [turn1, turn2, turn3]

    def test_iso_timestamp_to_dotted_order_conversion(self):
        """Test conversion from ISO timestamp (transcript) to dotted_order format"""
        # Mirrors the conversion in stop_hook.sh:
        # ISO: 2025-12-16T17:44:04.397Z
        # To: 20251216T174404397000Z (milliseconds padded to microseconds)
        iso_timestamp = "2025-12-16T17:44:04.397Z"

        # Derive the microseconds from the parsed value instead of hard-coding
        # them, so the test exercises the conversion itself.
        parsed = datetime.fromisoformat(iso_timestamp.replace('Z', '+00:00'))
        assert parsed.microsecond == 397000

        full_timestamp = self._iso_to_dotted_timestamp(iso_timestamp)

        # Verify format
        assert full_timestamp == "20251216T174404397000"
        assert len(full_timestamp) == 21  # YYYYMMDDTHHMMSS (15) + ffffff (6)

        # Verify chronological ordering
        full2 = self._iso_to_dotted_timestamp("2025-12-16T17:44:05.123Z")
        assert full2 == "20251216T174405123000"

        # Later timestamp should sort after
        assert full2 > full_timestamp
|
||||
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Unit tests for utility functions from stop_hook.sh.
|
||||
|
||||
Tests:
|
||||
- get_microseconds() - Cross-platform microsecond timestamps
|
||||
- get_file_size() - Cross-platform file size
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.unit
class TestUtilities:
    """Tests for utility functions.

    Every test calls a shell function through the ``bash_executor`` fixture
    and checks the text it returns.
    """

    def test_get_microseconds_returns_six_digits(self, bash_executor):
        """Test microseconds format"""
        result = bash_executor.call_function("get_microseconds")

        assert len(result) == 6, f"Expected 6 digits, got {len(result)}: {result}"
        assert result.isdigit(), f"Expected all digits, got: {result}"

    def test_get_microseconds_changes_over_time(self, bash_executor):
        """Test that repeated calls each return a well-formed value.

        Inequality between the two samples is deliberately not asserted: two
        calls can legitimately return the same digits, and the previous
        ``assert result1 != result2 or True`` could never fail. Each sample
        is validated instead, so the test can detect a real problem.
        """
        import time

        first = bash_executor.call_function("get_microseconds")
        time.sleep(0.001)  # 1ms
        second = bash_executor.call_function("get_microseconds")

        for sample in (first, second):
            assert len(sample) == 6, f"Expected 6 digits, got {len(sample)}: {sample}"
            assert sample.isdigit(), f"Expected all digits, got: {sample}"

    def test_get_file_size_returns_bytes(self, bash_executor, tmp_path):
        """Test file size calculation"""
        test_file = tmp_path / "test.txt"
        test_file.write_text("hello world")  # 11 bytes

        result = bash_executor.call_function("get_file_size", str(test_file))
        size = int(result)

        assert size == 11, f"Expected 11 bytes, got {size}"

    def test_get_file_size_for_empty_file(self, bash_executor, tmp_path):
        """Test file size for empty file"""
        test_file = tmp_path / "empty.txt"
        test_file.touch()

        result = bash_executor.call_function("get_file_size", str(test_file))
        size = int(result)

        assert size == 0

    def test_get_file_size_for_large_file(self, bash_executor, tmp_path):
        """Test file size for large files"""
        test_file = tmp_path / "large.txt"
        content = b"x" * (1024 * 1024)  # 1MB
        test_file.write_bytes(content)

        result = bash_executor.call_function("get_file_size", str(test_file))
        size = int(result)

        assert size == 1024 * 1024, f"Expected 1048576 bytes, got {size}"

    def test_get_file_size_for_binary_file(self, bash_executor, tmp_path):
        """Test file size for binary files"""
        test_file = tmp_path / "binary.dat"
        binary_data = bytes(range(256))  # 256 bytes
        test_file.write_bytes(binary_data)

        result = bash_executor.call_function("get_file_size", str(test_file))
        size = int(result)

        assert size == 256
|
||||
Reference in New Issue
Block a user