feat: Add email-based workshop identifier and configure nbstripout

Implement deterministic resource naming using email-based identifiers
and configure automatic output cell stripping for notebooks.

Changes:
- Add interactive workshop identifier setup in 01_preflight.ipynb
  - Prompts student for email address
  - Hashes the trimmed, lowercased email (first 6 hex chars of its MD5 digest) for privacy and determinism
  - Creates identifier: -workshop-YYYYMMDD-<hash>
  - Saves to artifacts/workshop_identifier.json

- Update 02_terraform_apply.ipynb to use identifier
  - Loads identifier from artifacts file
  - Passes identifier to Terraform via -var identifier=...
  - Removes incorrect cluster_name variable (not a Terraform input)
  - Computes expected cluster name for validation

- Update 01_preflight.ipynb cluster check
  - Uses identifier to compute cluster name
  - Falls back to the CLUSTER_NAME env var if artifacts/workshop_identifier.json does not exist yet

- Configure nbstripout for automatic output cell stripping
  - Add .gitattributes with *.ipynb filter=nbstripout
  - Strips cell outputs at commit time in any clone where the nbstripout git filter is installed (e.g. via `nbstripout --install`)

Benefits:
- No additional environment variables required
- Deterministic identifiers (same email on the same date = same identifier, because the identifier embeds YYYYMMDD)
- Idempotent deployments (safe to re-run)
- Unique per student
- Automatic output cell management
This commit is contained in:
Cory Waddingham
2026-01-06 09:27:55 -08:00
parent 34fa0e6874
commit 62b8abef6c
5 changed files with 146 additions and 7 deletions
+3
View File
@@ -0,0 +1,3 @@
# Strip cell outputs from Jupyter notebooks on commit (requires the nbstripout git filter to be installed locally)
*.ipynb filter=nbstripout
+1 -1
View File
@@ -15,7 +15,7 @@ AWS_REGION="us-east-1"
AWS_ACCOUNT_ID=""
# Naming (used by notebooks for display + validation)
CLUSTER_NAME="langsmith-workshop"
#CLUSTER_NAME=""
# Local repo paths (absolute is safest)
TERRAFORM_REPO_DIR="$HOME/src/langchain-ai/terraform"
+90 -2
View File
@@ -129,6 +129,77 @@
" print(f\"💡 Tip: Set {account_var} in your .env file to add a guardrail against wrong account deployments\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Workshop Identifier Setup\n",
"\n",
"To ensure unique resource names and enable idempotent deployments, we need a unique identifier for your workshop deployment. This identifier will be used for all Terraform resources.\n",
"\n",
"**We'll use your email address** (hashed for privacy) to create a deterministic identifier that:\n",
"- ✅ Stays the same across notebook runs (idempotent)\n",
"- ✅ Is unique per student\n",
"- ✅ Works with the date-based prefix for resource naming\n",
"\n",
"Enter your email address below. It will be hashed and used to generate your unique workshop identifier.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Generate deterministic workshop identifier from email\n",
"import hashlib\n",
"import json\n",
"from datetime import date\n",
"from pathlib import Path\n",
"\n",
"print(\"### Workshop Identifier Setup\\n\")\n",
"print(\"Enter your email address to generate a unique, deterministic identifier for your deployment.\\n\")\n",
"print(\"This identifier will be used for all Terraform resources and ensures:\")\n",
"print(\" - Same email = same identifier (idempotent)\")\n",
"print(\" - Different emails = different identifiers (unique)\")\n",
"print(\" - No additional environment variables needed\\n\")\n",
"\n",
"# Prompt for email (using input() - works in Jupyter)\n",
"email = input(\"Enter your email address: \").strip().lower()\n",
"\n",
"if not email or \"@\" not in email:\n",
" raise ValueError(\"Invalid email address. Please enter a valid email.\")\n",
"\n",
"# Hash email for privacy and determinism\n",
"email_hash = hashlib.md5(email.encode()).hexdigest()[:6]\n",
"\n",
"# Build identifier: -workshop-YYYYMMDD-<hash>\n",
"today = date.today()\n",
"date_str = today.strftime('%Y%m%d')\n",
"workshop_identifier = f\"-workshop-{date_str}-{email_hash}\"\n",
"\n",
"# Save to artifacts directory for use in Terraform notebook\n",
"identifier_file = Path(bootstrap_info['artifacts_dir']) / \"workshop_identifier.json\"\n",
"identifier_data = {\n",
" \"email_hash\": email_hash,\n",
" \"identifier\": workshop_identifier,\n",
" \"date\": date_str,\n",
" \"created_at\": date.today().isoformat()\n",
"}\n",
"\n",
"with open(identifier_file, 'w') as f:\n",
" json.dump(identifier_data, f, indent=2)\n",
"\n",
"print(f\"\\n✅ Workshop identifier generated:\")\n",
"print(f\" Identifier: {workshop_identifier}\")\n",
"print(f\" Date component: {date_str}\")\n",
"print(f\" Hash (from email): {email_hash}\")\n",
"print(f\"\\n💡 This identifier will be used for all Terraform resources\")\n",
"print(f\" Saved to: {identifier_file}\")\n",
"print(f\"\\n⚠️ IMPORTANT: Use the same email address if you re-run this notebook\")\n",
"print(f\" to ensure Terraform can manage existing resources.\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -153,7 +224,6 @@
"required_vars = [\n",
" \"WORKSHOP_NAME\",\n",
" \"NAMESPACE\",\n",
" \"CLUSTER_NAME\",\n",
" \"TERRAFORM_DIR\",\n",
" \"HELM_RELEASE\",\n",
" \"HELM_NAMESPACE\",\n",
@@ -222,9 +292,27 @@
" get_kubernetes_service_name,\n",
")\n",
"from shared._shell import run\n",
"from shared._validation import ok, warn, require_env\n",
"from pathlib import Path\n",
"import json\n",
"\n",
"# Load workshop identifier if it exists (from identifier setup cell)\n",
"identifier_file = Path(bootstrap_info['artifacts_dir']) / \"workshop_identifier.json\"\n",
"if identifier_file.exists():\n",
" with open(identifier_file) as f:\n",
" identifier_data = json.load(f)\n",
" workshop_identifier = identifier_data[\"identifier\"]\n",
" # Compute expected cluster name: langsmith-eks${identifier}\n",
" cluster_name = f\"langsmith-eks{workshop_identifier}\"\n",
" print(f\"💡 Using cluster name from workshop identifier: {cluster_name}\\n\")\n",
"else:\n",
" # Fallback to CLUSTER_NAME env var if identifier not set yet\n",
" config = require_env(\"CLUSTER_NAME\")\n",
" cluster_name = config[\"CLUSTER_NAME\"]\n",
" warn(\"Workshop identifier not found - using CLUSTER_NAME from environment\")\n",
" print(\"💡 Run the 'Workshop Identifier Setup' cell above to generate a unique identifier\\n\")\n",
"\n",
"provider = get_cloud_provider()\n",
"cluster_name = os.environ[\"CLUSTER_NAME\"]\n",
"region = get_region()\n",
"k8s_service = get_kubernetes_service_name()\n",
"\n",
+48 -4
View File
@@ -97,6 +97,7 @@
"source": [
"import os\n",
"import re\n",
"import json\n",
"from pathlib import Path\n",
"from shared._validation import require_env, ok, warn, fail\n",
"from shared._shell import run\n",
@@ -131,15 +132,41 @@
"terraform_dir_str = expand_env_vars(config[\"TERRAFORM_DIR\"])\n",
"terraform_dir = Path(terraform_dir_str).expanduser().resolve()\n",
"\n",
"cluster_name = config[\"CLUSTER_NAME\"]\n",
"# Load workshop identifier from preflight notebook\n",
"identifier_file = artifacts_dir / \"workshop_identifier.json\"\n",
"if not identifier_file.exists():\n",
" fail(f\"Workshop identifier not found: {identifier_file}\")\n",
" print(\"\\n💡 To fix this:\")\n",
" print(\" 1. Run the preflight notebook (01_preflight.ipynb) first\")\n",
" print(\" 2. Complete the 'Workshop Identifier Setup' cell\")\n",
" print(\" 3. Then return to this notebook\")\n",
" raise RuntimeError(f\"Workshop identifier not found. Please run 01_preflight.ipynb first.\")\n",
"\n",
"with open(identifier_file) as f:\n",
" identifier_data = json.load(f)\n",
"\n",
"workshop_identifier = identifier_data[\"identifier\"]\n",
"print(f\"✅ Loaded workshop identifier: {workshop_identifier}\")\n",
"\n",
"# Compute expected cluster name for validation/display\n",
"# Terraform computes: cluster_name = \"langsmith-eks${local.identifier}\"\n",
"cluster_name = f\"langsmith-eks{workshop_identifier}\"\n",
"\n",
"region = config[region_var]\n",
"workshop_name = config[\"WORKSHOP_NAME\"]\n",
"\n",
"print(\"### Terraform Configuration\")\n",
"print(\"\\n### Terraform Configuration\")\n",
"print(f\"Terraform Directory: {terraform_dir}\")\n",
"print(f\"Cluster Name: {cluster_name}\")\n",
"print(f\"Workshop Identifier: {workshop_identifier}\")\n",
"print(f\"Expected Cluster Name: {cluster_name}\")\n",
"print(f\"Region: {region}\")\n",
"print(f\"Workshop Name: {workshop_name}\\n\")\n",
"print(f\"Workshop Name: {workshop_name}\")\n",
"print(f\"\\n💡 Terraform will use this identifier for all resource names:\")\n",
"print(f\" Cluster: langsmith-eks{workshop_identifier}\")\n",
"print(f\" Redis: langsmith-redis{workshop_identifier}\")\n",
"print(f\" S3: langsmith-s3{workshop_identifier}\")\n",
"print(f\" Postgres: langsmith-postgres{workshop_identifier}\")\n",
"print(f\" VPC: langsmith-vpc{workshop_identifier}\\n\")\n",
"\n",
"if not terraform_dir.exists():\n",
" fail(f\"Terraform directory does not exist: {terraform_dir}\")\n",
@@ -371,6 +398,8 @@
"metadata": {},
"outputs": [],
"source": [
"import getpass\n",
"\n",
"# Create terraform plan\n",
"plan_file = artifacts_dir / \"terraform-plan.txt\"\n",
"\n",
@@ -383,7 +412,22 @@
"postgres_username = os.environ.get(\"POSTGRES_USERNAME\", \"\").strip()\n",
"postgres_password = os.environ.get(\"POSTGRES_PASSWORD\", \"\").strip()\n",
"\n",
"if not postgres_username:\n",
" print(\"Please provide a PostgreSQL username: \")\n",
" postgres_username = input().strip()\n",
"\n",
"if not postgres_password:\n",
" print(\"Please provide a PostgreSQL password: \")\n",
" postgres_password = getpass.getpass().strip()\n",
"\n",
"print(\"### Terraform Variables\\n\")\n",
"\n",
"# Pass workshop identifier to Terraform\n",
"# This is the key variable that controls all resource naming\n",
"terraform_vars.extend([\"-var\", f\"identifier={workshop_identifier}\"])\n",
"print(f\"✅ IDENTIFIER: {workshop_identifier}\")\n",
"print(f\" This will be used for all resource names (cluster, redis, s3, postgres, vpc)\\n\")\n",
"\n",
"missing_vars = []\n",
"\n",
"if postgres_username:\n",
+4
View File
@@ -1,5 +1,6 @@
from __future__ import annotations
import os
from datetime import date
def ok(msg: str) -> None:
print(f"{msg}")
@@ -18,6 +19,9 @@ def require_env(*keys: str) -> dict:
if not v:
missing.append(k)
cfg[k] = v
if k == 'CLUSTER_NAME':
# Add a hardcoded prefix to the cluster name
cfg[k] = f"langsmith-workshop-{date.today().strftime('%Y%m%d')}-{v}"
if missing:
fail(f"Missing required environment variables: {', '.join(missing)}")
return cfg