mirror of
https://github.com/langchain-ai/langsmith-self-hosted-workshops.git
synced 2026-07-01 20:44:14 -04:00
feat: Add email-based workshop identifier and configure nbstripout
Implement deterministic resource naming using email-based identifiers and configure automatic output cell stripping for notebooks.

Changes:
- Add interactive workshop identifier setup in 01_preflight.ipynb
  - Prompts student for email address
  - Hashes email (MD5, 6 chars) for privacy and determinism
  - Creates identifier: -workshop-YYYYMMDD-<hash>
  - Saves to artifacts/workshop_identifier.json
- Update 02_terraform_apply.ipynb to use identifier
  - Loads identifier from artifacts file
  - Passes identifier to Terraform via -var identifier=...
  - Removes incorrect cluster_name variable (not a Terraform input)
  - Computes expected cluster name for validation
- Update 01_preflight.ipynb cluster check
  - Uses identifier to compute cluster name
  - Falls back to CLUSTER_NAME env var if identifier not set
- Configure nbstripout for automatic output cell stripping
  - Add .gitattributes with *.ipynb filter=nbstripout
  - Ensures output cells are never committed

Benefits:
- No additional environment variables required
- Deterministic identifiers (same email = same identifier)
- Idempotent deployments (safe to re-run)
- Unique per student
- Automatic output cell management
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
# Automatically strip output cells from Jupyter notebooks before committing
|
||||
*.ipynb filter=nbstripout
|
||||
|
||||
@@ -15,7 +15,7 @@ AWS_REGION="us-east-1"
|
||||
AWS_ACCOUNT_ID=""
|
||||
|
||||
# Naming (used by notebooks for display + validation)
|
||||
CLUSTER_NAME="langsmith-workshop"
|
||||
#CLUSTER_NAME=""
|
||||
|
||||
# Local repo paths (absolute is safest)
|
||||
TERRAFORM_REPO_DIR="$HOME/src/langchain-ai/terraform"
|
||||
|
||||
@@ -129,6 +129,77 @@
|
||||
" print(f\"💡 Tip: Set {account_var} in your .env file to add a guardrail against wrong account deployments\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Workshop Identifier Setup\n",
|
||||
"\n",
|
||||
"To ensure unique resource names and enable idempotent deployments, we need a unique identifier for your workshop deployment. This identifier will be used for all Terraform resources.\n",
|
||||
"\n",
|
||||
"**We'll use your email address** (hashed for privacy) to create a deterministic identifier that:\n",
|
||||
"- ✅ Stays the same across notebook runs on the same day (idempotent)\n",
|
||||
"- ✅ Is unique per student\n",
|
||||
"- ✅ Works with the date-based prefix for resource naming\n",
|
||||
"\n",
|
||||
"Enter your email address below. It will be hashed and used to generate your unique workshop identifier.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Generate deterministic workshop identifier from email\n",
|
||||
"import hashlib\n",
|
||||
"import json\n",
|
||||
"from datetime import date\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"print(\"### Workshop Identifier Setup\\n\")\n",
|
||||
"print(\"Enter your email address to generate a unique, deterministic identifier for your deployment.\\n\")\n",
|
||||
"print(\"This identifier will be used for all Terraform resources and ensures:\")\n",
|
||||
"print(\" - Same email = same identifier (idempotent)\")\n",
|
||||
"print(\" - Different emails = different identifiers (unique)\")\n",
|
||||
"print(\" - No additional environment variables needed\\n\")\n",
|
||||
"\n",
|
||||
"# Prompt for email (using input() - works in Jupyter)\n",
|
||||
"email = input(\"Enter your email address: \").strip().lower()\n",
|
||||
"\n",
|
||||
"if not email or \"@\" not in email:\n",
|
||||
" raise ValueError(\"Invalid email address. Please enter a valid email.\")\n",
|
||||
"\n",
|
||||
"# Hash email for privacy and determinism\n",
|
||||
"email_hash = hashlib.md5(email.encode()).hexdigest()[:6]\n",
|
||||
"\n",
|
||||
"# Build identifier: -workshop-YYYYMMDD-<hash>\n",
|
||||
"today = date.today()\n",
|
||||
"date_str = today.strftime('%Y%m%d')\n",
|
||||
"workshop_identifier = f\"-workshop-{date_str}-{email_hash}\"\n",
|
||||
"\n",
|
||||
"# Save to artifacts directory for use in Terraform notebook\n",
|
||||
"identifier_file = Path(bootstrap_info['artifacts_dir']) / \"workshop_identifier.json\"\n",
|
||||
"identifier_data = {\n",
|
||||
" \"email_hash\": email_hash,\n",
|
||||
" \"identifier\": workshop_identifier,\n",
|
||||
" \"date\": date_str,\n",
|
||||
" \"created_at\": date.today().isoformat()\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"with open(identifier_file, 'w') as f:\n",
|
||||
" json.dump(identifier_data, f, indent=2)\n",
|
||||
"\n",
|
||||
"print(f\"\\n✅ Workshop identifier generated:\")\n",
|
||||
"print(f\" Identifier: {workshop_identifier}\")\n",
|
||||
"print(f\" Date component: {date_str}\")\n",
|
||||
"print(f\" Hash (from email): {email_hash}\")\n",
|
||||
"print(f\"\\n💡 This identifier will be used for all Terraform resources\")\n",
|
||||
"print(f\" Saved to: {identifier_file}\")\n",
|
||||
"print(f\"\\n⚠️ IMPORTANT: Use the same email address if you re-run this notebook\")\n",
|
||||
"print(f\" to ensure Terraform can manage existing resources.\")\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@@ -153,7 +224,6 @@
|
||||
"required_vars = [\n",
|
||||
" \"WORKSHOP_NAME\",\n",
|
||||
" \"NAMESPACE\",\n",
|
||||
" \"CLUSTER_NAME\",\n",
|
||||
" \"TERRAFORM_DIR\",\n",
|
||||
" \"HELM_RELEASE\",\n",
|
||||
" \"HELM_NAMESPACE\",\n",
|
||||
@@ -222,9 +292,27 @@
|
||||
" get_kubernetes_service_name,\n",
|
||||
")\n",
|
||||
"from shared._shell import run\n",
|
||||
"from shared._validation import ok, warn, require_env\n",
|
||||
"from pathlib import Path\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"# Load workshop identifier if it exists (from identifier setup cell)\n",
|
||||
"identifier_file = Path(bootstrap_info['artifacts_dir']) / \"workshop_identifier.json\"\n",
|
||||
"if identifier_file.exists():\n",
|
||||
" with open(identifier_file) as f:\n",
|
||||
" identifier_data = json.load(f)\n",
|
||||
" workshop_identifier = identifier_data[\"identifier\"]\n",
|
||||
" # Compute expected cluster name: langsmith-eks${identifier}\n",
|
||||
" cluster_name = f\"langsmith-eks{workshop_identifier}\"\n",
|
||||
" print(f\"💡 Using cluster name from workshop identifier: {cluster_name}\\n\")\n",
|
||||
"else:\n",
|
||||
" # Fallback to CLUSTER_NAME env var if identifier not set yet\n",
|
||||
" config = require_env(\"CLUSTER_NAME\")\n",
|
||||
" cluster_name = config[\"CLUSTER_NAME\"]\n",
|
||||
" warn(\"Workshop identifier not found - using CLUSTER_NAME from environment\")\n",
|
||||
" print(\"💡 Run the 'Workshop Identifier Setup' cell above to generate a unique identifier\\n\")\n",
|
||||
"\n",
|
||||
"provider = get_cloud_provider()\n",
|
||||
"cluster_name = os.environ[\"CLUSTER_NAME\"]\n",
|
||||
"region = get_region()\n",
|
||||
"k8s_service = get_kubernetes_service_name()\n",
|
||||
"\n",
|
||||
|
||||
@@ -97,6 +97,7 @@
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import re\n",
|
||||
"import json\n",
|
||||
"from pathlib import Path\n",
|
||||
"from shared._validation import require_env, ok, warn, fail\n",
|
||||
"from shared._shell import run\n",
|
||||
@@ -131,15 +132,41 @@
|
||||
"terraform_dir_str = expand_env_vars(config[\"TERRAFORM_DIR\"])\n",
|
||||
"terraform_dir = Path(terraform_dir_str).expanduser().resolve()\n",
|
||||
"\n",
|
||||
"cluster_name = config[\"CLUSTER_NAME\"]\n",
|
||||
"# Load workshop identifier from preflight notebook\n",
|
||||
"identifier_file = artifacts_dir / \"workshop_identifier.json\"\n",
|
||||
"if not identifier_file.exists():\n",
|
||||
" fail(f\"Workshop identifier not found: {identifier_file}\")\n",
|
||||
" print(\"\\n💡 To fix this:\")\n",
|
||||
" print(\" 1. Run the preflight notebook (01_preflight.ipynb) first\")\n",
|
||||
" print(\" 2. Complete the 'Workshop Identifier Setup' cell\")\n",
|
||||
" print(\" 3. Then return to this notebook\")\n",
|
||||
" raise RuntimeError(f\"Workshop identifier not found. Please run 01_preflight.ipynb first.\")\n",
|
||||
"\n",
|
||||
"with open(identifier_file) as f:\n",
|
||||
" identifier_data = json.load(f)\n",
|
||||
"\n",
|
||||
"workshop_identifier = identifier_data[\"identifier\"]\n",
|
||||
"print(f\"✅ Loaded workshop identifier: {workshop_identifier}\")\n",
|
||||
"\n",
|
||||
"# Compute expected cluster name for validation/display\n",
|
||||
"# Terraform computes: cluster_name = \"langsmith-eks${local.identifier}\"\n",
|
||||
"cluster_name = f\"langsmith-eks{workshop_identifier}\"\n",
|
||||
"\n",
|
||||
"region = config[region_var]\n",
|
||||
"workshop_name = config[\"WORKSHOP_NAME\"]\n",
|
||||
"\n",
|
||||
"print(\"### Terraform Configuration\")\n",
|
||||
"print(\"\\n### Terraform Configuration\")\n",
|
||||
"print(f\"Terraform Directory: {terraform_dir}\")\n",
|
||||
"print(f\"Cluster Name: {cluster_name}\")\n",
|
||||
"print(f\"Workshop Identifier: {workshop_identifier}\")\n",
|
||||
"print(f\"Expected Cluster Name: {cluster_name}\")\n",
|
||||
"print(f\"Region: {region}\")\n",
|
||||
"print(f\"Workshop Name: {workshop_name}\\n\")\n",
|
||||
"print(f\"Workshop Name: {workshop_name}\")\n",
|
||||
"print(f\"\\n💡 Terraform will use this identifier for all resource names:\")\n",
|
||||
"print(f\" Cluster: langsmith-eks{workshop_identifier}\")\n",
|
||||
"print(f\" Redis: langsmith-redis{workshop_identifier}\")\n",
|
||||
"print(f\" S3: langsmith-s3{workshop_identifier}\")\n",
|
||||
"print(f\" Postgres: langsmith-postgres{workshop_identifier}\")\n",
|
||||
"print(f\" VPC: langsmith-vpc{workshop_identifier}\\n\")\n",
|
||||
"\n",
|
||||
"if not terraform_dir.exists():\n",
|
||||
" fail(f\"Terraform directory does not exist: {terraform_dir}\")\n",
|
||||
@@ -371,6 +398,8 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import getpass\n",
|
||||
"\n",
|
||||
"# Create terraform plan\n",
|
||||
"plan_file = artifacts_dir / \"terraform-plan.txt\"\n",
|
||||
"\n",
|
||||
@@ -383,7 +412,22 @@
|
||||
"postgres_username = os.environ.get(\"POSTGRES_USERNAME\", \"\").strip()\n",
|
||||
"postgres_password = os.environ.get(\"POSTGRES_PASSWORD\", \"\").strip()\n",
|
||||
"\n",
|
||||
"if not postgres_username:\n",
|
||||
" print(\"Please provide a PostgreSQL username: \")\n",
|
||||
" postgres_username = input().strip()\n",
|
||||
"\n",
|
||||
"if not postgres_password:\n",
|
||||
" print(\"Please provide a PostgreSQL password: \")\n",
|
||||
" postgres_password = getpass.getpass().strip()\n",
|
||||
"\n",
|
||||
"print(\"### Terraform Variables\\n\")\n",
|
||||
"\n",
|
||||
"# Pass workshop identifier to Terraform\n",
|
||||
"# This is the key variable that controls all resource naming\n",
|
||||
"terraform_vars.extend([\"-var\", f\"identifier={workshop_identifier}\"])\n",
|
||||
"print(f\"✅ IDENTIFIER: {workshop_identifier}\")\n",
|
||||
"print(f\" This will be used for all resource names (cluster, redis, s3, postgres, vpc)\\n\")\n",
|
||||
"\n",
|
||||
"missing_vars = []\n",
|
||||
"\n",
|
||||
"if postgres_username:\n",
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
import os
|
||||
from datetime import date
|
||||
|
||||
def ok(msg: str) -> None:
|
||||
print(f"✅ {msg}")
|
||||
@@ -18,6 +19,9 @@ def require_env(*keys: str) -> dict:
|
||||
if not v:
|
||||
missing.append(k)
|
||||
cfg[k] = v
|
||||
if k == 'CLUSTER_NAME':
|
||||
# Add a hardcoded prefix to the cluster name
|
||||
cfg[k] = f"langsmith-workshop-{date.today().strftime('%Y%m%d')}-{v}"
|
||||
if missing:
|
||||
fail(f"Missing required environment variables: {', '.join(missing)}")
|
||||
return cfg
|
||||
|
||||
Reference in New Issue
Block a user