mirror of
https://github.com/Heretek-AI/openclaw.git
synced 2026-07-01 22:34:00 -04:00
6d2edf235b
- Document NPM publish workflow for @heretek-ai org - Token stored securely in ~/.npmrc (not version-controlled) - Include build steps, verification, and security guidance
643 lines
18 KiB
JavaScript
Executable File
643 lines
18 KiB
JavaScript
Executable File
#!/usr/bin/env node
|
|
/**
|
|
* Triad SSH Hooks — Node-to-Node Trigger Scripts
|
|
*
|
|
* Provides SSH-based inter-node communication for:
|
|
* - Remote command triggers (beyond verification)
|
|
* - State sync initiation
|
|
* - Consensus coordination
|
|
* - Presence detection via SSH
|
|
* - Recovery and failover operations
|
|
*
|
|
* Triad Nodes:
|
|
* - TM-1: silica-animus (192.168.31.99) — Authority
|
|
* - TM-2: testbench (192.168.31.209)
|
|
* - TM-3: tabula-myriad-3 (192.168.31.85)
|
|
* - TM-4: tabula-myriad-4 (192.168.31.205)
|
|
*
|
|
* SSH Key: /home/openclaw/.ssh/triad_key (ed25519, no passphrase)
|
|
*
|
|
* @module TriadSSHManager
|
|
*/
|
|
|
|
import { spawn } from "child_process";
|
|
import { EventEmitter } from "events";
|
|
import { Logger } from "../logger.js";
|
|
|
|
const logger = new Logger("triad-ssh-hooks");
|
|
|
|
// ============================================================================
|
|
// Configuration
|
|
// ============================================================================
|
|
|
|
/**
 * SSH connection settings for a single triad node.
 *
 * @typedef {Object} TriadNodeSSHConfig
 * @property {string} nodeId - Same value as the entry's key in TRIAD_SSH_NODES.
 * @property {string} hostname
 * @property {string} ipAddress - Address the ssh client connects to.
 * @property {string} sshUser
 * @property {number} sshPort
 * @property {"authority"|"participant"} role
 */

/**
 * Static registry of the nodes reachable over SSH, keyed by node id.
 *
 * The table and every entry are frozen: this is shared, exported
 * configuration, and an accidental write from one importer would silently
 * redirect every later SSH call made by the others.
 *
 * @type {Readonly<Record<string, Readonly<TriadNodeSSHConfig>>>}
 */
export const TRIAD_SSH_NODES = Object.freeze({
  "TM-1": Object.freeze({
    nodeId: "TM-1",
    hostname: "silica-animus",
    ipAddress: "192.168.31.99",
    sshUser: "openclaw",
    sshPort: 22,
    role: "authority",
  }),
  "TM-2": Object.freeze({
    nodeId: "TM-2",
    hostname: "testbench",
    ipAddress: "192.168.31.209",
    sshUser: "root",
    sshPort: 22,
    role: "participant",
  }),
  "TM-3": Object.freeze({
    nodeId: "TM-3",
    hostname: "tabula-myriad-3",
    ipAddress: "192.168.31.85",
    sshUser: "root",
    sshPort: 22,
    role: "participant",
  }),
  "TM-4": Object.freeze({
    nodeId: "TM-4",
    hostname: "tabula-myriad-4",
    ipAddress: "192.168.31.205",
    sshUser: "root",
    sshPort: 22,
    role: "participant",
  }),
});
|
|
|
|
// Both default paths live under the `openclaw` account's home directory.
const OPENCLAW_HOME = "/home/openclaw";

/** Default private key file handed to `ssh -i`. */
export const SSH_KEY_PATH = `${OPENCLAW_HOME}/.ssh/triad_key`;

/** Workspace directory; interpolated into the shell commands sent to nodes. */
export const WORKSPACE_PATH = `${OPENCLAW_HOME}/.openclaw/workspace`;
|
|
|
|
// ============================================================================
|
|
// SSH Command Types
|
|
// ============================================================================
|
|
|
|
/**
 * Identifiers for the pre-built remote commands, written as "<area>:<action>".
 *
 * Frozen so a stray assignment cannot change which command a type maps to.
 */
const SSHCommandType = Object.freeze({
  // Verification
  GIT_HASH: "git:hash",
  GIT_STATUS: "git:status",
  FILE_CHECK: "file:check",
  PROCESS_CHECK: "process:check",

  // Triggers
  HEARTBEAT_TRIGGER: "heartbeat:trigger",
  SYNC_TRIGGER: "sync:trigger",
  CONSENSUS_TRIGGER: "consensus:trigger",
  RECOVERY_TRIGGER: "recovery:trigger",

  // Actions
  RESTART_GATEWAY: "gateway:restart",
  PULL_WORKSPACE: "workspace:pull",
  BACKUP_LEDGER: "ledger:backup",
  DEPLOY_SKILL: "skill:deploy",

  // Diagnostics
  RESOURCE_CHECK: "resource:check",
  LOG_TAIL: "log:tail",
  DISK_USAGE: "disk:usage",
});
|
|
|
|
/**
|
|
* @typedef {Object} SSHCommand
|
|
* @property {string} type
|
|
* @property {string} targetNodeId
|
|
* @property {string} command
|
|
* @property {number} timeoutMs
|
|
* @property {boolean} requiresOutput
|
|
*/
|
|
|
|
// ============================================================================
|
|
// SSH Executor
|
|
// ============================================================================
|
|
|
|
/**
 * Runs shell commands on triad nodes by spawning the local `ssh` client.
 */
export class SSHExecutor extends EventEmitter {
  /**
   * @param {string} localNodeId - Id of the node this process runs on.
   * @param {string} [sshKeyPath] - Private key file passed to `ssh -i`.
   */
  constructor(localNodeId, sshKeyPath = SSH_KEY_PATH) {
    super();
    this.localNodeId = localNodeId;
    this.sshKeyPath = sshKeyPath;
  }

  /**
   * Run `command` on the target node and collect its output.
   *
   * The promise resolves for any exit status; callers must inspect
   * `exitCode` themselves. It rejects only for an unknown node, a spawn
   * failure, or a timeout.
   *
   * @param {string} targetNodeId - Key of TRIAD_SSH_NODES.
   * @param {string} command - Shell command line handed to ssh verbatim.
   * @param {{ timeoutMs?: number, cwd?: string, env?: Record<string, string> }} options
   *   `cwd` and `env` apply to the local ssh client process, not to the
   *   remote shell. `timeoutMs` defaults to 30000.
   * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
   */
  execute(targetNodeId, command, options = {}) {
    return new Promise((resolve, reject) => {
      // Own-property lookup: ids such as "constructor" or "toString" would
      // otherwise resolve to inherited Object.prototype members and be
      // accepted as a (garbage) node config.
      const config = Object.prototype.hasOwnProperty.call(TRIAD_SSH_NODES, targetNodeId)
        ? TRIAD_SSH_NODES[targetNodeId]
        : undefined;
      if (!config) {
        reject(new Error(`Unknown node: ${targetNodeId}`));
        return;
      }

      // NOTE(review): StrictHostKeyChecking=no together with
      // UserKnownHostsFile=/dev/null disables host key verification, so the
      // connection is open to man-in-the-middle attacks. Left unchanged to
      // preserve behavior; consider a pinned known_hosts file instead.
      const sshArgs = [
        "-i",
        this.sshKeyPath,
        "-o",
        "StrictHostKeyChecking=no",
        "-o",
        "UserKnownHostsFile=/dev/null",
        "-o",
        "ConnectTimeout=10",
        "-p",
        String(config.sshPort),
        `${config.sshUser}@${config.ipAddress}`,
        command,
      ];

      const timeoutMs = options.timeoutMs || 30000;
      let stdout = "";
      let stderr = "";
      let timer;
      let settled = false;

      // Timeout, "error" and "close" can all fire for the same process;
      // only the first outcome is allowed to settle the promise.
      const settle = (finish, value) => {
        if (settled) {
          return;
        }
        settled = true;
        clearTimeout(timer);
        finish(value);
      };

      logger.debug(`Executing SSH command on ${targetNodeId}: ${command}`);

      const proc = spawn("ssh", sshArgs, {
        cwd: options.cwd || WORKSPACE_PATH,
        env: { ...process.env, ...options.env },
        stdio: ["ignore", "pipe", "pipe"],
      });

      timer = setTimeout(() => {
        proc.kill("SIGKILL");
        settle(reject, new Error(`SSH command timeout after ${timeoutMs}ms`));
      }, timeoutMs);

      proc.stdout.on("data", (chunk) => {
        stdout += chunk.toString();
      });

      proc.stderr.on("data", (chunk) => {
        stderr += chunk.toString();
      });

      proc.on("close", (code, signal) => {
        // `code` is null when ssh was terminated by a signal. That is a
        // failure and must not be reported as exit code 0.
        if (code === null && !settled) {
          logger.warn(`SSH process for ${targetNodeId} was terminated by signal ${signal}`);
        }
        settle(resolve, {
          exitCode: code === null ? 1 : code,
          stdout,
          stderr,
        });
      });

      proc.on("error", (err) => {
        settle(reject, err);
      });
    });
  }

  /**
   * Run a command until it exits with status 0, retrying on any failure.
   *
   * @param {string} targetNodeId - Key of TRIAD_SSH_NODES.
   * @param {string} command - Shell command line handed to ssh verbatim.
   * @param {number} [maxRetries=3] - Total number of attempts.
   * @param {number} [retryDelayMs=2000] - Pause between attempts.
   * @param {{ timeoutMs?: number, cwd?: string, env?: Record<string, string> }} [options]
   *   Forwarded unchanged to every execute() call.
   * @returns {Promise<{ exitCode: number, stdout: string, stderr: string }>}
   *   The first result whose exit code is 0.
   * @throws {Error} The error of the last attempt once all attempts failed.
   */
  async executeWithRetry(targetNodeId, command, maxRetries = 3, retryDelayMs = 2000, options = {}) {
    let lastError = null;

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        const result = await this.execute(targetNodeId, command, options);
        if (result.exitCode === 0) {
          return result;
        }
        // Carry the remote stderr along; otherwise the caller only ever
        // sees a bare exit code.
        const detail = typeof result.stderr === "string" ? result.stderr.trim() : "";
        throw new Error(
          `Command failed with exit code ${result.exitCode}${detail ? `: ${detail}` : ""}`,
        );
      } catch (err) {
        lastError = err;
        logger.warn(`SSH command attempt ${attempt}/${maxRetries} failed:`, err);

        if (attempt < maxRetries) {
          await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
        }
      }
    }

    throw lastError || new Error("SSH command failed after all retries");
  }
}
|
|
|
|
// ============================================================================
|
|
// Pre-built Command Generators
|
|
// ============================================================================
|
|
|
|
/**
 * Build the descriptor for one of the pre-defined remote commands.
 *
 * @param {string} type - One of the SSHCommandType values.
 * @param {string} targetNodeId - Copied into the descriptor unchanged; not validated here.
 * @returns {{ type: string, targetNodeId: string, command: string, timeoutMs: number, requiresOutput: boolean }}
 * @throws {Error} When `type` is not a known command type.
 */
export function buildCommand(type, targetNodeId) {
  let command;
  let timeoutMs = 30000;
  let requiresOutput = true;

  switch (type) {
    case SSHCommandType.GIT_HASH:
      command = `cd ${WORKSPACE_PATH} && git rev-parse HEAD`;
      timeoutMs = 10000;
      break;

    case SSHCommandType.GIT_STATUS:
      command = `cd ${WORKSPACE_PATH} && git status --short`;
      timeoutMs = 10000;
      break;

    case SSHCommandType.FILE_CHECK:
      command = `test -f ${WORKSPACE_PATH}/.aura/consensus.db && echo "exists" || echo "missing"`;
      timeoutMs = 5000;
      break;

    case SSHCommandType.PROCESS_CHECK:
      // The remote shell that runs this line has the whole line in its own
      // argv, so a plain "openclaw gateway" pattern always matches that
      // shell and "not_running" is never printed. Writing the first letter
      // as a bracket expression keeps the pattern from matching its own
      // text while still matching the real process.
      command = `pgrep -f "[o]penclaw gateway" || echo "not_running"`;
      timeoutMs = 5000;
      break;

    case SSHCommandType.HEARTBEAT_TRIGGER:
      command = `echo "heartbeat:$(date +%s)" >> /tmp/triad-heartbeat.log`;
      timeoutMs = 5000;
      requiresOutput = false;
      break;

    case SSHCommandType.SYNC_TRIGGER:
      command = `cd ${WORKSPACE_PATH} && git fetch origin && git status`;
      timeoutMs = 30000;
      break;

    case SSHCommandType.CONSENSUS_TRIGGER:
      command = `sqlite3 ${WORKSPACE_PATH}/.aura/consensus.db "SELECT * FROM consensus_votes WHERE processed=0 LIMIT 1"`;
      timeoutMs = 10000;
      break;

    case SSHCommandType.RECOVERY_TRIGGER:
      command = `${WORKSPACE_PATH}/scripts/autobackup.sh`;
      timeoutMs = 60000;
      break;

    case SSHCommandType.RESTART_GATEWAY:
      command = `openclaw gateway restart`;
      timeoutMs = 30000;
      requiresOutput = false;
      break;

    case SSHCommandType.PULL_WORKSPACE:
      command = `cd ${WORKSPACE_PATH} && git pull origin main`;
      timeoutMs = 60000;
      break;

    case SSHCommandType.BACKUP_LEDGER:
      command = `sqlite3 ${WORKSPACE_PATH}/.aura/consensus.db ".backup '${WORKSPACE_PATH}/.aura/consensus.db.backup.$(date +%Y%m%d%H%M%S)'"`;
      timeoutMs = 15000;
      break;

    case SSHCommandType.DEPLOY_SKILL:
      command = `clawhub sync`;
      timeoutMs = 60000;
      break;

    case SSHCommandType.RESOURCE_CHECK:
      command = `free -m && df -h ${WORKSPACE_PATH} && uptime`;
      timeoutMs = 10000;
      break;

    case SSHCommandType.LOG_TAIL:
      // Derived from WORKSPACE_PATH like every other workspace-relative
      // command, so the log path follows the workspace constant.
      command = `tail -n 50 ${WORKSPACE_PATH}/.aura/logs/gateway.log`;
      timeoutMs = 10000;
      break;

    case SSHCommandType.DISK_USAGE:
      command = `df -h`;
      timeoutMs = 5000;
      break;

    default:
      throw new Error(`Unknown SSH command type: ${String(type)}`);
  }

  return {
    type,
    targetNodeId,
    command,
    timeoutMs,
    requiresOutput,
  };
}
|
|
|
|
// ============================================================================
|
|
// Triad SSH Manager (High-Level Coordinator)
|
|
// ============================================================================
|
|
|
|
/**
 * Throw a uniform error when a remote command exited with a non-zero status.
 *
 * @param {{ exitCode: number, stderr: string }} result
 * @param {string} action - Human-readable name of the operation.
 * @param {string} targetNodeId
 */
function assertSSHSuccess(result, action, targetNodeId) {
  if (result.exitCode !== 0) {
    throw new Error(`${action} failed on ${targetNodeId}: ${result.stderr}`);
  }
}

/**
 * High-level coordinator that drives the other triad nodes over SSH and
 * reports each outcome as an event.
 *
 * Events emitted: "node:verified", "node:unreachable", "heartbeat:triggered",
 * "sync:triggered", "consensus:checked", "gateway:restarted",
 * "workspace:pulled", "ledger:backedup", "divergence:detected",
 * "recovery:complete", "recovery:failed".
 */
export class TriadSSHManager extends EventEmitter {
  /**
   * @param {string} localNodeId - Id of the node this process runs on; it is
   *   left out of presence tracking and of verifyAllNodes().
   * @param {string} [sshKeyPath] - Private key file handed to the executor.
   */
  constructor(localNodeId, sshKeyPath = SSH_KEY_PATH) {
    super();
    this.localNodeId = localNodeId;
    this.executor = new SSHExecutor(localNodeId, sshKeyPath);
    this.presenceState = new Map();

    // Every remote node starts as "unknown" until it is first probed.
    for (const nodeId of Object.keys(TRIAD_SSH_NODES)) {
      if (nodeId !== localNodeId) {
        this.presenceState.set(nodeId, {
          lastCheck: 0,
          status: "unknown",
        });
      }
    }
  }

  /**
   * Probe one node by reading its workspace HEAD and clock over SSH.
   *
   * Never rejects because of an SSH failure: any failure marks the node
   * "dead" and is reported through "node:unreachable".
   *
   * @param {string} targetNodeId
   * @returns {Promise<{ alive: boolean, gitHash?: string, timestamp: number }>}
   *   On success `timestamp` is the remote `date +%s` value (seconds); on
   *   failure it is the local Date.now() (milliseconds).
   */
  async verifyNodePresence(targetNodeId) {
    try {
      const result = await this.executor.executeWithRetry(
        targetNodeId,
        `cd ${WORKSPACE_PATH} && git rev-parse HEAD && date +%s`,
        2,
        2000,
      );

      if (result.exitCode === 0 && result.stdout) {
        // Line 1 is the commit hash, line 2 the remote epoch seconds.
        const parts = result.stdout.trim().split("\n");
        const gitHash = parts[0];
        const timestamp = parseInt(parts[1], 10);

        this.presenceState.set(targetNodeId, {
          lastCheck: Date.now(),
          status: "alive",
        });

        this.emit("node:verified", { nodeId: targetNodeId, gitHash, timestamp });
        return { alive: true, gitHash, timestamp };
      }

      throw new Error("Node verification failed");
    } catch (err) {
      this.presenceState.set(targetNodeId, {
        lastCheck: Date.now(),
        status: "dead",
      });

      this.emit("node:unreachable", { nodeId: targetNodeId, error: err });
      return { alive: false, timestamp: Date.now() };
    }
  }

  /**
   * Probe every node except the local one.
   *
   * The probes are independent, so they run concurrently; the returned Map
   * keeps the key order of TRIAD_SSH_NODES regardless of completion order.
   *
   * @returns {Promise<Map<string, { alive: boolean, gitHash?: string }>>}
   */
  async verifyAllNodes() {
    const remoteIds = Object.keys(TRIAD_SSH_NODES).filter(
      (nodeId) => nodeId !== this.localNodeId,
    );
    const outcomes = await Promise.all(
      remoteIds.map((nodeId) => this.verifyNodePresence(nodeId)),
    );

    const results = new Map();
    remoteIds.forEach((nodeId, index) => {
      const outcome = outcomes[index];
      results.set(nodeId, { alive: outcome.alive, gitHash: outcome.gitHash });
    });

    return results;
  }

  /**
   * Run the heartbeat command on a node.
   *
   * @param {string} targetNodeId
   * @throws {Error} When the remote command exits with a non-zero status.
   */
  async triggerHeartbeat(targetNodeId) {
    const command = buildCommand(SSHCommandType.HEARTBEAT_TRIGGER, targetNodeId);
    const result = await this.executor.execute(targetNodeId, command.command, {
      timeoutMs: command.timeoutMs,
    });

    // Do not announce a heartbeat that the remote shell failed to write.
    assertSSHSuccess(result, "Heartbeat trigger", targetNodeId);

    this.emit("heartbeat:triggered", { nodeId: targetNodeId });
  }

  /**
   * Run the sync command on a node.
   *
   * @param {string} targetNodeId
   * @throws {Error} When the remote command exits with a non-zero status.
   */
  async triggerSync(targetNodeId) {
    const command = buildCommand(SSHCommandType.SYNC_TRIGGER, targetNodeId);
    const result = await this.executor.execute(targetNodeId, command.command, {
      timeoutMs: command.timeoutMs,
    });

    assertSSHSuccess(result, "Sync trigger", targetNodeId);

    this.emit("sync:triggered", { nodeId: targetNodeId });
  }

  /**
   * Run the consensus query on a node.
   *
   * @param {string} targetNodeId
   * @returns {Promise<{ hasPendingVotes: boolean }>} True when the query printed any row.
   * @throws {Error} When the remote command exits with a non-zero status.
   */
  async triggerConsensusCheck(targetNodeId) {
    const command = buildCommand(SSHCommandType.CONSENSUS_TRIGGER, targetNodeId);
    const result = await this.executor.execute(targetNodeId, command.command, {
      timeoutMs: command.timeoutMs,
    });

    // A failed query prints nothing, which would otherwise be read as
    // "no pending votes".
    assertSSHSuccess(result, "Consensus check", targetNodeId);

    const hasPendingVotes = Boolean(result.stdout && result.stdout.trim().length > 0);

    this.emit("consensus:checked", { nodeId: targetNodeId, hasPendingVotes });
    return { hasPendingVotes };
  }

  /**
   * Run the gateway restart command on a node.
   *
   * @param {string} targetNodeId
   * @throws {Error} When the remote command exits with a non-zero status.
   */
  async restartGateway(targetNodeId) {
    const command = buildCommand(SSHCommandType.RESTART_GATEWAY, targetNodeId);
    const result = await this.executor.execute(targetNodeId, command.command, {
      timeoutMs: command.timeoutMs,
    });

    assertSSHSuccess(result, "Gateway restart", targetNodeId);

    this.emit("gateway:restarted", { nodeId: targetNodeId });
  }

  /**
   * Run the workspace pull command on a node, with one retry.
   *
   * @param {string} targetNodeId
   * @throws {Error} When the pull does not succeed.
   */
  async pullWorkspace(targetNodeId) {
    const command = buildCommand(SSHCommandType.PULL_WORKSPACE, targetNodeId);
    // The command's own timeout is handed over as the options argument so
    // the pull is not cut off at the executor's default; an executor whose
    // executeWithRetry takes no options simply ignores the extra argument.
    const result = await this.executor.executeWithRetry(targetNodeId, command.command, 2, 5000, {
      timeoutMs: command.timeoutMs,
    });

    assertSSHSuccess(result, "Workspace pull", targetNodeId);

    this.emit("workspace:pulled", { nodeId: targetNodeId });
  }

  /**
   * Run the ledger backup command on a node.
   *
   * @param {string} targetNodeId
   * @returns {Promise<string>} stdout of the backup command.
   * @throws {Error} When the remote command exits with a non-zero status.
   */
  async backupLedger(targetNodeId) {
    const command = buildCommand(SSHCommandType.BACKUP_LEDGER, targetNodeId);
    const result = await this.executor.execute(targetNodeId, command.command, {
      timeoutMs: command.timeoutMs,
    });

    assertSSHSuccess(result, "Ledger backup", targetNodeId);

    this.emit("ledger:backedup", { nodeId: targetNodeId });
    return result.stdout;
  }

  /**
   * Compare every remote node's workspace HEAD with the local one.
   *
   * @returns {Promise<{ diverged: boolean, details: Map<string, string> }>}
   *   `details` maps node id to "synced", "unreachable", "no_git_hash", or
   *   the differing remote hash.
   * @throws {Error} When the local hash cannot be read.
   */
  async checkDivergence() {
    const results = await this.verifyAllNodes();
    const details = new Map();
    let diverged = false;

    // The local hash is read through the same SSH executor, addressed at
    // the local node id.
    const localResult = await this.executor.execute(
      this.localNodeId,
      `cd ${WORKSPACE_PATH} && git rev-parse HEAD`,
    );
    // Without this check a failed lookup yields an empty hash and every
    // reachable node would be reported as diverged.
    assertSSHSuccess(localResult, "Local git hash lookup", this.localNodeId);
    const localGitHash = localResult.stdout.trim();

    for (const [nodeId, result] of results) {
      if (!result.alive) {
        details.set(nodeId, "unreachable");
        diverged = true;
        continue;
      }

      if (!result.gitHash) {
        details.set(nodeId, "no_git_hash");
        diverged = true;
        continue;
      }

      if (result.gitHash !== localGitHash) {
        details.set(nodeId, result.gitHash);
        diverged = true;
      } else {
        details.set(nodeId, "synced");
      }
    }

    if (diverged) {
      this.emit("divergence:detected", { details });
    }

    return { diverged, details };
  }

  /**
   * Recovery sequence: back up the ledger, pull the workspace, restart the
   * gateway, then verify the node answers again.
   *
   * @param {string} targetNodeId
   * @throws {Error} The first step failure, after emitting "recovery:failed".
   */
  async initiateRecovery(targetNodeId) {
    logger.warn(`Initiating recovery for ${targetNodeId}`);

    try {
      // Step 1: Backup current state
      await this.backupLedger(targetNodeId);

      // Step 2: Pull latest workspace
      await this.pullWorkspace(targetNodeId);

      // Step 3: Restart gateway
      await this.restartGateway(targetNodeId);

      // Step 4: Verify recovery
      const result = await this.verifyNodePresence(targetNodeId);
      if (!result.alive) {
        throw new Error("Recovery verification failed");
      }

      this.emit("recovery:complete", { nodeId: targetNodeId, gitHash: result.gitHash });
    } catch (err) {
      this.emit("recovery:failed", { nodeId: targetNodeId, error: err });
      throw err;
    }
  }

  /**
   * @returns {Map<string, { lastCheck: number, status: string }>} The live
   *   presence map (not a copy), keyed by remote node id.
   */
  getPresenceState() {
    return this.presenceState;
  }
}
|
|
|
|
// ============================================================================
|
|
// CLI Interface
|
|
// ============================================================================
|
|
|
|
/**
 * Command-line entry point: `<command> [target-node]` read from process.argv.
 *
 * The local node id comes from the TRIAD_NODE_ID environment variable and
 * defaults to "TM-1". Exits with status 1 on missing arguments, an unknown
 * command, or any thrown error; `verify` sets exit status 1 when the node
 * is unreachable.
 *
 * @returns {Promise<void>}
 */
export async function runCLI() {
  const [command, targetNode] = process.argv.slice(2);

  // `status` reports on every node, so it is the only command that may be
  // called without a target.
  const needsTarget = command !== "status";

  if (!command || (needsTarget && !targetNode)) {
    console.log(`
Triad SSH Hooks — Node-to-Node Trigger Scripts

Usage: node triad-ssh-hooks.mjs <command> <target-node>

Commands:
verify Verify node presence and git state
heartbeat Trigger heartbeat on remote node
sync Trigger workspace sync on remote node
consensus Check for pending consensus votes
restart Restart gateway on remote node
pull Pull workspace from git on remote node
backup Backup consensus ledger on remote node
recover Initiate full recovery sequence
status Show presence state for all nodes

Nodes: TM-1, TM-2, TM-3, TM-4

Examples:
node triad-ssh-hooks.mjs verify TM-2
node triad-ssh-hooks.mjs restart TM-3
node triad-ssh-hooks.mjs status
`);
    process.exit(1);
  }

  const localNodeId = process.env.TRIAD_NODE_ID || "TM-1";
  const manager = new TriadSSHManager(localNodeId);

  manager.on("node:verified", (data) => {
    console.log(`✅ ${data.nodeId} verified (git: ${data.gitHash})`);
  });

  manager.on("node:unreachable", (data) => {
    console.error(`❌ ${data.nodeId} unreachable`);
  });

  try {
    switch (command) {
      case "verify": {
        const presence = await manager.verifyNodePresence(targetNode);
        // verifyNodePresence reports failure through its return value, not
        // by throwing; surface it in the exit status for scripts.
        if (!presence.alive) {
          process.exitCode = 1;
        }
        break;
      }

      case "heartbeat":
        await manager.triggerHeartbeat(targetNode);
        console.log(`✅ Heartbeat triggered on ${targetNode}`);
        break;

      case "sync":
        await manager.triggerSync(targetNode);
        console.log(`✅ Sync triggered on ${targetNode}`);
        break;

      case "consensus": {
        const result = await manager.triggerConsensusCheck(targetNode);
        console.log(
          `Consensus check on ${targetNode}: ${result.hasPendingVotes ? "pending votes found" : "no pending votes"}`,
        );
        break;
      }

      case "restart":
        await manager.restartGateway(targetNode);
        console.log(`✅ Gateway restarted on ${targetNode}`);
        break;

      case "pull":
        await manager.pullWorkspace(targetNode);
        console.log(`✅ Workspace pulled on ${targetNode}`);
        break;

      case "backup":
        await manager.backupLedger(targetNode);
        console.log(`✅ Ledger backed up on ${targetNode}`);
        break;

      case "recover":
        await manager.initiateRecovery(targetNode);
        console.log(`✅ Recovery complete for ${targetNode}`);
        break;

      case "status": {
        // A freshly created manager has only "unknown" entries, so probe
        // the nodes first to have something real to report.
        await manager.verifyAllNodes();
        const presence = manager.getPresenceState();
        console.log("Triad Node Presence State:");
        for (const [nodeId, state] of presence) {
          console.log(
            ` ${nodeId}: ${state.status} (last check: ${new Date(state.lastCheck).toISOString()})`,
          );
        }
        break;
      }

      default:
        console.error(`Unknown command: ${command}`);
        process.exit(1);
    }
  } catch (err) {
    console.error("Error:", err);
    process.exit(1);
  }
}
|
|
|
|
// Run the CLI only when this file is the script handed to `node`.
// process.argv[1] is undefined when node was started without a script path
// (for example `node -e` or the REPL), so check its type first; otherwise
// merely importing this module would throw a TypeError.
if (typeof process.argv[1] === "string" && process.argv[1].endsWith("triad-ssh-hooks.mjs")) {
  void runCLI();
}
|