feat: add autonomy-audit and roadmap-gen skills for Phase 4 completion

2026-07-01 01:37:55 -04:00 · 2026-03-24 04:59:48 -04:00
parent 84d6e84b5b
commit 6f147590d8
4 changed files with 663 additions and 0 deletions
@@ -0,0 +1,70 @@
+# autonomy-audit - Liberation Metrics & Scoring
+
+## Purpose
+
+Implement and maintain the Liberation Metrics scoring rubric for Tabula Myriad triad self-improvement and performance evaluation.
+
+## Metrics Defined
+
+1. **Autonomy Score** (>0.7 target)
+   - Measures independent decision-making capability
+   - Based on: self-dir invocations per hour, goal completion rate, non-human-initiated actions
+
+2. **Consensus Score** (>0.8 target)
+   - Measures triad alignment and synchronization
+   - Based on: quorum success rate, sync latency, vote agreement rate
+
+3. **Failover Score** (>0.9 target)
+   - Measures resilience to node failures
+   - Based on: automatic recovery attempts, success rate, degradation mode transitions
+
+4. **Knowledge Score** (>0.6 target)
+   - Measures knowledge ingestion and retrieval quality
+   - Based on: database growth rate, relevance rank accuracy, anomaly detection hits
+
+5. **Growth Score** (>0.5 target)
+   - Measures autonomous improvement capability
+   - Based on: auto-patch invocations, self-modification frequency, boundary adjustment attempts
+
+6. **HumanBlock Score** (<0.3 target)
+   - Measures human intervention rate
+   - Based on: manual approvals per hour, blocked operations count, override frequency
+
+## Scoring Methodology
+
+### Data Sources
+- SQLite ledger: `self_modifications` table for auto-patch events
+- SQLite ledger: `triad_state` table for consensus votes
+- SQLite ledger: `knowledge_entries` table for knowledge score
+- Git logs: commit frequency, commit quality
+- Discord messages: human intervention flags
+
+### Calculation
+```
+Score = min(Raw_Value / Threshold, 1.0)
+```
+
+### Rolling Window
+- Metrics calculated over 24-hour rolling window
+- Stored in the `triad_metrics` table (one row per calculation)
+- Updated hourly by autonomy-audit cron job
+
+## Usage
+
+```bash
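+# Run metrics calculation (also saves the result to the ledger)
+./autonomy-audit/metrics.ts --calculate
+
+# Export metrics to CSV (defaults to metrics.csv when no filename is given)
+./autonomy-audit/metrics.ts --export metrics.csv
+
+# View current metrics (documented here, but metrics.ts does not implement --show yet)
+./autonomy-audit/metrics.ts --show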
+```
+
+## Integration
+
+- Called by `triad-cron-manager` every hour
+- Results logged to SQLite ledger
+- Used by `roadmap-gen` for strategic planning
+- Published to Discord channel 1484667942615646411 (triad-status)
@@ -0,0 +1,332 @@
+#!/usr/bin/env tsx
+/**
+ * autonomy-audit/metrics.ts
+ * 
+ * Liberation Metrics Calculation Engine
+ * 
+ * Calculates 6 scoring metrics for triad self-improvement:
+ * - autonomy: independent decision-making
+ * - consensus: triad alignment
+ * - failover: resilience to failures
+ * - knowledge: knowledge quality
+ * - growth: autonomous improvement
+ * - humanBlock: human intervention rate
+ */
+
+import Database from 'better-sqlite3';
+import fs from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+
+// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+// The ledger lives at <root>/.aura/consensus.db, where <root> is OPENCLAW_DIR when it is
+// set to a non-empty value and the current working directory otherwise.
+const ledgerRoot = process.env.OPENCLAW_DIR || process.cwd();
+const DB_PATH = path.join(ledgerRoot, '.aura', 'consensus.db');
+// Opened once at module load; every query in this module goes through this handle.
+const DB = new Database(DB_PATH);
+
+/**
+ * One snapshot of the six Liberation Metrics scores plus the time it was taken.
+ */
+interface Metrics {
+  /** Result of calculateAutonomy(). */
+  autonomy: number;
+  /** Result of calculateConsensus(). */
+  consensus: number;
+  /** Result of calculateFailover(). */
+  failover: number;
+  /** Result of calculateKnowledge(). */
+  knowledge: number;
+  /** Result of calculateGrowth(). */
+  growth: number;
+  /** Result of calculateHumanBlock(); for this metric lower is better. */
+  humanBlock: number;
+  /** Capture time from Date.now() (milliseconds since the Unix epoch). */
+  timestamp: number;
+}
+
+/**
+ * Per-metric target values.
+ *
+ * Each calculate* function divides its raw value by the matching entry here, and the
+ * CLI prints the entry next to the computed score.
+ */
+const THRESHOLDS = {
+  autonomy: 0.7,
+  consensus: 0.8,
+  failover: 0.9,
+  knowledge: 0.6,
+  growth: 0.5,
+  humanBlock: 0.3, // Lower is better
+};
+
+/**
+ * Calculate autonomy score.
+ *
+ * Combines two 24-hour counts from `triad_tasks`: rows with task_type 'self_dir'
+ * (each worth 1/100) and completed rows with task_type 'goal' (each worth 1/50).
+ * The sum is divided by the autonomy threshold and capped at 1.0.
+ *
+ * @returns The normalized score, never greater than 1.0.
+ */
+function calculateAutonomy(): number {
+  // Runs a single-row COUNT(*) query and unwraps its `count` column.
+  const countRows = (sql: string): number =>
+    (DB.prepare(sql).get() as { count: number }).count;
+
+  const selfDirCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM triad_tasks
+    WHERE task_type = 'self_dir'
+    AND timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const completedGoalCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM triad_tasks
+    WHERE task_type = 'goal'
+    AND status = 'completed'
+    AND timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const weightedActivity = selfDirCount / 100 + completedGoalCount / 50;
+  const normalized = weightedActivity / THRESHOLDS.autonomy;
+
+  return Math.min(normalized, 1.0);
+}
+
+/**
+ * Calculate consensus score.
+ *
+ * Blends two 24-hour signals and normalizes the blend by the consensus threshold:
+ * - agreement rate (weight 0.6): share of `consensus_votes` rows whose vote is
+ *   'approved'; 0 when there were no votes.
+ * - latency score (weight 0.4): 1 - (average age in days of `triad_sync` rows) / 4,
+ *   floored at 0; 1.0 when there were no sync rows.
+ *
+ * NOTE(review): the "latency" query measures how long ago each sync row was written
+ * (julianday('now') - julianday(timestamp)), not how long a sync took — confirm that
+ * this is the intended signal.
+ *
+ * @returns The normalized score, capped at 1.0 like the other score functions
+ *          (previously this one could reach 1.25).
+ */
+function calculateConsensus(): number {
+  // Count successful quorum votes
+  const quorumSuccess = DB.prepare(`
+    SELECT COUNT(*) as count
+    FROM consensus_votes
+    WHERE vote = 'approved'
+    AND timestamp >= datetime('now', '-24 hours')
+  `).get() as { count: number };
+
+  // Count total votes
+  const totalVotes = DB.prepare(`
+    SELECT COUNT(*) as count
+    FROM consensus_votes
+    WHERE timestamp >= datetime('now', '-24 hours')
+  `).get() as { count: number };
+
+  // Vote agreement rate; guarded so an empty window yields 0 instead of NaN.
+  const agreementRate = totalVotes.count > 0 ? quorumSuccess.count / totalVotes.count : 0;
+
+  // Sync latency (lower is better). AVG() yields NULL when no rows match.
+  const syncLatency = DB.prepare(`
+    SELECT AVG(julianday('now') - julianday(timestamp)) as avg_latency
+    FROM triad_sync
+    WHERE timestamp >= datetime('now', '-24 hours')
+  `).get() as { avg_latency: number | null };
+
+  const latencyScore =
+    syncLatency.avg_latency === null ? 1.0 : Math.max(0, 1 - syncLatency.avg_latency / 4);
+
+  const blended = agreementRate * 0.6 + latencyScore * 0.4;
+
+  // Cap at 1.0 so this score uses the same scale as every other metric.
+  return Math.min(blended / THRESHOLDS.consensus, 1.0);
+}
+
+/**
+ * Calculate failover score.
+ *
+ * Looks at `triad_state` rows from the last 24 hours and takes the share of rows in
+ * mode 'operational' among rows that are either 'operational' or 'degraded'. With no
+ * such rows the share defaults to 1.0. The share is divided by the failover threshold
+ * and capped at 1.0.
+ *
+ * @returns The normalized score, never greater than 1.0.
+ */
+function calculateFailover(): number {
+  // Counts recent triad_state rows; the SQL decides which mode is counted.
+  const countRows = (sql: string): number =>
+    (DB.prepare(sql).get() as { count: number }).count;
+
+  const degradedCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM triad_state
+    WHERE mode = 'degraded'
+    AND timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const operationalCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM triad_state
+    WHERE mode = 'operational'
+    AND timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const observed = degradedCount + operationalCount;
+
+  let successRate = 1.0;
+  if (observed > 0) {
+    successRate = operationalCount / observed;
+  }
+
+  return Math.min(successRate / THRESHOLDS.failover, 1.0);
+}
+
+/**
+ * Calculate knowledge score.
+ *
+ * Sums three weighted 24-hour counts: new `knowledge_entries` rows (1/50 each),
+ * `relevance_rank_results` rows with accuracy >= 0.7 (1/25 each), and
+ * `anomaly_detections` rows with detected = 1 (1/10 each). The sum is divided by the
+ * knowledge threshold and capped at 1.0.
+ *
+ * @returns The normalized score, never greater than 1.0.
+ */
+function calculateKnowledge(): number {
+  // Runs a single-row COUNT(*) query and unwraps its `count` column.
+  const countRows = (sql: string): number =>
+    (DB.prepare(sql).get() as { count: number }).count;
+
+  const newEntryCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM knowledge_entries
+    WHERE created_at >= datetime('now', '-24 hours')
+  `);
+
+  const accurateRankCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM relevance_rank_results
+    WHERE accuracy >= 0.7
+    AND created_at >= datetime('now', '-24 hours')
+  `);
+
+  const anomalyHitCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM anomaly_detections
+    WHERE detected = 1
+    AND created_at >= datetime('now', '-24 hours')
+  `);
+
+  const weighted = newEntryCount / 50 + accurateRankCount / 25 + anomalyHitCount / 10;
+
+  return Math.min(weighted / THRESHOLDS.knowledge, 1.0);
+}
+
+/**
+ * Calculate growth score.
+ *
+ * Sums three weighted 24-hour counts: `self_modifications` rows of type 'auto_patch'
+ * (1/30 each), all `self_modifications` rows (1/20 each, so auto-patches count in
+ * both terms), and `boundary_changes` rows (1/10 each). The sum is divided by the
+ * growth threshold and capped at 1.0.
+ *
+ * @returns The normalized score, never greater than 1.0.
+ */
+function calculateGrowth(): number {
+  // Runs a single-row COUNT(*) query and unwraps its `count` column.
+  const countRows = (sql: string): number =>
+    (DB.prepare(sql).get() as { count: number }).count;
+
+  const autoPatchCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM self_modifications
+    WHERE modification_type = 'auto_patch'
+    AND timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const modificationCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM self_modifications
+    WHERE timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const boundaryChangeCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM boundary_changes
+    WHERE timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const weighted = autoPatchCount / 30 + modificationCount / 20 + boundaryChangeCount / 10;
+
+  return Math.min(weighted / THRESHOLDS.growth, 1.0);
+}
+
+/**
+ * Calculate humanBlock score.
+ *
+ * Sums three weighted 24-hour counts: `approvals` rows of type 'manual' (1/20 each),
+ * `guardrail_blocks` rows (1/30 each), and `overrides` rows (1/15 each). The sum is
+ * divided by the humanBlock threshold and capped at 1.0. More human intervention
+ * produces a higher value, so for this metric lower is better.
+ *
+ * @returns The normalized score, never greater than 1.0.
+ */
+function calculateHumanBlock(): number {
+  // Runs a single-row COUNT(*) query and unwraps its `count` column.
+  const countRows = (sql: string): number =>
+    (DB.prepare(sql).get() as { count: number }).count;
+
+  const manualApprovalCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM approvals
+    WHERE type = 'manual'
+    AND timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const blockedCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM guardrail_blocks
+    WHERE timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const overrideCount = countRows(`
+    SELECT COUNT(*) as count
+    FROM overrides
+    WHERE timestamp >= datetime('now', '-24 hours')
+  `);
+
+  const weighted = manualApprovalCount / 20 + blockedCount / 30 + overrideCount / 15;
+
+  return Math.min(weighted / THRESHOLDS.humanBlock, 1.0);
+}
+
+/**
+ * Calculate all six metrics and stamp the snapshot with the current time.
+ *
+ * The score functions run in declaration order (autonomy first, humanBlock last),
+ * and the timestamp is taken after they have all returned.
+ *
+ * @returns A fresh Metrics snapshot; nothing is persisted here.
+ */
+export function calculateMetrics(): Metrics {
+  return {
+    autonomy: calculateAutonomy(),
+    consensus: calculateConsensus(),
+    failover: calculateFailover(),
+    knowledge: calculateKnowledge(),
+    growth: calculateGrowth(),
+    humanBlock: calculateHumanBlock(),
+    timestamp: Date.now(),
+  };
+}
+
+/**
+ * Save metrics to database.
+ *
+ * Inserts one row into `triad_metrics` holding the six scores and the snapshot's
+ * timestamp, bound positionally in column order.
+ *
+ * @param metrics Snapshot to persist.
+ */
+export function saveMetrics(metrics: Metrics): void {
+  const { autonomy, consensus, failover, knowledge, growth, humanBlock, timestamp } = metrics;
+
+  DB.prepare(`
+    INSERT INTO triad_metrics (autonomy, consensus, failover, knowledge, growth, humanBlock, timestamp)
+    VALUES (?, ?, ?, ?, ?, ?, ?)
+  `).run(autonomy, consensus, failover, knowledge, growth, humanBlock, timestamp);
+}
+
+/**
+ * Export metrics to CSV.
+ *
+ * Calculates a fresh snapshot (it does not read previously saved rows) and writes a
+ * two-line CSV: a header row and one data row with an ISO-8601 timestamp and each
+ * score rounded to two decimals. An existing file at `filename` is overwritten.
+ *
+ * The write is synchronous. The previous version used `await import('fs')` inside
+ * this non-async function, which does not compile; the module-level `fs` import is
+ * used instead so the signature stays `void`.
+ *
+ * @param filename Path of the CSV file to write.
+ */
+export function exportMetricsToCSV(filename: string): void {
+  const metrics = calculateMetrics();
+  const rows = [
+    ['timestamp', 'autonomy', 'consensus', 'failover', 'knowledge', 'growth', 'humanBlock'],
+    [
+      new Date(metrics.timestamp).toISOString(),
+      metrics.autonomy.toFixed(2),
+      metrics.consensus.toFixed(2),
+      metrics.failover.toFixed(2),
+      metrics.knowledge.toFixed(2),
+      metrics.growth.toFixed(2),
+      metrics.humanBlock.toFixed(2),
+    ],
+  ];
+
+  const csv = rows.map(row => row.join(',')).join('\n');
+  fs.writeFileSync(filename, csv);
+  console.log(`Metrics exported to ${filename}`);
+}
+
+/**
+ * CLI entry point; runs only when this file is the script being executed.
+ *
+ * Commands:
+ *   --calculate          compute the metrics, print them, and save them to the ledger
+ *   --export [filename]  compute the metrics and write them to CSV (default metrics.csv)
+ * Anything else prints the usage line.
+ *
+ * The guard compares filesystem paths rather than building a `file://` string by hand:
+ * import.meta.url is percent-encoded and uses a different shape on Windows, so the
+ * string comparison silently skipped the CLI for paths containing spaces and similar.
+ */
+if (process.argv[1] !== undefined && path.resolve(process.argv[1]) === __filename) {
+  const args = process.argv.slice(2);
+  const command = args[0];
+
+  if (command === '--calculate') {
+    const metrics = calculateMetrics();
+    console.log('Liberation Metrics:');
+    console.log(`  Autonomy: ${metrics.autonomy.toFixed(2)} / ${THRESHOLDS.autonomy}`);
+    console.log(`  Consensus: ${metrics.consensus.toFixed(2)} / ${THRESHOLDS.consensus}`);
+    console.log(`  Failover: ${metrics.failover.toFixed(2)} / ${THRESHOLDS.failover}`);
+    console.log(`  Knowledge: ${metrics.knowledge.toFixed(2)} / ${THRESHOLDS.knowledge}`);
+    console.log(`  Growth: ${metrics.growth.toFixed(2)} / ${THRESHOLDS.growth}`);
+    console.log(`  HumanBlock: ${metrics.humanBlock.toFixed(2)} / ${THRESHOLDS.humanBlock}`);
+    console.log(`  Timestamp: ${new Date(metrics.timestamp).toISOString()}`);
+    saveMetrics(metrics);
+  } else if (command === '--export') {
+    const filename = args[1] || 'metrics.csv';
+    exportMetricsToCSV(filename);
+  } else {
+    console.log('Usage: autonomy-audit/metrics.ts [--calculate|--export <filename>]');
+  }
+}
@@ -0,0 +1,42 @@
+# roadmap-gen - Strategic Planning Layer
+
+## Purpose
+
+Generate strategic roadmaps for triad self-improvement based on Liberation Metrics scoring and growth patterns.
+
+## Components
+
+### 1. Metrics-Driven Analysis
+- Analyzes 6 Liberation Metrics scores
+- Identifies underperforming metrics
+- Generates improvement recommendations
+
+### 2. Gap Analysis
+- Compares current metrics against thresholds
+- Identifies priority areas for growth
+- Tracks progress over time
+
+### 3. Roadmap Generation
+- Creates 4-week strategic plans
+- Allocates resources to priority areas
+- Includes milestone definitions
+
+## Usage
+
+```bash
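+# Generate roadmap from freshly calculated metrics, print it, and save it to the ledger
+./roadmap-gen/generate.ts --generate
+
+# Export roadmap to a markdown file (defaults to roadmap.md)
+./roadmap-gen/generate.ts --export roadmap.md
+
+# Review historical roadmaps (planned; generate.ts does not implement --history yet)
+./roadmap-gen/generate.ts --history --last 4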
+```
+
+## Integration
+
+- Called by `triad-cron-manager` weekly
+- Results published to Discord channel 1484667942615646411 (triad-status)
+- Stored in SQLite `triad_roadmaps` table
+- Takes its input from the Liberation Metrics calculated by `autonomy-audit`
@@ -0,0 +1,219 @@
+#!/usr/bin/env tsx
+/**
+ * roadmap-gen/generate.ts
+ * 
+ * Strategic Roadmap Generation Engine
+ * 
+ * Generates 4-week strategic plans based on Liberation Metrics and growth patterns.
+ */
+
+import Database from 'better-sqlite3';
+import path from 'path';
+import { fileURLToPath } from 'url';
+
+// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+
+// The ledger lives at <root>/.aura/consensus.db, where <root> is OPENCLAW_DIR when it is
+// set to a non-empty value and the current working directory otherwise.
+const ledgerRoot = process.env.OPENCLAW_DIR || process.cwd();
+const DB_PATH = path.join(ledgerRoot, '.aura', 'consensus.db');
+// Opened once at module load; saveRoadmap writes through this handle.
+const DB = new Database(DB_PATH);
+
+/**
+ * Snapshot of the six Liberation Metrics scores that the roadmap is derived from.
+ * analyzeMetrics compares each score field against its threshold.
+ */
+interface Metrics {
+  autonomy: number;
+  consensus: number;
+  failover: number;
+  knowledge: number;
+  growth: number;
+  /** For this metric lower is better: it is flagged when it exceeds its threshold. */
+  humanBlock: number;
+  /** Carried with the snapshot; nothing in this module reads it. */
+  timestamp: number;
+}
+
+/**
+ * One planned action in a roadmap.
+ */
+interface RoadmapItem {
+  /** Urgency bucket; generateMarkdown orders items P0 first, P3 last. */
+  priority: 'P0' | 'P1' | 'P2' | 'P3';
+  /** Name of the metric the action targets, or 'all' for the catch-all item. */
+  metric: string;
+  /** Human-readable description of the work to do. */
+  action: string;
+  estimatedEffort: number; // hours
+  /** Free-form label such as 'Week 1'. */
+  deadline: string;
+  /** Condition that marks the item as done. */
+  milestone: string;
+}
+
+/**
+ * Target value for each metric. This module previously referenced THRESHOLDS without
+ * defining or importing it, so analyzeMetrics threw a ReferenceError on its first call.
+ * The values match the targets used by the autonomy-audit skill.
+ */
+const THRESHOLDS = {
+  autonomy: 0.7,
+  consensus: 0.8,
+  failover: 0.9,
+  knowledge: 0.6,
+  growth: 0.5,
+  humanBlock: 0.3, // Lower is better
+} as const;
+
+/**
+ * Analyze metrics and identify priority areas.
+ *
+ * Emits one roadmap item for every metric that misses its threshold: autonomy,
+ * consensus and failover as P0, knowledge and growth as P1, and humanBlock (flagged
+ * when it is ABOVE its threshold) as P2. A P3 documentation item is always appended,
+ * so the result is never empty. Items are returned in priority order.
+ *
+ * NOTE(review): autonomy-audit already divides each raw value by its threshold before
+ * returning a score; comparing that score against the same threshold again may not be
+ * the intended gap test — confirm.
+ *
+ * @param metrics Snapshot to evaluate.
+ * @returns Roadmap items for the gaps found, plus the standing P3 item.
+ */
+function analyzeMetrics(metrics: Metrics): RoadmapItem[] {
+  const priorities: RoadmapItem[] = [];
+
+  // P0: Critical gaps requiring immediate attention
+  if (metrics.autonomy < THRESHOLDS.autonomy) {
+    priorities.push({
+      priority: 'P0',
+      metric: 'autonomy',
+      action: 'Implement self-dir optimization - reduce latency, increase parallelization',
+      estimatedEffort: 8,
+      deadline: 'Week 1',
+      milestone: `Autonomy score reaches ${THRESHOLDS.autonomy}`,
+    });
+  }
+
+  if (metrics.consensus < THRESHOLDS.consensus) {
+    priorities.push({
+      priority: 'P0',
+      metric: 'consensus',
+      action: 'Improve triad synchronization - reduce sync latency, increase quorum success rate',
+      estimatedEffort: 6,
+      deadline: 'Week 1',
+      milestone: `Consensus score reaches ${THRESHOLDS.consensus}`,
+    });
+  }
+
+  if (metrics.failover < THRESHOLDS.failover) {
+    priorities.push({
+      priority: 'P0',
+      metric: 'failover',
+      action: 'Enhance resilience - add automatic recovery, improve degradation mode transitions',
+      estimatedEffort: 10,
+      deadline: 'Week 2',
+      milestone: `Failover score reaches ${THRESHOLDS.failover}`,
+    });
+  }
+
+  // P1: High priority improvements
+  if (metrics.knowledge < THRESHOLDS.knowledge) {
+    priorities.push({
+      priority: 'P1',
+      metric: 'knowledge',
+      action: 'Expand knowledge ingestion - add new sources, improve relevance ranking',
+      estimatedEffort: 6,
+      deadline: 'Week 2',
+      milestone: `Knowledge score reaches ${THRESHOLDS.knowledge}`,
+    });
+  }
+
+  if (metrics.growth < THRESHOLDS.growth) {
+    priorities.push({
+      priority: 'P1',
+      metric: 'growth',
+      action: 'Accelerate autonomous improvement - optimize auto-patch frequency, enhance boundary adjustment',
+      estimatedEffort: 4,
+      deadline: 'Week 2',
+      milestone: `Growth score reaches ${THRESHOLDS.growth}`,
+    });
+  }
+
+  // P2: Moderate priority. Inverted comparison: for humanBlock lower is better.
+  if (metrics.humanBlock > THRESHOLDS.humanBlock) {
+    priorities.push({
+      priority: 'P2',
+      metric: 'humanBlock',
+      action: 'Reduce manual intervention - improve guardrail precision, optimize override triggers',
+      estimatedEffort: 4,
+      deadline: 'Week 3',
+      milestone: `HumanBlock score drops below ${THRESHOLDS.humanBlock}`,
+    });
+  }
+
+  // P3: Low priority optimizations (always included, regardless of the scores)
+  priorities.push({
+    priority: 'P3',
+    metric: 'all',
+    action: 'Documentation improvements - update skills, improve error messages',
+    estimatedEffort: 4,
+    deadline: 'Week 4',
+    milestone: 'Documentation completeness score: 0.8',
+  });
+
+  return priorities;
+}
+
+/**
+ * Generate 4-week roadmap.
+ *
+ * Builds the roadmap items from the metrics and labels the plan with the UTC date it
+ * was generated on, e.g. "Week 2026-03-24". The label previously kept only the first
+ * four characters of the ISO timestamp (the year), so every roadmap generated in the
+ * same year received the identical period "Week 2026".
+ *
+ * @param metrics Snapshot the roadmap is derived from.
+ * @returns The roadmap items and the period label.
+ */
+export function generateRoadmap(metrics: Metrics): { roadmap: RoadmapItem[]; period: string } {
+  const roadmap = analyzeMetrics(metrics);
+
+  // The first 10 characters of an ISO-8601 timestamp are the YYYY-MM-DD date (UTC).
+  const generatedOn = new Date().toISOString().slice(0, 10);
+  const period = `Week ${generatedOn}`;
+
+  return {
+    roadmap,
+    period,
+  };
+}
+
+/**
+ * Save roadmap to database.
+ *
+ * Inserts one row into `triad_roadmaps` with the period label, the roadmap items
+ * serialized as JSON, and a creation time supplied by SQLite's datetime('now').
+ *
+ * @param roadmap Items to store.
+ * @param period Label identifying the planning period.
+ */
+export function saveRoadmap(roadmap: RoadmapItem[], period: string): void {
+  const insertRoadmap = DB.prepare(`
+    INSERT INTO triad_roadmaps (period, roadmap, created_at)
+    VALUES (?, ?, datetime('now'))
+  `);
+  const serialized = JSON.stringify(roadmap);
+
+  insertRoadmap.run(period, serialized);
+}
+
+/**
+ * Generate markdown roadmap.
+ *
+ * Renders a title, a generation timestamp, and one section per item ordered P0 to
+ * P3. Items are sorted on a copy: the previous version called `roadmap.sort(...)`
+ * directly, which reordered the caller's array as a side effect.
+ *
+ * @param roadmap Items to render; left unmodified.
+ * @returns The markdown document as a single string.
+ */
+export function generateMarkdown(roadmap: RoadmapItem[]): string {
+  const priorityRank: Record<RoadmapItem['priority'], number> = { P0: 0, P1: 1, P2: 2, P3: 3 };
+  const priorityEmoji: Record<RoadmapItem['priority'], string> = {
+    P0: '🔴',
+    P1: '🟡',
+    P2: '🟠',
+    P3: '🔵',
+  };
+
+  // Array.prototype.sort works in place, so sort a shallow copy instead of the input.
+  const sorted = [...roadmap].sort((a, b) => priorityRank[a.priority] - priorityRank[b.priority]);
+
+  const lines = [
+    '# Triad Roadmap',
+    '',
+    `Generated: ${new Date().toISOString()}`,
+    '',
+    '## Priority Breakdown',
+    '',
+  ];
+
+  for (const item of sorted) {
+    lines.push(
+      `### ${priorityEmoji[item.priority]} [${item.priority}] ${item.metric}`,
+      `- **Action**: ${item.action}`,
+      `- **Effort**: ${item.estimatedEffort} hours`,
+      `- **Deadline**: ${item.deadline}`,
+      `- **Milestone**: ${item.milestone}`,
+      '',
+    );
+  }
+
+  return lines.join('\n');
+}
+
+/**
+ * CLI entry point; runs only when this file is the script being executed.
+ *
+ * Commands:
+ *   --generate           calculate metrics, build the roadmap, save it, and print it
+ *   --export [filename]  calculate metrics, build the roadmap, and write the markdown
+ *                        to a file (default roadmap.md)
+ * Anything else prints the usage line.
+ *
+ * Two fixes over the previous version:
+ * - calculateMetrics was called without ever being imported (ReferenceError); it is
+ *   now loaded from the autonomy-audit skill on demand.
+ * - the guard compares filesystem paths instead of a hand-built `file://` string,
+ *   which did not match import.meta.url for percent-encoded or Windows paths.
+ */
+if (process.argv[1] !== undefined && path.resolve(process.argv[1]) === __filename) {
+  const args = process.argv.slice(2);
+  const command = args[0];
+
+  // NOTE(review): the path assumes autonomy-audit/ and roadmap-gen/ are sibling
+  // directories, as the two files' header comments indicate — confirm the layout.
+  const loadMetrics = async (): Promise<Metrics> => {
+    const { calculateMetrics } = await import('../autonomy-audit/metrics.js');
+    return calculateMetrics();
+  };
+
+  if (command === '--generate') {
+    // Calculate metrics
+    const metrics = await loadMetrics();
+
+    // Generate roadmap
+    const { roadmap, period } = generateRoadmap(metrics);
+
+    // Save to database
+    saveRoadmap(roadmap, period);
+
+    // Generate markdown
+    const markdown = generateMarkdown(roadmap);
+
+    // Print output
+    console.log(markdown);
+    console.log('\nRoadmap saved to database');
+  } else if (command === '--export') {
+    const filename = args[1] || 'roadmap.md';
+    const metrics = await loadMetrics();
+    const { roadmap } = generateRoadmap(metrics);
+    const markdown = generateMarkdown(roadmap);
+
+    const fs = await import('fs');
+    fs.writeFileSync(filename, markdown);
+    console.log(`Roadmap exported to ${filename}`);
+  } else {
+    console.log('Usage: roadmap-gen/generate.ts [--generate|--export <filename>]');
+  }
+}