Add configuring status and improve error analysis

Handle the new 'configuring' status the same way as 'installing' across services and UI (filters, counts, badges, allowedStatus, stuck-install detection). Reclassify failed records with exit_code==0 as success (in the Fetch* paths and in main ingestion), remove the aggressive top-error truncation, and increase the error preview and full-display limits. Expand the exit code mappings and descriptions (many curl/apt/docker/signal/timeout codes added) and enhance categorizeErrorText to detect Docker/container, resource (OOM), and signal-related errors for better error_category assignment. Misc: add new HTML/CSS for the configuring badge and filter option, and adjust the related dashboard/error-analysis rendering.
This commit is contained in:
CanbiZ (MickLesk)
2026-02-17 09:23:11 +01:00
parent 780613f6ab
commit e8c1d68967
3 changed files with 119 additions and 34 deletions
+1 -1
View File
@@ -113,7 +113,7 @@ func (c *Cleaner) findStuckInstallations(ctx context.Context) ([]StuckRecord, er
cutoffStr := cutoff.Format("2006-01-02 15:04:05")
// Build filter: (status='installing' OR status='configuring') AND created < cutoff
filter := url.QueryEscape(fmt.Sprintf("status='installing' && created<'%s'", cutoffStr))
filter := url.QueryEscape(fmt.Sprintf("(status='installing' || status='configuring') && created<'%s'", cutoffStr))
var allRecords []StuckRecord
page := 1
+34 -21
View File
@@ -456,7 +456,7 @@ func aggregateRecords(records []TelemetryRecord, knownScripts map[string]ScriptI
st.Failed++
case "aborted":
st.Aborted++
case "installing":
case "installing", "configuring":
st.Installing++
}
}
@@ -622,7 +622,7 @@ func (s *ScriptStatsStore) IncrementalUpdate(ctx context.Context, repoSource str
st.Failed++
case "aborted":
st.Aborted++
case "installing":
case "installing", "configuring":
st.Installing++
}
added++
@@ -867,8 +867,8 @@ func (p *PBClient) FetchScriptAnalysisData(ctx context.Context, days int, repoSo
strings.Contains(strings.ToLower(r.Error), "aborted by user")) {
r.Status = "aborted"
}
// Reclassify failed+exit_code=0
if r.Status == "failed" && r.ExitCode == 0 && (r.Error == "" || strings.ToLower(r.Error) == "success") {
// Reclassify failed+exit_code=0 — exit_code=0 is NEVER an error
if r.Status == "failed" && r.ExitCode == 0 {
r.Status = "success"
}
@@ -886,7 +886,7 @@ func (p *PBClient) FetchScriptAnalysisData(ctx context.Context, days int, repoSo
a.failed++
case "aborted":
a.aborted++
case "installing":
case "installing", "configuring":
a.installing++
}
@@ -1041,12 +1041,12 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou
r.Status = "aborted"
}
// Reclassify: status="failed" with exit_code=0 and no error text is actually success
if r.Status == "failed" && r.ExitCode == 0 && (r.Error == "" || strings.ToLower(r.Error) == "success") {
// Reclassify: exit_code=0 is NEVER an error — always reclassify as success
if r.Status == "failed" && r.ExitCode == 0 {
r.Status = "success"
}
if r.Status == "installing" {
if r.Status == "installing" || r.Status == "configuring" {
stuckCount++
continue
}
@@ -1100,9 +1100,6 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou
}
if r.Error != "" && (appStats[key].topError == "" || len(r.Error) > len(appStats[key].topError)) {
appStats[key].topError = r.Error
if len(appStats[key].topError) > 150 {
appStats[key].topError = appStats[key].topError[:150] + "..."
}
}
if cat != "uncategorized" && appStats[key].topCategory == "" {
appStats[key].topCategory = cat
@@ -1214,6 +1211,9 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou
case 124:
desc = "Command timed out (timeout command)"
cat = "timeout"
case 125:
desc = "Docker daemon error (container failed to run)"
cat = "config"
case 126:
desc = "Command cannot execute (permission problem)"
cat = "permission"
@@ -1222,10 +1222,16 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou
cat = "command_not_found"
case 128:
desc = "Invalid argument to exit"
cat = "unknown"
cat = "signal"
case 129:
desc = "Killed by SIGHUP (terminal closed)"
cat = "signal"
case 130:
desc = "Script terminated by Ctrl+C (SIGINT)"
cat = "user_aborted"
case 131:
desc = "Killed by SIGQUIT (core dump)"
cat = "signal"
case 134:
desc = "Process aborted (SIGABRT)"
cat = "signal"
@@ -1402,8 +1408,8 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou
desc = "npm/pnpm/yarn: Unknown fatal error"
cat = "unknown"
case 255:
desc = "Script error (set -e / errexit triggered or SSH error)"
cat = "unknown"
desc = "DPKG: Fatal internal error / set -e triggered"
cat = "apt"
default:
if code > 128 && code < 192 {
sigNum := code - 128
@@ -1684,7 +1690,7 @@ func (p *PBClient) FetchDashboardData(ctx context.Context, days int, repoSource
}
case "aborted":
data.AbortedCount++
case "installing":
case "installing", "configuring":
data.InstallingCount++
}
@@ -2846,6 +2852,12 @@ func DashboardHTML() string {
border-color: rgba(234, 179, 8, 0.3);
}
.status-badge.configuring {
background: rgba(59, 130, 246, 0.15);
color: var(--accent-blue);
border-color: rgba(59, 130, 246, 0.3);
}
.status-badge.aborted {
background: rgba(168, 85, 247, 0.15);
color: var(--accent-purple);
@@ -3973,6 +3985,7 @@ func DashboardHTML() string {
<option value="failed">Failed</option>
<option value="aborted">Aborted</option>
<option value="installing">Installing</option>
<option value="configuring">Configuring</option>
<option value="unknown">Unknown</option>
</select>
<select id="filterOs" class="custom-select" onchange="filterTable()">
@@ -5475,9 +5488,9 @@ func ErrorAnalysisHTML() string {
const failRateColor = a.failure_rate > 50 ? 'var(--accent-red)' : a.failure_rate > 20 ? 'var(--accent-orange)' : 'var(--accent-yellow)';
const topCat = a.top_category ? '<span class="category-badge ' + a.top_category + '">' + escapeHtml(a.top_category) + '</span>' : '-';
const errorId = 'err-app-' + idx;
const shortError = escapeHtml((a.top_error || '-').substring(0, 80));
const shortError = escapeHtml((a.top_error || '-').substring(0, 120));
const fullError = escapeHtml(a.top_error || '-');
const isLong = (a.top_error || '').length > 80;
const isLong = (a.top_error || '').length > 120;
return '<tr>' +
'<td><strong>' + escapeHtml(a.app) + '</strong></td>' +
'<td><span class="type-badge ' + typeClass + '">' + (a.type || '-').toUpperCase() + '</span></td>' +
@@ -5488,7 +5501,7 @@ func ErrorAnalysisHTML() string {
'<td>' + (a.top_exit_code ? '<span class="exit-code err">' + a.top_exit_code + '</span>' : '-') + '</td>' +
'<td class="error-text">' +
'<div id="' + errorId + '-short">' + shortError + (isLong ? ' <a href="#" onclick="toggleError(\'' + errorId + '\');return false;" style="color:var(--accent-blue);font-size:11px;">show more</a>' : '') + '</div>' +
(isLong ? '<div id="' + errorId + '-full" style="display:none;white-space:pre-wrap;word-break:break-all;max-height:300px;overflow-y:auto;">' + fullError + ' <a href="#" onclick="toggleError(\'' + errorId + '\');return false;" style="color:var(--accent-blue);font-size:11px;">show less</a></div>' : '') +
(isLong ? '<div id="' + errorId + '-full" style="display:none;white-space:pre-wrap;word-break:break-all;max-height:600px;overflow-y:auto;">' + fullError + ' <a href="#" onclick="toggleError(\'' + errorId + '\');return false;" style="color:var(--accent-blue);font-size:11px;">show less</a></div>' : '') +
'</td>' +
'<td><button class="btn issue-btn" data-app="' + escapeAttr(a.app) + '" data-exit="' + (a.top_exit_code||0) + '" data-error="' + escapeAttr(a.top_error||'') + '" data-rate="' + a.failure_rate.toFixed(1) + '">🐛 Issue</button></td>' +
'</tr>';
@@ -5508,9 +5521,9 @@ func ErrorAnalysisHTML() string {
const catClass = (e.error_category || 'unknown').replace(/ /g, '_');
const os = e.os_type ? e.os_type + (e.os_version ? ' ' + e.os_version : '') : '-';
const errorId = 'err-recent-' + idx;
const shortError = escapeHtml((e.error || '-').substring(0, 80));
const shortError = escapeHtml((e.error || '-').substring(0, 120));
const fullError = escapeHtml(e.error || '-');
const isLong = (e.error || '').length > 80;
const isLong = (e.error || '').length > 120;
return '<tr>' +
'<td><span class="status-badge ' + statusClass + '">' + escapeHtml(e.status) + '</span></td>' +
'<td><span class="type-badge ' + typeClass + '">' + (e.type || '-').toUpperCase() + '</span></td>' +
@@ -5519,7 +5532,7 @@ func ErrorAnalysisHTML() string {
'<td><span class="category-badge ' + catClass + '">' + escapeHtml(e.error_category || 'unknown') + '</span></td>' +
'<td class="error-text">' +
'<div id="' + errorId + '-short">' + shortError + (isLong ? ' <a href="#" onclick="toggleError(\'' + errorId + '\');return false;" style="color:var(--accent-blue);font-size:11px;">show more</a>' : '') + '</div>' +
(isLong ? '<div id="' + errorId + '-full" style="display:none;white-space:pre-wrap;word-break:break-all;max-height:300px;overflow-y:auto;">' + fullError + ' <a href="#" onclick="toggleError(\'' + errorId + '\');return false;" style="color:var(--accent-blue);font-size:11px;">show less</a></div>' : '') +
(isLong ? '<div id="' + errorId + '-full" style="display:none;white-space:pre-wrap;word-break:break-all;max-height:600px;overflow-y:auto;">' + fullError + ' <a href="#" onclick="toggleError(\'' + errorId + '\');return false;" style="color:var(--accent-blue);font-size:11px;">show less</a></div>' : '') +
'</td>' +
'<td>' + escapeHtml(os) + '</td>' +
'<td style="white-space:nowrap;">' + formatTimestamp(e.created) + '</td>' +
+84 -12
View File
@@ -607,7 +607,7 @@ var (
allowedType = map[string]bool{"lxc": true, "vm": true, "tool": true, "addon": true}
// Allowed values for 'status' field
allowedStatus = map[string]bool{"installing": true, "success": true, "failed": true, "aborted": true, "unknown": true}
allowedStatus = map[string]bool{"installing": true, "configuring": true, "success": true, "failed": true, "aborted": true, "unknown": true}
// Allowed values for 'os_type' field
allowedOsType = map[string]bool{
@@ -630,21 +630,42 @@ var (
"network": true, "storage": true, "dependency": true, "permission": true,
"timeout": true, "config": true, "resource": true, "unknown": true, "": true,
"user_aborted": true, "apt": true, "command_not_found": true, "signal": true,
"service": true, "database": true, "proxmox": true,
}
// exitCodeCategories maps well-known exit codes to error categories
exitCodeCategories = map[int]string{
1: "unknown", // General error
2: "unknown", // Misuse of shell builtins
100: "apt", // APT: package manager error (broken packages / dependency problems)
126: "permission", // Command invoked cannot execute (permission problem or not executable)
4: "network", // curl: Network/protocol error
5: "network", // curl: Could not resolve proxy
6: "network", // curl: Could not resolve host
7: "network", // curl: Connection refused
8: "network", // curl: FTP server reply error
10: "config", // Docker / privileged mode required
22: "network", // curl: HTTP error (404/500 etc.)
23: "storage", // curl: Write error (disk full?)
25: "network", // curl: Upload failed
28: "timeout", // curl: Connection timed out
35: "network", // SSL connect error
56: "network", // curl: Receive error (connection reset)
100: "apt", // APT: package manager error
101: "apt", // APT: Unmet dependencies
102: "apt", // APT: Lock held by another process
124: "timeout", // Command timed out
125: "config", // Docker daemon error / container failed to run
126: "permission", // Command invoked cannot execute
127: "command_not_found", // Command not found
128: "signal", // Invalid argument to exit
129: "signal", // Killed by SIGHUP (terminal closed)
130: "user_aborted", // Script terminated by Ctrl+C (SIGINT)
131: "signal", // Killed by SIGQUIT (core dump)
134: "signal", // Process aborted (SIGABRT)
137: "resource", // SIGKILL - often OOM killer
139: "unknown", // SIGSEGV - segfault
141: "unknown", // SIGPIPE
141: "signal", // SIGPIPE
143: "signal", // SIGTERM
255: "apt", // DPKG: Fatal internal error
}
// exitCodeDescriptions provides human-readable exit code descriptions
@@ -652,15 +673,38 @@ var (
0: "Success",
1: "General error",
2: "Misuse of shell builtins",
4: "curl: Network/protocol error",
5: "curl: Could not resolve proxy",
6: "curl: DNS resolution failed",
7: "curl: Connection refused",
8: "curl: FTP server reply error",
10: "Docker / privileged mode required (unsupported environment)",
22: "curl: HTTP error (404/500 etc.)",
23: "curl: Write error (disk full?)",
25: "curl: Upload failed",
28: "curl: Connection timed out",
30: "curl: FTP port command failed",
35: "SSL connect error",
56: "curl: Receive error (connection reset)",
75: "Temporary failure (retry later)",
78: "curl: Remote file not found (404)",
100: "APT: Package manager error (broken packages / dependency problems)",
126: "Command invoked cannot execute (permission problem or not executable)",
101: "APT: Unmet dependencies",
102: "APT: Lock held by another process",
124: "Command timed out",
125: "Docker daemon error (container failed to run)",
126: "Command cannot execute (permission problem)",
127: "Command not found",
128: "Invalid argument to exit",
129: "Killed by SIGHUP (terminal closed)",
130: "Script terminated by Ctrl+C (SIGINT)",
131: "Killed by SIGQUIT (core dump)",
134: "Process aborted (SIGABRT)",
137: "Process killed (SIGKILL) - likely OOM",
139: "Segmentation fault (SIGSEGV)",
141: "Broken pipe (SIGPIPE)",
143: "Process terminated (SIGTERM)",
255: "DPKG: Fatal internal error",
}
)
@@ -814,6 +858,13 @@ func computeHash(out TelemetryOut) string {
// categorizeErrorText assigns an error_category based on error text patterns
func categorizeErrorText(errLower string) string {
// Docker / container errors (check early, before generic patterns)
if strings.Contains(errLower, "docker") ||
strings.Contains(errLower, "privileged mode") ||
strings.Contains(errLower, "container runtime") ||
strings.Contains(errLower, "daemon") {
return "config"
}
// Network errors
if strings.Contains(errLower, "connection refused") ||
strings.Contains(errLower, "could not resolve") ||
@@ -827,7 +878,7 @@ func categorizeErrorText(errLower string) string {
strings.Contains(errLower, "certificate") {
return "network"
}
// APT / package manager
// APT / package manager (check before generic "dependency")
if strings.Contains(errLower, "apt") ||
strings.Contains(errLower, "dpkg") ||
strings.Contains(errLower, "broken packages") ||
@@ -848,6 +899,23 @@ func categorizeErrorText(errLower string) string {
strings.Contains(errLower, "access denied") {
return "permission"
}
// Resource (OOM, memory)
if strings.Contains(errLower, "oom") ||
strings.Contains(errLower, "out of memory") ||
strings.Contains(errLower, "cannot allocate") ||
strings.Contains(errLower, "killed") ||
strings.Contains(errLower, "sigkill") {
return "resource"
}
// Signal-related
if strings.Contains(errLower, "sighup") ||
strings.Contains(errLower, "sigquit") ||
strings.Contains(errLower, "sigterm") ||
strings.Contains(errLower, "sigabrt") ||
strings.Contains(errLower, "sigpipe") ||
strings.Contains(errLower, "core dump") {
return "signal"
}
// Command not found
if strings.Contains(errLower, "command not found") ||
strings.Contains(errLower, "not found") {
@@ -859,12 +927,6 @@ func categorizeErrorText(errLower string) string {
strings.Contains(errLower, "missing") {
return "dependency"
}
// Resource
if strings.Contains(errLower, "oom") ||
strings.Contains(errLower, "out of memory") ||
strings.Contains(errLower, "cannot allocate") {
return "resource"
}
// Config
if strings.Contains(errLower, "config") ||
strings.Contains(errLower, "syntax error") ||
@@ -1643,6 +1705,16 @@ func main() {
return
}
// Auto-reclassify: exit_code=0 is NEVER an error — always reclassify as success
if in.Status == "failed" && in.ExitCode == 0 {
in.Status = "success"
in.Error = ""
in.ErrorCategory = ""
if cfg.EnableReqLogging {
log.Printf("auto-reclassified exit_code=0 as success: nsapp=%s", in.NSAPP)
}
}
// Auto-reclassify: clients still send status="failed" for SIGINT/Ctrl+C,
// detect and reclassify as "aborted" server-side.
errorLower := strings.ToLower(in.Error)