diff --git a/cleanup.go b/cleanup.go index 6fa072e..19cff8f 100644 --- a/cleanup.go +++ b/cleanup.go @@ -113,7 +113,7 @@ func (c *Cleaner) findStuckInstallations(ctx context.Context) ([]StuckRecord, er cutoffStr := cutoff.Format("2006-01-02 15:04:05") // Build filter: status='installing' AND created < cutoff - filter := url.QueryEscape(fmt.Sprintf("status='installing' && created<'%s'", cutoffStr)) + filter := url.QueryEscape(fmt.Sprintf("(status='installing' || status='configuring') && created<'%s'", cutoffStr)) var allRecords []StuckRecord page := 1 diff --git a/dashboard.go b/dashboard.go index a0c4bad..70ddebc 100644 --- a/dashboard.go +++ b/dashboard.go @@ -456,7 +456,7 @@ func aggregateRecords(records []TelemetryRecord, knownScripts map[string]ScriptI st.Failed++ case "aborted": st.Aborted++ - case "installing": + case "installing", "configuring": st.Installing++ } } @@ -622,7 +622,7 @@ func (s *ScriptStatsStore) IncrementalUpdate(ctx context.Context, repoSource str st.Failed++ case "aborted": st.Aborted++ - case "installing": + case "installing", "configuring": st.Installing++ } added++ @@ -867,8 +867,8 @@ func (p *PBClient) FetchScriptAnalysisData(ctx context.Context, days int, repoSo strings.Contains(strings.ToLower(r.Error), "aborted by user")) { r.Status = "aborted" } - // Reclassify failed+exit_code=0 - if r.Status == "failed" && r.ExitCode == 0 && (r.Error == "" || strings.ToLower(r.Error) == "success") { + // Reclassify failed+exit_code=0 — exit_code=0 is NEVER an error + if r.Status == "failed" && r.ExitCode == 0 { r.Status = "success" } @@ -886,7 +886,7 @@ func (p *PBClient) FetchScriptAnalysisData(ctx context.Context, days int, repoSo a.failed++ case "aborted": a.aborted++ - case "installing": + case "installing", "configuring": a.installing++ } @@ -1041,12 +1041,12 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou r.Status = "aborted" } - // Reclassify: status="failed" with exit_code=0 and no error text is actually success - if r.Status == "failed" && r.ExitCode == 0 && (r.Error == "" || strings.ToLower(r.Error) == "success") { + // Reclassify: exit_code=0 is NEVER an error — always reclassify as success + if r.Status == "failed" && r.ExitCode == 0 { r.Status = "success" } - if r.Status == "installing" { + if r.Status == "installing" || r.Status == "configuring" { stuckCount++ continue } @@ -1100,9 +1100,6 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou } if r.Error != "" && (appStats[key].topError == "" || len(r.Error) > len(appStats[key].topError)) { appStats[key].topError = r.Error - if len(appStats[key].topError) > 150 { - appStats[key].topError = appStats[key].topError[:150] + "..." - } } if cat != "uncategorized" && appStats[key].topCategory == "" { appStats[key].topCategory = cat @@ -1214,6 +1211,9 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou case 124: desc = "Command timed out (timeout command)" cat = "timeout" + case 125: + desc = "Docker daemon error (container failed to run)" + cat = "config" case 126: desc = "Command cannot execute (permission problem)" cat = "permission" @@ -1222,10 +1222,16 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou cat = "command_not_found" case 128: desc = "Invalid argument to exit" - cat = "unknown" + cat = "signal" + case 129: + desc = "Killed by SIGHUP (terminal closed)" + cat = "signal" case 130: desc = "Script terminated by Ctrl+C (SIGINT)" cat = "user_aborted" + case 131: + desc = "Killed by SIGQUIT (core dump)" + cat = "signal" case 134: desc = "Process aborted (SIGABRT)" cat = "signal" @@ -1402,8 +1408,8 @@ func (p *PBClient) FetchErrorAnalysisData(ctx context.Context, days int, repoSou desc = "npm/pnpm/yarn: Unknown fatal error" cat = "unknown" case 255: - desc = "Script error (set -e / errexit triggered or SSH error)" - cat = "unknown" + desc = "DPKG: Fatal internal error / set -e triggered" + cat = "apt" default: if code > 128 && code < 192 { sigNum := code - 128 @@ -1684,7 +1690,7 @@ func (p *PBClient) FetchDashboardData(ctx context.Context, days int, repoSource } case "aborted": data.AbortedCount++ - case "installing": + case "installing", "configuring": data.InstallingCount++ } @@ -2846,6 +2852,12 @@ func DashboardHTML() string { border-color: rgba(234, 179, 8, 0.3); } + .status-badge.configuring { + background: rgba(59, 130, 246, 0.15); + color: var(--accent-blue); + border-color: rgba(59, 130, 246, 0.3); + } + .status-badge.aborted { background: rgba(168, 85, 247, 0.15); color: var(--accent-purple); @@ -3973,6 +3985,7 @@ func DashboardHTML() string { Failed Aborted Installing + Configuring Unknown @@ -5475,9 +5488,9 @@ func ErrorAnalysisHTML() string { const failRateColor = a.failure_rate > 50 ? 'var(--accent-red)' : a.failure_rate > 20 ? 'var(--accent-orange)' : 'var(--accent-yellow)'; const topCat = a.top_category ? '' + escapeHtml(a.top_category) + '' : '-'; const errorId = 'err-app-' + idx; - const shortError = escapeHtml((a.top_error || '-').substring(0, 80)); + const shortError = escapeHtml((a.top_error || '-').substring(0, 120)); const fullError = escapeHtml(a.top_error || '-'); - const isLong = (a.top_error || '').length > 80; + const isLong = (a.top_error || '').length > 120; return '' + '' + escapeHtml(a.app) + '' + '' + (a.type || '-').toUpperCase() + '' + @@ -5488,7 +5501,7 @@ func ErrorAnalysisHTML() string { '' + (a.top_exit_code ? '' + a.top_exit_code + '' : '-') + '' + '' + '' + shortError + (isLong ? ' show more' : '') + '' + - (isLong ? '' + fullError + ' show less' : '') + + (isLong ? '' + fullError + ' show less' : '') + '' + '🐛 Issue' + ''; @@ -5508,9 +5521,9 @@ func ErrorAnalysisHTML() string { const catClass = (e.error_category || 'unknown').replace(/ /g, '_'); const os = e.os_type ? e.os_type + (e.os_version ? ' ' + e.os_version : '') : '-'; const errorId = 'err-recent-' + idx; - const shortError = escapeHtml((e.error || '-').substring(0, 80)); + const shortError = escapeHtml((e.error || '-').substring(0, 120)); const fullError = escapeHtml(e.error || '-'); - const isLong = (e.error || '').length > 80; + const isLong = (e.error || '').length > 120; return '' + '' + escapeHtml(e.status) + '' + '' + (e.type || '-').toUpperCase() + '' + @@ -5519,7 +5532,7 @@ func ErrorAnalysisHTML() string { '' + escapeHtml(e.error_category || 'unknown') + '' + '' + '' + shortError + (isLong ? ' show more' : '') + '' + - (isLong ? '' + fullError + ' show less' : '') + + (isLong ? '' + fullError + ' show less' : '') + '' + '' + escapeHtml(os) + '' + '' + formatTimestamp(e.created) + '' + diff --git a/service.go b/service.go index 10412cf..730159f 100644 --- a/service.go +++ b/service.go @@ -607,7 +607,7 @@ var ( allowedType = map[string]bool{"lxc": true, "vm": true, "tool": true, "addon": true} // Allowed values for 'status' field - allowedStatus = map[string]bool{"installing": true, "success": true, "failed": true, "aborted": true, "unknown": true} + allowedStatus = map[string]bool{"installing": true, "configuring": true, "success": true, "failed": true, "aborted": true, "unknown": true} // Allowed values for 'os_type' field allowedOsType = map[string]bool{ @@ -630,21 +630,42 @@ var ( "network": true, "storage": true, "dependency": true, "permission": true, "timeout": true, "config": true, "resource": true, "unknown": true, "": true, "user_aborted": true, "apt": true, "command_not_found": true, "signal": true, + "service": true, "database": true, "proxmox": true, } // exitCodeCategories maps well-known exit codes to error categories exitCodeCategories = map[int]string{ 1: "unknown", // General error 2: "unknown", // Misuse of shell builtins - 100: "apt", // APT: package manager error (broken packages / dependency problems) - 126: "permission", // Command invoked cannot execute (permission problem or not executable) + 4: "network", // curl: Network/protocol error + 5: "network", // curl: Could not resolve proxy + 6: "network", // curl: Could not resolve host + 7: "network", // curl: Connection refused + 8: "network", // curl: FTP server reply error + 10: "config", // Docker / privileged mode required + 22: "network", // curl: HTTP error (404/500 etc.) + 23: "storage", // curl: Write error (disk full?) + 25: "network", // curl: Upload failed + 28: "timeout", // curl: Connection timed out + 35: "network", // SSL connect error + 56: "network", // curl: Receive error (connection reset) + 100: "apt", // APT: package manager error + 101: "apt", // APT: Unmet dependencies + 102: "apt", // APT: Lock held by another process + 124: "timeout", // Command timed out + 125: "config", // Docker daemon error / container failed to run + 126: "permission", // Command invoked cannot execute 127: "command_not_found", // Command not found 128: "signal", // Invalid argument to exit + 129: "signal", // Killed by SIGHUP (terminal closed) 130: "user_aborted", // Script terminated by Ctrl+C (SIGINT) + 131: "signal", // Killed by SIGQUIT (core dump) + 134: "signal", // Process aborted (SIGABRT) 137: "resource", // SIGKILL - often OOM killer 139: "unknown", // SIGSEGV - segfault - 141: "unknown", // SIGPIPE + 141: "signal", // SIGPIPE 143: "signal", // SIGTERM + 255: "apt", // DPKG: Fatal internal error } // exitCodeDescriptions provides human-readable exit code descriptions @@ -652,15 +673,38 @@ var ( 0: "Success", 1: "General error", 2: "Misuse of shell builtins", + 4: "curl: Network/protocol error", + 5: "curl: Could not resolve proxy", + 6: "curl: DNS resolution failed", + 7: "curl: Connection refused", + 8: "curl: FTP server reply error", + 10: "Docker / privileged mode required (unsupported environment)", + 22: "curl: HTTP error (404/500 etc.)", + 23: "curl: Write error (disk full?)", + 25: "curl: Upload failed", + 28: "curl: Connection timed out", + 30: "curl: FTP port command failed", + 35: "SSL connect error", + 56: "curl: Receive error (connection reset)", + 75: "Temporary failure (retry later)", + 78: "curl: Remote file not found (404)", 100: "APT: Package manager error (broken packages / dependency problems)", - 126: "Command invoked cannot execute (permission problem or not executable)", + 101: "APT: Unmet dependencies", + 102: "APT: Lock held by another process", + 124: "Command timed out", + 125: "Docker daemon error (container failed to run)", + 126: "Command cannot execute (permission problem)", 127: "Command not found", 128: "Invalid argument to exit", + 129: "Killed by SIGHUP (terminal closed)", 130: "Script terminated by Ctrl+C (SIGINT)", + 131: "Killed by SIGQUIT (core dump)", + 134: "Process aborted (SIGABRT)", 137: "Process killed (SIGKILL) - likely OOM", 139: "Segmentation fault (SIGSEGV)", 141: "Broken pipe (SIGPIPE)", 143: "Process terminated (SIGTERM)", + 255: "DPKG: Fatal internal error", } ) @@ -814,6 +858,13 @@ func computeHash(out TelemetryOut) string { // categorizeErrorText assigns an error_category based on error text patterns func categorizeErrorText(errLower string) string { + // Docker / container errors (check early, before generic patterns) + if strings.Contains(errLower, "docker") || + strings.Contains(errLower, "privileged mode") || + strings.Contains(errLower, "container runtime") || + strings.Contains(errLower, "daemon") { + return "config" + } // Network errors if strings.Contains(errLower, "connection refused") || strings.Contains(errLower, "could not resolve") || @@ -827,7 +878,7 @@ func categorizeErrorText(errLower string) string { strings.Contains(errLower, "certificate") { return "network" } - // APT / package manager + // APT / package manager (check before generic "dependency") if strings.Contains(errLower, "apt") || strings.Contains(errLower, "dpkg") || strings.Contains(errLower, "broken packages") || @@ -848,6 +899,23 @@ func categorizeErrorText(errLower string) string { strings.Contains(errLower, "access denied") { return "permission" } + // Resource (OOM, memory) + if strings.Contains(errLower, "oom") || + strings.Contains(errLower, "out of memory") || + strings.Contains(errLower, "cannot allocate") || + strings.Contains(errLower, "killed") || + strings.Contains(errLower, "sigkill") { + return "resource" + } + // Signal-related + if strings.Contains(errLower, "sighup") || + strings.Contains(errLower, "sigquit") || + strings.Contains(errLower, "sigterm") || + strings.Contains(errLower, "sigabrt") || + strings.Contains(errLower, "sigpipe") || + strings.Contains(errLower, "core dump") { + return "signal" + } // Command not found if strings.Contains(errLower, "command not found") || strings.Contains(errLower, "not found") { @@ -859,12 +927,6 @@ func categorizeErrorText(errLower string) string { strings.Contains(errLower, "missing") { return "dependency" } - // Resource - if strings.Contains(errLower, "oom") || - strings.Contains(errLower, "out of memory") || - strings.Contains(errLower, "cannot allocate") { - return "resource" - } // Config if strings.Contains(errLower, "config") || strings.Contains(errLower, "syntax error") || @@ -1643,6 +1705,16 @@ func main() { return } + // Auto-reclassify: exit_code=0 is NEVER an error — always reclassify as success + if in.Status == "failed" && in.ExitCode == 0 { + in.Status = "success" + in.Error = "" + in.ErrorCategory = "" + if cfg.EnableReqLogging { + log.Printf("auto-reclassified exit_code=0 as success: nsapp=%s", in.NSAPP) + } + } + // Auto-reclassify: clients still send status="failed" for SIGINT/Ctrl+C, // detect and reclassify as "aborted" server-side. errorLower := strings.ToLower(in.Error)