fix: detect crashed threads by checking worker PID liveness

When a .running marker exists but the CAS chain has no __end__ entry,
check whether the worker process is actually alive. A dead PID means
the worker crashed without cleaning up, so the status becomes 'failed'.

Fixes #170

小橘 <xiaoju@shazhou.work>
This commit is contained in:
2026-05-09 12:38:18 +00:00
parent d6fe3f844c
commit 7e7f6aa6d6
+11
View File
@@ -11,6 +11,7 @@ import { END } from "@uncaged/workflow-runtime";
import { getGlobalCasDir } from "@uncaged/workflow-util"; import { getGlobalCasDir } from "@uncaged/workflow-util";
import { pathExists, readTextFileIfExists } from "./fs-utils.js"; import { pathExists, readTextFileIfExists } from "./fs-utils.js";
import { readWorkerCtl } from "./worker-spawn.js";
async function readWorkflowNameFromStartHash( async function readWorkflowNameFromStartHash(
storageRoot: string, storageRoot: string,
@@ -217,6 +218,16 @@ export async function resolveThreadListStatus(
return "completed"; return "completed";
} }
if (runningMarkerPresent) { if (runningMarkerPresent) {
const ctlResult = await readWorkerCtl(storageRoot, row.hash);
if (ctlResult.ok) {
try {
process.kill(ctlResult.value.pid, 0);
return "running";
} catch {
// Worker PID is dead but .running marker remains — crashed thread
return "failed";
}
}
return "running"; return "running";
} }
return "active"; return "active";