fix: detect crashed threads by checking worker PID liveness
When the .running marker exists but the CAS chain contains no __end__, check whether the worker process is actually alive. A dead PID means the worker crashed without cleaning up, so the thread status is reported as 'failed'. Fixes #170. 小橘 <xiaoju@shazhou.work>
This commit is contained in:
@@ -11,6 +11,7 @@ import { END } from "@uncaged/workflow-runtime";
|
|||||||
import { getGlobalCasDir } from "@uncaged/workflow-util";
|
import { getGlobalCasDir } from "@uncaged/workflow-util";
|
||||||
|
|
||||||
import { pathExists, readTextFileIfExists } from "./fs-utils.js";
|
import { pathExists, readTextFileIfExists } from "./fs-utils.js";
|
||||||
|
import { readWorkerCtl } from "./worker-spawn.js";
|
||||||
|
|
||||||
async function readWorkflowNameFromStartHash(
|
async function readWorkflowNameFromStartHash(
|
||||||
storageRoot: string,
|
storageRoot: string,
|
||||||
@@ -217,6 +218,16 @@ export async function resolveThreadListStatus(
|
|||||||
return "completed";
|
return "completed";
|
||||||
}
|
}
|
||||||
if (runningMarkerPresent) {
|
if (runningMarkerPresent) {
|
||||||
|
const ctlResult = await readWorkerCtl(storageRoot, row.hash);
|
||||||
|
if (ctlResult.ok) {
|
||||||
|
try {
|
||||||
|
process.kill(ctlResult.value.pid, 0);
|
||||||
|
return "running";
|
||||||
|
} catch {
|
||||||
|
// Worker PID is dead but .running marker remains — crashed thread
|
||||||
|
return "failed";
|
||||||
|
}
|
||||||
|
}
|
||||||
return "running";
|
return "running";
|
||||||
}
|
}
|
||||||
return "active";
|
return "active";
|
||||||
|
|||||||
Reference in New Issue
Block a user