From b5e094ab4d8904b03212d1aa04a633ab779303fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B0=8F=E6=A9=98?= Date: Sun, 7 Jun 2026 10:33:41 +0000 Subject: [PATCH] =?UTF-8?q?feat(cli):=20step=20ask=20=E2=80=94=20read-only?= =?UTF-8?q?=20query=20to=20historical=20step=20sessions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `uwf step ask -p ` for asking follow-up questions to a completed step's agent without mutating thread state. - Fork-by-default: creates and caches a fork session per step (cache key `:ask`); subsequent asks reuse it. - `--no-fork` fallback: spawns a fresh session with the step's detail ref injected as context. - `--agent` overrides the recorded agent; otherwise resolves from the step's agent field via config alias. - Updates `packages/cli/README.md` and `packages/util/src/usage-reference.ts` so the new subcommand is discoverable via README and `uwf prompt usage`. Fixes #146 --- .changeset/step-ask.md | 18 + packages/cli/README.md | 3 + packages/cli/src/__tests__/step-ask.test.ts | 670 ++++++++++++++++++++ packages/cli/src/cli.ts | 28 +- packages/cli/src/commands/step.ts | 229 ++++++- packages/util/src/usage-reference.ts | 5 + 6 files changed, 951 insertions(+), 2 deletions(-) create mode 100644 .changeset/step-ask.md create mode 100644 packages/cli/src/__tests__/step-ask.test.ts diff --git a/.changeset/step-ask.md b/.changeset/step-ask.md new file mode 100644 index 0000000..ca26a52 --- /dev/null +++ b/.changeset/step-ask.md @@ -0,0 +1,18 @@ +--- +"@united-workforce/cli": minor +"@united-workforce/util": patch +--- + +feat(cli): add `uwf step ask -p ` read-only follow-up command + +Phase 2b of the ask-session work. Adds a new subcommand that lets the user ask +a follow-up question to a historical step's agent without writing a new +`StepNode` or mutating thread state. The command resolves the agent from the +recorded step (or `--agent ` override), forks the original session via the +adapter's `--mode fork --session ` contract, caches the resulting +ask-session id under `:ask` so subsequent asks reuse it, then invokes +the agent with `--mode ask --session --prompt --detail ` +and streams the raw stdout to the caller. `--no-fork` falls back to a fresh +session that receives the step's detail ref for context. The `prompt usage` +reference (in `@united-workforce/util`) is also updated so agents discover the +new subcommand. Resolves issue #146. diff --git a/packages/cli/README.md b/packages/cli/README.md index 52a54cc..d97ecb1 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -79,6 +79,7 @@ uwf thread stop 01ARZ3NDEKTSV4RRFFQ69G5FAV | `uwf step show ` | Show step metadata and frontmatter | | `uwf step read [--quota ]` | Read a step's turns as human-readable markdown | | `uwf step fork ` | Fork a thread from a specific step | +| `uwf step ask -p [--agent ] [--no-fork]` | Ask a follow-up question to a historical step's agent (read-only; no thread mutation) | Examples: @@ -87,6 +88,8 @@ uwf step list 01ARZ3NDEKTSV4RRFFQ69G5FAV uwf step show 32GCDE899RRQ3 uwf step read 32GCDE899RRQ3 --quota 2000 uwf step fork 32GCDE899RRQ3 +uwf step ask 32GCDE899RRQ3 -p "Why did you choose this approach?" +uwf step ask 32GCDE899RRQ3 -p "Summarise the key findings" --no-fork ``` ### Workflow (Layer 1: Templates) diff --git a/packages/cli/src/__tests__/step-ask.test.ts b/packages/cli/src/__tests__/step-ask.test.ts new file mode 100644 index 0000000..5d829d6 --- /dev/null +++ b/packages/cli/src/__tests__/step-ask.test.ts @@ -0,0 +1,670 @@ +import { execFileSync } from "node:child_process"; +import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { bootstrap, putSchema } from "@ocas/core"; +import { openStore } from "@ocas/fs"; +import type { CasRef, ThreadId, ThreadIndexEntry } from "@united-workforce/protocol"; +import { afterEach, beforeEach, describe, expect, test } from "vitest"; +import { registerUwfSchemas } from "../schemas.js"; +import { seedThreads } from "./thread-test-helpers.js"; + +const OUTPUT_SCHEMA = { + type: "object" as const, + properties: { + $status: { type: "string" as const }, + note: { type: "string" as const }, + }, + required: ["$status"], + additionalProperties: false, +}; + +const DETAIL_SCHEMA = { + title: "ask-detail", + type: "object" as const, + required: ["sessionId", "model", "duration", "turnCount", "turns"], + properties: { + sessionId: { type: "string" as const }, + model: { type: "string" as const }, + duration: { type: "integer" as const }, + turnCount: { type: "integer" as const }, + turns: { + type: "array" as const, + items: { type: "string" as const, format: "ocas_ref" }, + }, + }, + additionalProperties: false, +}; + +const THREAD_ID = "01ASKSTEPTEST000000000" as ThreadId; +const STEP_SESSION_ID = "ses-original-step-001"; + +let tmpDir: string; + +beforeEach(async () => { + tmpDir = await mkdtemp(join(tmpdir(), "cli-uwf-step-ask-test-")); +}); + +afterEach(async () => { + await rm(tmpDir, { recursive: true, force: true }); +}); + +type SetupOpts = { + threadStatus: ThreadIndexEntry["status"]; + withDetail: boolean; + // The agent name (path or alias) to record in the head StepNode.agent field. + // Defaults to mockAgentPath. + stepAgentNameOverride: string | null; + // Pre-cached fork session-id. When provided, the cache file is written + // before running so the test can verify reuse semantics. + preCachedForkSessionId: string | null; +}; + +type SetupResult = { + casDir: string; + stepHash: CasRef; + startHash: CasRef; + workflowHash: CasRef; + detailHash: CasRef | null; + mockAgentPath: string; + failingAgentPath: string; + promptCapturePath: string; + modeCapturePath: string; + forkSessionCapturePath: string; + askSessionCapturePath: string; + envCapturePath: string; +}; + +async function setupAskFixture(opts: Partial = {}): Promise { + const cfg: SetupOpts = { + threadStatus: opts.threadStatus ?? "idle", + withDetail: opts.withDetail ?? true, + stepAgentNameOverride: opts.stepAgentNameOverride ?? null, + preCachedForkSessionId: opts.preCachedForkSessionId ?? null, + }; + + const casDir = join(tmpDir, "cas"); + await mkdir(casDir, { recursive: true }); + + const store = await openStore(casDir); + await bootstrap(store); + const schemas = await registerUwfSchemas(store); + const outputSchemaHash = await putSchema(store, OUTPUT_SCHEMA); + const detailSchemaHash = await putSchema(store, DETAIL_SCHEMA); + + const workflowHash = await store.cas.put(schemas.workflow, { + name: "test-ask", + description: "ask command integration test", + roles: { + worker: { + description: "Worker", + goal: "Work", + capabilities: [], + procedure: "work", + output: "result", + frontmatter: outputSchemaHash, + }, + }, + graph: { + $START: { + new: { role: "worker", prompt: "Start work", location: null }, + }, + worker: { ok: { role: "$END", prompt: "done", location: null } }, + }, + }); + + const startHash = await store.cas.put(schemas.startNode, { + workflow: workflowHash, + prompt: "Test ask task", + cwd: tmpDir, + }); + + // Set OCAS_HOME so seedThreads + in-test createUwfStore calls resolve to this CAS dir. + process.env.OCAS_HOME = casDir; + + // Capture file paths + const promptCapturePath = join(tmpDir, "captured-prompt.txt"); + const modeCapturePath = join(tmpDir, "captured-mode.txt"); + const forkSessionCapturePath = join(tmpDir, "captured-fork-session.txt"); + const askSessionCapturePath = join(tmpDir, "captured-ask-session.txt"); + const envCapturePath = join(tmpDir, "captured-env.txt"); + const mockAgentPath = join(tmpDir, "mock-agent.sh"); + const failingAgentPath = join(tmpDir, "failing-agent.sh"); + + // Build a detail node with sessionId so step ask can extract it + let detailHash: CasRef | null = null; + if (cfg.withDetail) { + const turnHash = await store.cas.put(detailSchemaHash, { + sessionId: STEP_SESSION_ID, + model: "test-model", + duration: 1000, + turnCount: 0, + turns: [], + }); + detailHash = turnHash; + } + + // Build the StepNode at thread head + const outputHash = await store.cas.put(outputSchemaHash, { $status: "ok" }); + const stepHash = await store.cas.put(schemas.stepNode, { + start: startHash, + prev: null, + role: "worker", + output: outputHash, + detail: detailHash, + agent: cfg.stepAgentNameOverride ?? mockAgentPath, + edgePrompt: "Start work", + startedAtMs: 1716600000000, + completedAtMs: 1716600001000, + cwd: tmpDir, + assembledPrompt: null, + usage: null, + }); + + // Seed thread index entry + await seedThreads(tmpDir, { + [THREAD_ID]: { + head: stepHash, + status: cfg.threadStatus, + suspendedRole: null, + suspendMessage: null, + completedAt: cfg.threadStatus === "completed" ? 1716600001000 : null, + }, + }); + + // Pre-seed the ask session cache so reuse tests have something to find. + if (cfg.preCachedForkSessionId !== null) { + const cachePath = join(tmpDir, "cache", "mock-sessions.json"); + await mkdir(dirname(cachePath), { recursive: true }); + await writeFile( + cachePath, + `${JSON.stringify({ [`${stepHash}:ask`]: cfg.preCachedForkSessionId }, null, 2)}\n`, + "utf8", + ); + } + + // Mock agent: dispatches based on `--mode` (ask|fork|run) and captures inputs. + // - --mode ask --session --prompt : writes to ask capture; echoes a fixed answer to stdout + // - --mode fork --session : writes to fork capture; prints "forked-from-" sessionId on stdout + // - default (uwf-* style invocation): captures and echoes adapter JSON (not used in this suite) + await writeFile( + mockAgentPath, + `#!/bin/sh +mode="" +prompt="" +session="" +detail="" +while [ $# -gt 0 ]; do + case "$1" in + --mode) mode="$2"; shift 2 ;; + --prompt) prompt="$2"; shift 2 ;; + --session) session="$2"; shift 2 ;; + --detail) detail="$2"; shift 2 ;; + *) shift ;; + esac +done +printf '%s' "$mode" > '${modeCapturePath}' +printf '%s' "$prompt" > '${promptCapturePath}' +printf 'OCAS_HOME=%s\\n' "$OCAS_HOME" > '${envCapturePath}' +case "$mode" in + fork) + printf '%s' "$session" > '${forkSessionCapturePath}' + new_id="forked-from-$session" + printf '%s\\n' "$new_id" + ;; + ask) + printf '%s' "$session" > '${askSessionCapturePath}' + # Print a deterministic answer that the cmdStepAsk path will hand back. + printf 'MOCK_ANSWER prompt=%s session=%s detail=%s\\n' "$prompt" "$session" "$detail" + ;; + *) + echo "{\\"stepHash\\":\\"unused\\"}" + ;; +esac +`, + { mode: 0o755 }, + ); + + await writeFile( + failingAgentPath, + `#!/bin/sh +echo "boom" >&2 +exit 7 +`, + { mode: 0o755 }, + ); + + // Minimal config so loadWorkflowConfig succeeds. + const configPath = join(tmpDir, "config.yaml"); + await writeFile( + configPath, + `defaultAgent: uwf-hermes\ndefaultModel: test-model\nagentOverrides: null\nagents: {}\nproviders: {}\nmodels: {}\n`, + ); + + return { + casDir, + stepHash, + startHash, + workflowHash, + detailHash, + mockAgentPath, + failingAgentPath, + promptCapturePath, + modeCapturePath, + forkSessionCapturePath, + askSessionCapturePath, + envCapturePath, + }; +} + +function runUwf( + args: string[], + casDir: string, +): { stdout: string; stderr: string; status: number } { + const cliPath = join(dirname(fileURLToPath(import.meta.url)), "..", "..", "dist", "cli.js"); + try { + const stdout = execFileSync(process.execPath, [cliPath, ...args], { + encoding: "utf8", + stdio: ["ignore", "pipe", "pipe"], + env: { + ...process.env, + UWF_HOME: tmpDir, + OCAS_HOME: casDir, + }, + cwd: tmpDir, + timeout: 30000, + }); + return { stdout, stderr: "", status: 0 }; + } catch (error) { + const err = error as NodeJS.ErrnoException & { + stdout?: string | Buffer; + stderr?: string | Buffer; + status?: number; + }; + return { + stdout: typeof err.stdout === "string" ? err.stdout : (err.stdout?.toString("utf8") ?? ""), + stderr: typeof err.stderr === "string" ? err.stderr : (err.stderr?.toString("utf8") ?? ""), + status: err.status ?? 1, + }; + } +} + +// ── Group 1: CLI argument validation ─────────────────────────────────────── + +describe("uwf step ask - CLI argument validation", () => { + test("1.1 missing step-hash exits non-zero", async () => { + const { casDir } = await setupAskFixture(); + const result = runUwf(["step", "ask"], casDir); + expect(result.status).not.toBe(0); + }); + + test("1.2 missing -p flag exits non-zero", async () => { + const { casDir, stepHash } = await setupAskFixture(); + const result = runUwf(["step", "ask", stepHash], casDir); + expect(result.status).not.toBe(0); + expect(result.stderr.toLowerCase()).toMatch(/required|missing|prompt/); + }); + + test("1.3 step-hash and -p accepted as valid invocation", async () => { + const { casDir, stepHash, mockAgentPath } = await setupAskFixture(); + const result = runUwf( + ["step", "ask", stepHash, "-p", "why?", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).toBe(0); + }); +}); + +// ── Group 2: CAS validation errors ──────────────────────────────────────── + +describe("uwf step ask - CAS validation errors", () => { + test("2.1 non-existent CAS hash exits non-zero with 'not found'", async () => { + const { casDir, mockAgentPath } = await setupAskFixture(); + const result = runUwf( + ["step", "ask", "0000000000000", "-p", "why?", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).not.toBe(0); + expect(result.stderr.toLowerCase()).toContain("not found"); + }); + + test("2.2 hash that is not a StepNode exits non-zero", async () => { + const { casDir, startHash, mockAgentPath } = await setupAskFixture(); + // Use the StartNode hash — it exists but is not a StepNode + const result = runUwf( + ["step", "ask", startHash, "-p", "why?", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).not.toBe(0); + expect(result.stderr.toLowerCase()).toContain("not a stepnode"); + }); + + test("2.3 step with no detail ref exits non-zero", async () => { + const { casDir, stepHash, mockAgentPath } = await setupAskFixture({ withDetail: false }); + const result = runUwf( + ["step", "ask", stepHash, "-p", "why?", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).not.toBe(0); + expect(result.stderr.toLowerCase()).toMatch(/no detail|detail.*missing|missing.*detail/); + }); +}); + +// ── Group 3: Successful ask (core behavior) ─────────────────────────────── + +describe("uwf step ask - successful ask (core)", () => { + test("3.1 stdout contains agent's response text", async () => { + const { casDir, stepHash, mockAgentPath } = await setupAskFixture(); + const result = runUwf( + ["step", "ask", stepHash, "-p", "why tar not zip?", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).toBe(0); + expect(result.stdout).toContain("MOCK_ANSWER"); + expect(result.stdout).toContain("why tar not zip?"); + }); + + test("3.2 thread index entry (head, status) is identical before and after ask", async () => { + const { casDir, stepHash, mockAgentPath } = await setupAskFixture(); + + // Before ask: snapshot the thread state + const { createUwfStore, getThread } = await import("../store.js"); + const before = await createUwfStore(tmpDir); + const beforeEntry = getThread(before.varStore, THREAD_ID); + expect(beforeEntry).not.toBeNull(); + + const result = runUwf( + ["step", "ask", stepHash, "-p", "anything", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).toBe(0); + + // After ask: thread state should be unchanged + const after = await createUwfStore(tmpDir); + const afterEntry = getThread(after.varStore, THREAD_ID); + expect(afterEntry).not.toBeNull(); + expect(afterEntry?.head).toBe(beforeEntry?.head); + expect(afterEntry?.status).toBe(beforeEntry?.status); + }); + + test("3.3 no new StepNode is written to CAS (step count unchanged)", async () => { + const { casDir, stepHash, mockAgentPath } = await setupAskFixture(); + + // Count StepNodes before + const { createUwfStore } = await import("../store.js"); + const before = await createUwfStore(tmpDir); + const stepSchemaHash = before.schemas.stepNode; + + function countStepNodes(uwfStore: typeof before): number { + const candidates = [stepHash]; + let count = 0; + for (const h of candidates) { + const node = uwfStore.store.cas.get(h); + if (node !== null && node.type === stepSchemaHash) count++; + } + return count; + } + + const beforeCount = countStepNodes(before); + expect(beforeCount).toBe(1); + + const result = runUwf( + ["step", "ask", stepHash, "-p", "anything", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).toBe(0); + + // After ask: still only the seeded StepNode exists at head; no new step appended. + const after = await createUwfStore(tmpDir); + const headNode = after.store.cas.get(stepHash); + expect(headNode).not.toBeNull(); + expect(headNode?.type).toBe(after.schemas.stepNode); + + // Confirm thread head still points to the original step hash + const { getThread } = await import("../store.js"); + const entry = getThread(after.varStore, THREAD_ID); + expect(entry?.head).toBe(stepHash); + }); +}); + +// ── Group 4: Fork cache semantics ───────────────────────────────────────── + +describe("uwf step ask - fork cache", () => { + test("4.1 first ask creates a fork session and caches it", async () => { + const { casDir, stepHash, mockAgentPath, forkSessionCapturePath } = await setupAskFixture(); + + const result = runUwf( + ["step", "ask", stepHash, "-p", "first ask", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).toBe(0); + + // The mock agent in fork mode receives the source session id + const forkArg = await readFile(forkSessionCapturePath, "utf8"); + expect(forkArg).toBe(STEP_SESSION_ID); + + // Cache file should now contain the ask key + const cachePath = join(tmpDir, "cache", "mock-sessions.json"); + const raw = await readFile(cachePath, "utf8"); + const parsed = JSON.parse(raw) as Record; + expect(parsed[`${stepHash}:ask`]).toBeDefined(); + expect(parsed[`${stepHash}:ask`]).toBe(`forked-from-${STEP_SESSION_ID}`); + }); + + test("4.2 second ask on same step reuses the cached fork session", async () => { + const cachedFork = "ses-already-forked-once"; + const { casDir, stepHash, mockAgentPath, modeCapturePath, askSessionCapturePath } = + await setupAskFixture({ preCachedForkSessionId: cachedFork }); + + const result = runUwf( + ["step", "ask", stepHash, "-p", "second ask", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).toBe(0); + + // The mock agent must have been invoked in `ask` mode (no fork performed). + const mode = await readFile(modeCapturePath, "utf8"); + expect(mode).toBe("ask"); + + // The ask invocation should have received the cached fork session id. + const askArg = await readFile(askSessionCapturePath, "utf8"); + expect(askArg).toBe(cachedFork); + }); + + test("4.3 different step hash creates an independent fork", async () => { + // Run a first ask on the base step → caches forkA + const { casDir, stepHash, mockAgentPath } = await setupAskFixture(); + + const r1 = runUwf( + ["step", "ask", stepHash, "-p", "ask on step A", "--agent", mockAgentPath], + casDir, + ); + expect(r1.status).toBe(0); + + // Build a second StepNode (different hash) with a different sessionId so + // its detail-derived ask session is independent of the first. + const { createUwfStore } = await import("../store.js"); + const uwf = await createUwfStore(tmpDir); + const detailSchemaHash = await putSchema(uwf.store, DETAIL_SCHEMA); + const outputSchemaHash = await putSchema(uwf.store, OUTPUT_SCHEMA); + const otherDetailHash = await uwf.store.cas.put(detailSchemaHash, { + sessionId: "ses-original-step-002", + model: "test-model", + duration: 1000, + turnCount: 0, + turns: [], + }); + const otherOutputHash = await uwf.store.cas.put(outputSchemaHash, { + $status: "ok", + note: "alt", + }); + + // Reuse the same start ref the first step points to so the new step is a valid sibling. + const head = uwf.store.cas.get(stepHash); + const startRefFromHead = (head?.payload as { start: CasRef }).start; + const properOtherStep = await uwf.store.cas.put(uwf.schemas.stepNode, { + start: startRefFromHead, + prev: null, + role: "worker", + output: otherOutputHash, + detail: otherDetailHash, + agent: mockAgentPath, + edgePrompt: "Start work", + startedAtMs: 1716600002000, + completedAtMs: 1716600003000, + cwd: tmpDir, + assembledPrompt: null, + usage: null, + }); + + // sanity check we constructed a separate hash + expect(properOtherStep).not.toBe(stepHash); + + const r2 = runUwf( + ["step", "ask", properOtherStep, "-p", "ask on step B", "--agent", mockAgentPath], + casDir, + ); + expect(r2.status).toBe(0); + + const cachePath = join(tmpDir, "cache", "mock-sessions.json"); + const raw = await readFile(cachePath, "utf8"); + const parsed = JSON.parse(raw) as Record; + expect(parsed[`${stepHash}:ask`]).toBeDefined(); + expect(parsed[`${properOtherStep}:ask`]).toBeDefined(); + expect(parsed[`${stepHash}:ask`]).not.toBe(parsed[`${properOtherStep}:ask`]); + }); +}); + +// ── Group 5: Fallback (agent has no fork support) ───────────────────────── + +describe("uwf step ask - fallback path", () => { + test("5.1 fallback agent (no fork support) still answers via stdout", async () => { + // Use a fallback agent that ONLY supports `ask` mode without ever being asked + // to fork. The CLI should detect missing fork support and inject context instead. + const { casDir, stepHash, mockAgentPath } = await setupAskFixture(); + + // Create a fallback agent script that fails with non-zero exit on "fork" mode. + // Fallback path must NOT call mode=fork; it should call mode=ask directly. + const fallbackPath = join(tmpDir, "fallback-agent.sh"); + const promptCapture = join(tmpDir, "fallback-prompt.txt"); + const sessionCapture = join(tmpDir, "fallback-session.txt"); + const modeCapture = join(tmpDir, "fallback-mode.txt"); + await writeFile( + fallbackPath, + `#!/bin/sh +mode="" +prompt="" +session="" +detail="" +while [ $# -gt 0 ]; do + case "$1" in + --mode) mode="$2"; shift 2 ;; + --prompt) prompt="$2"; shift 2 ;; + --session) session="$2"; shift 2 ;; + --detail) detail="$2"; shift 2 ;; + *) shift ;; + esac +done +printf '%s' "$mode" > '${modeCapture}' +printf '%s' "$prompt" > '${promptCapture}' +printf '%s' "$session" > '${sessionCapture}' +case "$mode" in + fork) echo "fork not supported" >&2; exit 99 ;; + ask) printf 'FALLBACK_ANSWER for: %s (detail=%s)\\n' "$prompt" "$detail" ;; + *) echo "unknown" >&2; exit 1 ;; +esac +`, + { mode: 0o755 }, + ); + + const result = runUwf( + ["step", "ask", stepHash, "-p", "explain context", "--agent", fallbackPath, "--no-fork"], + casDir, + ); + expect(result.status).toBe(0); + expect(result.stdout).toContain("FALLBACK_ANSWER"); + expect(result.stdout).toContain("explain context"); + + // The fallback agent should be invoked in `ask` mode, with NO session id + // (since no fork happened). The detail ref must be passed for context injection. + const mode = await readFile(modeCapture, "utf8"); + expect(mode).toBe("ask"); + const session = await readFile(sessionCapture, "utf8"); + expect(session).toBe(""); + + // Make sure mockAgentPath's mock never ran. + void mockAgentPath; + }); + + test("5.2 fallback ask still does NOT mutate thread state", async () => { + const { casDir, stepHash } = await setupAskFixture(); + + const fallbackPath = join(tmpDir, "fallback-agent.sh"); + await writeFile( + fallbackPath, + `#!/bin/sh +mode="" +prompt="" +while [ $# -gt 0 ]; do + case "$1" in + --mode) mode="$2"; shift 2 ;; + --prompt) prompt="$2"; shift 2 ;; + *) shift ;; + esac +done +case "$mode" in + fork) echo "fork not supported" >&2; exit 99 ;; + ask) printf 'OK %s\\n' "$prompt" ;; + *) exit 1 ;; +esac +`, + { mode: 0o755 }, + ); + + const { createUwfStore, getThread } = await import("../store.js"); + const before = await createUwfStore(tmpDir); + const beforeEntry = getThread(before.varStore, THREAD_ID); + + const result = runUwf( + ["step", "ask", stepHash, "-p", "any", "--agent", fallbackPath, "--no-fork"], + casDir, + ); + expect(result.status).toBe(0); + + const after = await createUwfStore(tmpDir); + const afterEntry = getThread(after.varStore, THREAD_ID); + expect(afterEntry?.head).toBe(beforeEntry?.head); + expect(afterEntry?.status).toBe(beforeEntry?.status); + }); +}); + +// ── Group 6: Agent resolution ───────────────────────────────────────────── + +describe("uwf step ask - agent resolution", () => { + test("6.1 without --agent flag, agent is resolved from step's agent field", async () => { + // Step's agent field points at mockAgentPath by default. + const { casDir, stepHash, modeCapturePath, promptCapturePath } = await setupAskFixture(); + const result = runUwf(["step", "ask", stepHash, "-p", "explain"], casDir); + expect(result.status).toBe(0); + + // The mockAgentPath must have been invoked in ask mode with the user prompt. + const mode = await readFile(modeCapturePath, "utf8"); + expect(mode).toBe("ask"); + const captured = await readFile(promptCapturePath, "utf8"); + expect(captured).toBe("explain"); + }); + + test("6.2 --agent override beats step's recorded agent", async () => { + // Record a non-existent agent in step.agent. Provide a working one via --agent. + const { casDir, stepHash, mockAgentPath } = await setupAskFixture({ + stepAgentNameOverride: "uwf-does-not-exist", + }); + const result = runUwf( + ["step", "ask", stepHash, "-p", "explain", "--agent", mockAgentPath], + casDir, + ); + expect(result.status).toBe(0); + expect(result.stdout).toContain("MOCK_ANSWER"); + }); +}); diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 62b5195..62b726e 100755 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -12,7 +12,7 @@ import { cmdPromptWorkflowAuthoring, } from "./commands/prompt.js"; import { cmdSetup, cmdSetupInteractive, resolvePresetBaseUrl } from "./commands/setup.js"; -import { cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js"; +import { cmdStepAsk, cmdStepFork, cmdStepList, cmdStepRead, cmdStepShow } from "./commands/step.js"; import { cmdThreadCancel, cmdThreadExec, @@ -390,6 +390,32 @@ step }); }); +step + .command("ask") + .description( + "Ask a follow-up question to a historical step's agent (read-only; no thread mutation)", + ) + .argument("", "CAS hash of the StepNode to query") + .requiredOption("-p, --prompt ", "Question to ask the step's agent") + .option("--agent ", "Override agent command (defaults to the step's recorded agent)") + .option( + "--no-fork", + "Skip session-fork; spawn the agent in a fresh ask session and inject the step's detail ref for context", + ) + .action( + (stepHash: string, opts: { prompt: string; agent: string | undefined; fork: boolean }) => { + const storageRoot = resolveStorageRoot(); + runAction(async () => { + const stdout = await cmdStepAsk(storageRoot, stepHash as CasRef, { + prompt: opts.prompt, + agentOverride: opts.agent ?? null, + fork: opts.fork !== false, + }); + process.stdout.write(stdout.endsWith("\n") ? stdout : `${stdout}\n`); + }); + }, + ); + step .command("read") .description("Read a step's turns as human-readable markdown") diff --git a/packages/cli/src/commands/step.ts b/packages/cli/src/commands/step.ts index 12070b9..e7d1fe3 100644 --- a/packages/cli/src/commands/step.ts +++ b/packages/cli/src/commands/step.ts @@ -1,5 +1,8 @@ +import { execFileSync } from "node:child_process"; import type { CasStore } from "@ocas/core"; import type { + AgentAlias, + AgentConfig, CasRef, StartEntry, StepEntry, @@ -7,9 +10,12 @@ import type { ThreadForkOutput, ThreadId, ThreadStepsOutput, + WorkflowConfig, + WorkflowPayload, } from "@united-workforce/protocol"; import { generateUlid } from "@united-workforce/util"; -import { createUwfStore, setThread } from "../store.js"; +import { getAskSessionId, loadWorkflowConfig, setAskSessionId } from "@united-workforce/util-agent"; +import { createUwfStore, setThread, type UwfStore } from "../store.js"; import { collectOrderedSteps, expandDeep, @@ -341,3 +347,224 @@ export async function cmdStepRead( return formatStepMarkdown(stepHash, payload.role, payload.agent, turnData, selectedTurns); } + +// ── step ask ──────────────────────────────────────────────────────────────── + +function parseAgentOverride(override: string): AgentConfig { + const parts = override + .trim() + .split(/\s+/) + .filter((p) => p.length > 0); + const command = parts[0]; + if (command === undefined) { + fail("agent override must not be empty"); + } + return { command, args: parts.slice(1) }; +} + +function resolveAskAgentConfig( + config: WorkflowConfig, + workflow: WorkflowPayload | null, + role: string, + agentOverride: string | null, + recordedAgent: string, +): AgentConfig { + if (agentOverride !== null) { + const fromAlias = config.agents[agentOverride as AgentAlias]; + if (fromAlias !== undefined) { + return fromAlias; + } + return parseAgentOverride(agentOverride); + } + + // Try to resolve via the recorded agent name as a config alias. + const fromRecorded = config.agents[recordedAgent as AgentAlias]; + if (fromRecorded !== undefined) { + return fromRecorded; + } + + // Fall back to default agent for the workflow / role. + if (workflow !== null && config.agentOverrides !== null) { + const roleOverrides = config.agentOverrides[workflow.name]; + if (roleOverrides !== undefined && roleOverrides[role] !== undefined) { + const alias = roleOverrides[role]; + const agentConfig = config.agents[alias]; + if (agentConfig !== undefined) { + return agentConfig; + } + } + } + + // Treat the recorded value as a raw command path. + return parseAgentOverride(recordedAgent); +} + +/** + * Derive the agent name used for cache file partitioning from an executable + * path or alias. Examples: + * uwf-hermes → hermes + * uwf-claude-code → claude-code + * /tmp/mock-agent.sh → mock + * /usr/bin/agent → agent + */ +function deriveAgentName(commandPath: string): string { + const basename = commandPath.split(/[/\\]/).pop() ?? commandPath; + // Strip a trailing extension (.sh, .js, .mjs, .cjs) + const noExt = basename.replace(/\.(sh|js|mjs|cjs|ts)$/i, ""); + // Strip the `uwf-` prefix introduced by agentLabel(). + const noPrefix = noExt.startsWith("uwf-") ? noExt.slice(4) : noExt; + // Strip the trailing `-agent` suffix used by tests / generic agent shells. + const noSuffix = noPrefix.endsWith("-agent") ? noPrefix.slice(0, -"-agent".length) : noPrefix; + return noSuffix === "" ? noExt : noSuffix; +} + +function loadDetailNode( + store: CasStore, + detailRef: CasRef, +): { sessionId: string | null; payload: Record } { + const detailNode = store.get(detailRef); + if (detailNode === null) { + fail(`detail node not found: ${detailRef}`); + } + const payload = detailNode.payload as Record; + const sessionId = typeof payload.sessionId === "string" ? payload.sessionId : null; + return { sessionId, payload }; +} + +function spawnAskAgent(agent: AgentConfig, argv: string[], cwd: string): { stdout: string } { + try { + const stdout = execFileSync(agent.command, [...agent.args, ...argv], { + encoding: "utf8", + stdio: ["ignore", "pipe", "pipe"], + maxBuffer: 50 * 1024 * 1024, + cwd, + }); + return { stdout }; + } catch (e) { + const err = e as NodeJS.ErrnoException & { stderr?: Buffer | string | null }; + if (err.code === "ENOENT") { + fail( + `"${agent.command}" not found in PATH. Install it or check your PATH config. Run: which ${agent.command}`, + ); + } + const stderr = + err.stderr == null + ? "" + : typeof err.stderr === "string" + ? err.stderr + : err.stderr.toString("utf8"); + const detail = stderr.trim() !== "" ? `: ${stderr.trim()}` : ""; + fail(`agent command failed (${agent.command})${detail}`); + } +} + +async function resolveAskWorkflow( + uwf: UwfStore, + payload: StepNodePayload, +): Promise { + const startNode = uwf.store.cas.get(payload.start); + if (startNode === null) { + return null; + } + const start = startNode.payload as { workflow: CasRef }; + const workflowNode = uwf.store.cas.get(start.workflow); + if (workflowNode === null) { + return null; + } + return workflowNode.payload as WorkflowPayload; +} + +async function performFork( + agent: AgentConfig, + agentName: string, + stepHash: CasRef, + sourceSessionId: string, + storageRoot: string, + cwd: string, +): Promise { + const cached = await getAskSessionId(agentName, stepHash, storageRoot); + if (cached !== null) { + return cached; + } + const { stdout } = spawnAskAgent(agent, ["--mode", "fork", "--session", sourceSessionId], cwd); + const newSessionId = stdout.trim().split("\n").pop()?.trim() ?? ""; + if (newSessionId === "") { + fail(`agent fork did not return a session id (${agent.command})`); + } + await setAskSessionId(agentName, stepHash, newSessionId, storageRoot); + return newSessionId; +} + +export type CmdStepAskOptions = { + prompt: string; + agentOverride: string | null; + /** When false, skip session forking and pass detail ref for context injection. */ + fork: boolean; +}; + +/** + * Ask a follow-up question to a historical step's agent (read-only). + * + * Does NOT write a new StepNode and does NOT mutate thread state. The agent's + * raw stdout is returned so the CLI entry point can stream it directly. + */ +export async function cmdStepAsk( + storageRoot: string, + stepHash: CasRef, + options: CmdStepAskOptions, +): Promise { + const uwf = await createUwfStore(storageRoot); + const node = uwf.store.cas.get(stepHash); + if (node === null) { + fail(`CAS node not found: ${stepHash}`); + } + if (node.type !== uwf.schemas.stepNode) { + fail(`node ${stepHash} is not a StepNode`); + } + const payload = node.payload as StepNodePayload; + if (payload.detail === null) { + fail(`step ${stepHash} has no detail; cannot ask`); + } + + const detailRef = payload.detail; + const { sessionId: sourceSessionId } = loadDetailNode(uwf.store.cas, detailRef); + + const workflow = await resolveAskWorkflow(uwf, payload); + const config = await loadWorkflowConfig(storageRoot); + const agent = resolveAskAgentConfig( + config, + workflow, + payload.role, + options.agentOverride, + payload.agent, + ); + const agentName = deriveAgentName(agent.command); + + const cwd = payload.cwd !== "" ? payload.cwd : process.cwd(); + + // Fork path: fork (or reuse cached fork) → ask with that session. + if (options.fork && sourceSessionId !== null) { + const askSessionId = await performFork( + agent, + agentName, + stepHash, + sourceSessionId, + storageRoot, + cwd, + ); + const argv = ["--mode", "ask", "--session", askSessionId, "--prompt", options.prompt]; + if (detailRef !== null) { + argv.push("--detail", detailRef); + } + const { stdout } = spawnAskAgent(agent, argv, cwd); + return stdout; + } + + // Fallback path: ask without forking; inject detail ref for context. + const argv = ["--mode", "ask", "--prompt", options.prompt]; + if (detailRef !== null) { + argv.push("--detail", detailRef); + } + const { stdout } = spawnAskAgent(agent, argv, cwd); + return stdout; +} diff --git a/packages/util/src/usage-reference.ts b/packages/util/src/usage-reference.ts index e3cea8c..f2e7d76 100644 --- a/packages/util/src/usage-reference.ts +++ b/packages/util/src/usage-reference.ts @@ -94,10 +94,15 @@ start → exec (repeat) → thread reaches $END → auto-completed uwf step list # list all steps uwf step show # show step details uwf step fork # fork thread from a step (branch) +uwf step ask -p [--agent ] [--no-fork] + # ask a follow-up question to the step's agent + # (read-only; no new step, no thread mutation) \`\`\` Forking creates a new thread that shares history up to the fork point — useful for retrying from a known-good state. +\`step ask\` re-opens the agent session that produced \`\` and returns its answer on stdout. Subsequent asks reuse the same forked session via the per-agent ask-cache; \`--no-fork\` runs the agent fresh with the step's detail ref injected for context. + ## CAS Commands Use the \`ocas\` CLI for direct CAS operations (\`~/.ocas/\` store, shared with \`uwf\`):