fix: stabilize shadow workflow smoke comparison

2026-04-08 22:14:33 +02:00
parent 375339eb74
commit d685031c1a
2 changed files with 46 additions and 5 deletions
@@ -120,6 +120,7 @@ Parallel execution ownership and stage gates are defined in [`docs/workflows/WOR
  - `shadow` must finish with a successful order line and a comparison verdict of `pass`
  - `warn` or `fail` means legacy remains authoritative
  - `graph` may only be enabled on real output types after the shadow command passes cleanly
 - Progress: the canonical still smoke flow now passes live in `legacy` and `graph`; `shadow` stabilizes after a short observer-output lag and currently reports `warn` because the observer image differs slightly, so legacy remains authoritative for rollout decisions.
 ## Definition of Done
@@ -42,6 +42,7 @@ SAMPLE_STEP = Path(__file__).parent.parent / "step-sample-file" / "81113-l_cut.s
 RENDER_TIMEOUT_SECONDS = 300  # 5 minutes per render
 POLL_INTERVAL_SECONDS = 5  # presumably seconds between status polls; usage not visible in this hunk
 CAD_PROCESSING_TIMEOUT = 120  # 2 minutes for STEP processing
 COMPARISON_TIMEOUT_SECONDS = 60  # default wait (seconds) for a shadow comparison to stabilize
 GREEN = "\033[92m"
 RED = "\033[91m"
@@ -508,6 +509,44 @@ def wait_for_workflow_run(
    return None
 def wait_for_workflow_comparison(
    client: APIClient,
    *,
    workflow_run_id: str,
    timeout_seconds: int = COMPARISON_TIMEOUT_SECONDS,
    poll_interval_seconds: float = 2,
 ) -> dict | None:
    """Poll the comparison endpoint of a workflow run until it stabilizes.

    A comparison counts as stable once both ``authoritative_output`` and
    ``observer_output`` report ``exists`` and ``status`` is not one of the
    transient values ``missing_observer``, ``pending`` or ``running``.

    Args:
        client: API client used to issue the GET requests.
        workflow_run_id: Identifier interpolated into the comparison URL.
        timeout_seconds: Maximum time to keep polling.
        poll_interval_seconds: Pause between two polls.

    Returns:
        The decoded comparison payload once it is stable, or ``None`` when
        the timeout expires first.
    """
    # Shadow observer artifacts can arrive shortly after the workflow run is visible.
    # Treat missing/processing observer states as transient until the timeout expires.
    transient_statuses = {"missing_observer", "pending", "running"}

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout_seconds
    last_logged = None
    while time.monotonic() < deadline:
        resp = client.get(f"/workflows/runs/{workflow_run_id}/comparison")
        comparison = resp.json() if resp.status_code == 200 else None
        # Non-200 responses and non-object payloads are retried like any
        # other transient state instead of crashing the smoke run.
        if isinstance(comparison, dict):
            status = comparison.get("status")
            # The output entries may be absent or JSON null while the run is
            # still settling; only a dict can carry an "exists" flag.
            authoritative = comparison.get("authoritative_output")
            observer = comparison.get("observer_output")
            authoritative_exists = bool(
                isinstance(authoritative, dict) and authoritative.get("exists")
            )
            observer_exists = bool(
                isinstance(observer, dict) and observer.get("exists")
            )

            # Log whenever anything shown in the message changes, so an
            # observer output appearing under an unchanged status is visible.
            snapshot = (status, authoritative_exists, observer_exists)
            if snapshot != last_logged:
                info(
                    "  Comparison poll: "
                    f"status={status} authoritative_exists={authoritative_exists} "
                    f"observer_exists={observer_exists}"
                )
                last_logged = snapshot

            if authoritative_exists and observer_exists and status not in transient_statuses:
                return comparison
        time.sleep(poll_interval_seconds)
    return None
 # ---------------------------------------------------------------------------
 # Test: Order creation + submit + dispatch + wait
 # ---------------------------------------------------------------------------
@@ -748,12 +787,13 @@ def test_workflow_still_smoke(
            )
    if success and execution_mode == "shadow" and workflow_run is not None:
-        resp_cmp = client.get(f"/workflows/runs/{workflow_run['id']}/comparison")
+        comparison = wait_for_workflow_comparison(
-        if resp_cmp.status_code != 200:
+            client,
-            warn(f"Shadow comparison lookup failed: {resp_cmp.status_code} {resp_cmp.text[:300]}")
+            workflow_run_id=workflow_run["id"],
        )
        if comparison is None:
            warn("Shadow comparison did not stabilize before timeout")
            return success
        comparison = resp_cmp.json()
        rollout_gate = evaluate_rollout_gate_from_comparison(comparison)
        verdict = rollout_gate["verdict"]
        info(