diff --git a/docs/workflows/WORKFLOW_DELIVERY_CHECKLIST.md b/docs/workflows/WORKFLOW_DELIVERY_CHECKLIST.md
index 64c3ffe..e2ee62d 100644
--- a/docs/workflows/WORKFLOW_DELIVERY_CHECKLIST.md
+++ b/docs/workflows/WORKFLOW_DELIVERY_CHECKLIST.md
@@ -120,6 +120,7 @@ Parallel execution ownership and stage gates are defined in [`docs/workflows/WOR
   - `shadow` must finish with a successful order line and a comparison verdict of `pass`
   - `warn` or `fail` means legacy remains authoritative
   - `graph` may only be enabled on real output types after the shadow command passes cleanly
+- Progress: the canonical still smoke flow now passes live in `legacy` and `graph` modes. In `shadow` mode the comparison stabilizes after a short observer-output lag and currently reports `warn` because the observer image differs slightly, so `legacy` remains authoritative for rollout decisions.
 
 ## Definition of Done
 
diff --git a/scripts/test_render_pipeline.py b/scripts/test_render_pipeline.py
index 7d6637d..175194e 100644
--- a/scripts/test_render_pipeline.py
+++ b/scripts/test_render_pipeline.py
@@ -42,6 +42,7 @@ SAMPLE_STEP = Path(__file__).parent.parent / "step-sample-file" / "81113-l_cut.s
 RENDER_TIMEOUT_SECONDS = 300  # 5 minutes per render
 POLL_INTERVAL_SECONDS = 5
 CAD_PROCESSING_TIMEOUT = 120  # 2 minutes for STEP processing
+COMPARISON_TIMEOUT_SECONDS = 60  # 1 minute for the shadow comparison to stabilize
 
 GREEN = "\033[92m"
 RED = "\033[91m"
@@ -508,6 +509,71 @@ def wait_for_workflow_run(
     return None
 
 
+def wait_for_workflow_comparison(
+    client: APIClient,
+    *,
+    workflow_run_id: str,
+    timeout_seconds: int = COMPARISON_TIMEOUT_SECONDS,
+    poll_interval_seconds: float = 2,
+) -> dict | None:
+    """Poll a workflow run's comparison endpoint until the result is stable.
+
+    The comparison is considered stable once both the authoritative and the
+    observer output report ``exists`` and the status is not one of
+    ``missing_observer``, ``pending`` or ``running``.
+
+    Args:
+        client: API client used to issue the GET requests.
+        workflow_run_id: ID of the workflow run whose comparison is polled.
+        timeout_seconds: Maximum total time, in seconds, to keep polling.
+        poll_interval_seconds: Pause, in seconds, between consecutive polls.
+
+    Returns:
+        The comparison payload once it is stable, or ``None`` if the timeout
+        expired first.
+    """
+    transient_statuses = {"missing_observer", "pending", "running"}
+    # Monotonic clock: the deadline must not move with wall-clock adjustments.
+    deadline = time.monotonic() + timeout_seconds
+    last_logged = None
+
+    while time.monotonic() < deadline:
+        resp = client.get(f"/workflows/runs/{workflow_run_id}/comparison")
+        if resp.status_code != 200:
+            # A non-200 may be transient, but log each distinct code once so a
+            # persistent API error is visible instead of a bare timeout.
+            if resp.status_code != last_logged:
+                info(f"  Comparison poll: HTTP {resp.status_code}")
+                last_logged = resp.status_code
+            time.sleep(poll_interval_seconds)
+            continue
+
+        comparison = resp.json()
+        status = comparison.get("status")
+        # `or {}` also covers an output key that is present but null.
+        authoritative_exists = bool((comparison.get("authoritative_output") or {}).get("exists"))
+        observer_exists = bool((comparison.get("observer_output") or {}).get("exists"))
+
+        # Log on any change, including an output appearing under the same status.
+        snapshot = (status, authoritative_exists, observer_exists)
+        if snapshot != last_logged:
+            info(
+                "  Comparison poll: "
+                f"status={status} authoritative_exists={authoritative_exists} "
+                f"observer_exists={observer_exists}"
+            )
+            last_logged = snapshot
+
+        # Shadow observer artifacts can arrive shortly after the workflow run is visible.
+        # Treat missing/processing observer states as transient until the timeout expires.
+        if authoritative_exists and observer_exists and status not in transient_statuses:
+            return comparison
+
+        time.sleep(poll_interval_seconds)
+
+    return None
+
+
 # ---------------------------------------------------------------------------
 # Test: Order creation + submit + dispatch + wait
 # ---------------------------------------------------------------------------
@@ -748,12 +787,13 @@ def test_workflow_still_smoke(
             )
 
     if success and execution_mode == "shadow" and workflow_run is not None:
-        resp_cmp = client.get(f"/workflows/runs/{workflow_run['id']}/comparison")
-        if resp_cmp.status_code != 200:
-            warn(f"Shadow comparison lookup failed: {resp_cmp.status_code} {resp_cmp.text[:300]}")
+        comparison = wait_for_workflow_comparison(
+            client,
+            workflow_run_id=workflow_run["id"],
+        )
+        if comparison is None:
+            warn("Shadow comparison did not stabilize before timeout")
             return success
-
-        comparison = resp_cmp.json()
         rollout_gate = evaluate_rollout_gate_from_comparison(comparison)
         verdict = rollout_gate["verdict"]
         info(