diff --git a/docs/workflows/WORKFLOW_DELIVERY_CHECKLIST.md b/docs/workflows/WORKFLOW_DELIVERY_CHECKLIST.md
index 64c3ffe..e2ee62d 100644
--- a/docs/workflows/WORKFLOW_DELIVERY_CHECKLIST.md
+++ b/docs/workflows/WORKFLOW_DELIVERY_CHECKLIST.md
@@ -120,6 +120,7 @@ Parallel execution ownership and stage gates are defined in [`docs/workflows/WOR
 - `shadow` must finish with a successful order line and a comparison verdict of `pass`
 - `warn` or `fail` means legacy remains authoritative
 - `graph` may only be enabled on real output types after the shadow command passes cleanly
+- Progress: the canonical still smoke flow now passes live in `legacy` and `graph`; `shadow` stabilizes after a short observer-output lag and currently reports `warn` because the observer image differs slightly, so legacy remains authoritative for rollout decisions.
 
 ## Definition of Done
 
diff --git a/scripts/test_render_pipeline.py b/scripts/test_render_pipeline.py
index 7d6637d..175194e 100644
--- a/scripts/test_render_pipeline.py
+++ b/scripts/test_render_pipeline.py
@@ -42,6 +42,7 @@ SAMPLE_STEP = Path(__file__).parent.parent / "step-sample-file" / "81113-l_cut.s
 RENDER_TIMEOUT_SECONDS = 300 # 5 minutes per render
 POLL_INTERVAL_SECONDS = 5
 CAD_PROCESSING_TIMEOUT = 120 # 2 minutes for STEP processing
+COMPARISON_TIMEOUT_SECONDS = 60
 
 GREEN = "\033[92m"
 RED = "\033[91m"
@@ -508,6 +509,68 @@ def wait_for_workflow_run(
     return None
 
 
+def wait_for_workflow_comparison(
+    client: APIClient,
+    *,
+    workflow_run_id: str,
+    timeout_seconds: int = COMPARISON_TIMEOUT_SECONDS,
+    poll_interval_seconds: float = 2.0,
+) -> dict | None:
+    """Poll a workflow run's comparison endpoint until the result settles.
+
+    Args:
+        client: API client used to issue the GET requests.
+        workflow_run_id: ID of the workflow run whose comparison is polled.
+        timeout_seconds: Maximum time to keep polling.
+        poll_interval_seconds: Pause between two consecutive polls.
+
+    Returns:
+        The comparison payload once both outputs exist and the status is no
+        longer transient, or ``None`` if that does not happen before the timeout.
+    """
+    # Monotonic clock: wall-clock adjustments must not stretch or cut the wait.
+    deadline = time.monotonic() + timeout_seconds
+    transient_statuses = {"missing_observer", "pending", "running"}
+    last_status = None
+    last_failure = None
+
+    while time.monotonic() < deadline:
+        resp = client.get(f"/workflows/runs/{workflow_run_id}/comparison")
+        if resp.status_code != 200:
+            last_failure = f"{resp.status_code} {resp.text[:300]}"
+            time.sleep(poll_interval_seconds)
+            continue
+        last_failure = None
+
+        comparison = resp.json()
+        status = comparison.get("status")
+        # `or {}` also covers an explicit JSON null, not just a missing key.
+        authoritative = comparison.get("authoritative_output") or {}
+        observer = comparison.get("observer_output") or {}
+        authoritative_exists = bool(authoritative.get("exists"))
+        observer_exists = bool(observer.get("exists"))
+
+        if status != last_status:
+            info(
+                " Comparison poll: "
+                f"status={status} authoritative_exists={authoritative_exists} "
+                f"observer_exists={observer_exists}"
+            )
+            last_status = status
+
+        # Shadow observer artifacts can arrive shortly after the workflow run is visible.
+        # Treat missing/processing observer states as transient until the timeout expires.
+        if authoritative_exists and observer_exists and status not in transient_statuses:
+            return comparison
+
+        time.sleep(poll_interval_seconds)
+
+    if last_failure is not None:
+        # Surface the HTTP failure; callers only see a generic timeout otherwise.
+        warn(f"Shadow comparison lookup failed: {last_failure}")
+    return None
+
+
 # ---------------------------------------------------------------------------
 # Test: Order creation + submit + dispatch + wait
 # ---------------------------------------------------------------------------
@@ -748,12 +811,13 @@ def test_workflow_still_smoke(
     )
 
     if success and execution_mode == "shadow" and workflow_run is not None:
-        resp_cmp = client.get(f"/workflows/runs/{workflow_run['id']}/comparison")
-        if resp_cmp.status_code != 200:
-            warn(f"Shadow comparison lookup failed: {resp_cmp.status_code} {resp_cmp.text[:300]}")
+        comparison = wait_for_workflow_comparison(
+            client,
+            workflow_run_id=workflow_run["id"],
+        )
+        if comparison is None:
+            warn("Shadow comparison did not stabilize before timeout")
             return success
-
-        comparison = resp_cmp.json()
         rollout_gate = evaluate_rollout_gate_from_comparison(comparison)
         verdict = rollout_gate["verdict"]
         info(