fix: stabilize shadow workflow smoke comparison
This commit is contained in:
@@ -120,6 +120,7 @@ Parallel execution ownership and stage gates are defined in [`docs/workflows/WOR
|
|||||||
- `shadow` must finish with a successful order line and a comparison verdict of `pass`
|
- `shadow` must finish with a successful order line and a comparison verdict of `pass`
|
||||||
- `warn` or `fail` means legacy remains authoritative
|
- `warn` or `fail` means legacy remains authoritative
|
||||||
- `graph` may only be enabled on real output types after the shadow command passes cleanly
|
- `graph` may only be enabled on real output types after the shadow command passes cleanly
|
||||||
|
- Progress: the canonical still smoke flow now passes live in `legacy` and `graph`; `shadow` stabilizes after a short observer-output lag and currently reports `warn` because the observer image differs slightly, so legacy remains authoritative for rollout decisions.
|
||||||
|
|
||||||
## Definition of Done
|
## Definition of Done
|
||||||
|
|
||||||
|
|||||||
@@ -42,6 +42,7 @@ SAMPLE_STEP = Path(__file__).parent.parent / "step-sample-file" / "81113-l_cut.s
|
|||||||
RENDER_TIMEOUT_SECONDS = 300 # 5 minutes per render
|
RENDER_TIMEOUT_SECONDS = 300 # 5 minutes per render
|
||||||
POLL_INTERVAL_SECONDS = 5
|
POLL_INTERVAL_SECONDS = 5
|
||||||
CAD_PROCESSING_TIMEOUT = 120 # 2 minutes for STEP processing
|
CAD_PROCESSING_TIMEOUT = 120 # 2 minutes for STEP processing
|
||||||
|
COMPARISON_TIMEOUT_SECONDS = 60  # default wait (seconds) for the shadow comparison to stabilize
|
||||||
|
|
||||||
GREEN = "\033[92m"
|
GREEN = "\033[92m"
|
||||||
RED = "\033[91m"
|
RED = "\033[91m"
|
||||||
@@ -508,6 +509,44 @@ def wait_for_workflow_run(
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def wait_for_workflow_comparison(
    client: APIClient,
    *,
    workflow_run_id: str,
    timeout_seconds: int = COMPARISON_TIMEOUT_SECONDS,
) -> dict | None:
    """Poll a workflow run's comparison endpoint until the result is stable.

    The comparison is considered stable once both the authoritative and the
    observer output report ``exists`` and the status is no longer one of the
    transient states (``missing_observer``, ``pending``, ``running``).

    Args:
        client: API client used to issue the GET requests.
        workflow_run_id: Identifier of the workflow run whose comparison
            is polled.
        timeout_seconds: Maximum time to keep polling before giving up.

    Returns:
        The comparison payload as returned by the endpoint, or ``None`` when
        it did not stabilize before the timeout expired.
    """
    transient_statuses = {"missing_observer", "pending", "running"}
    poll_delay_seconds = 2

    # Use the monotonic clock so wall-clock adjustments cannot stretch or
    # shorten the timeout.
    deadline = time.monotonic() + timeout_seconds
    last_status = None

    while time.monotonic() < deadline:
        resp = client.get(f"/workflows/runs/{workflow_run_id}/comparison")

        # Non-200 responses are treated as transient and simply retried.
        if resp.status_code == 200:
            comparison = resp.json()
            status = comparison.get("status")
            # `or {}` also covers an explicit JSON null, where the plain
            # `.get(key, {})` default would hand back None and crash.
            authoritative_exists = bool(
                (comparison.get("authoritative_output") or {}).get("exists")
            )
            observer_exists = bool(
                (comparison.get("observer_output") or {}).get("exists")
            )

            # Log only on status transitions to keep the poll output short.
            if status != last_status:
                info(
                    " Comparison poll: "
                    f"status={status} authoritative_exists={authoritative_exists} "
                    f"observer_exists={observer_exists}"
                )
                last_status = status

            # Shadow observer artifacts can arrive shortly after the workflow
            # run is visible. Treat missing/processing observer states as
            # transient until the timeout expires.
            if (
                authoritative_exists
                and observer_exists
                and status not in transient_statuses
            ):
                return comparison

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_delay_seconds, remaining))

    return None
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Test: Order creation + submit + dispatch + wait
|
# Test: Order creation + submit + dispatch + wait
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -748,12 +787,13 @@ def test_workflow_still_smoke(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if success and execution_mode == "shadow" and workflow_run is not None:
|
if success and execution_mode == "shadow" and workflow_run is not None:
|
||||||
resp_cmp = client.get(f"/workflows/runs/{workflow_run['id']}/comparison")
|
comparison = wait_for_workflow_comparison(
|
||||||
if resp_cmp.status_code != 200:
|
client,
|
||||||
warn(f"Shadow comparison lookup failed: {resp_cmp.status_code} {resp_cmp.text[:300]}")
|
workflow_run_id=workflow_run["id"],
|
||||||
|
)
|
||||||
|
if comparison is None:
|
||||||
|
warn("Shadow comparison did not stabilize before timeout")
|
||||||
return success
|
return success
|
||||||
|
|
||||||
comparison = resp_cmp.json()
|
|
||||||
rollout_gate = evaluate_rollout_gate_from_comparison(comparison)
|
rollout_gate = evaluate_rollout_gate_from_comparison(comparison)
|
||||||
verdict = rollout_gate["verdict"]
|
verdict = rollout_gate["verdict"]
|
||||||
info(
|
info(
|
||||||
|
|||||||
Reference in New Issue
Block a user