feat: add workflow rollout gate signals

2026-04-08 21:44:02 +02:00
parent 8c9648d5dc
commit fe46dabfc5
4 changed files with 1624 additions and 101 deletions
@@ -53,6 +53,9 @@ passed = []
 failed = []
 warnings = []

+ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA = 0.0  # mean pixel delta at or below this gates as "pass"
+ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA = 0.02  # above pass but at or below this gates as "warn"
+

 # ---------------------------------------------------------------------------
 # Helpers
@@ -83,6 +86,55 @@ def section(title: str):
    print(f"{BLUE}{'='*60}{RESET}")


+def evaluate_rollout_gate_from_comparison(comparison: dict) -> dict:
+    """Turn a workflow comparison payload into a rollout gate verdict.
+
+    Reads ``authoritative_output``/``observer_output`` (``exists`` flag each),
+    ``exact_match``, ``dimensions_match``, ``mean_pixel_delta`` and ``status``.
+
+    Returns a dict with ``verdict`` ("pass", "warn" or "fail"), ``ready`` (true
+    only for "pass") and ``reasons`` (list of explanation strings).
+    """
+    mean_pixel_delta = comparison.get("mean_pixel_delta")
+    exact_match = comparison.get("exact_match")
+    dimensions_match = comparison.get("dimensions_match")
+    status = comparison.get("status")
+
+    # "or {}" treats an explicit null output entry like a missing one instead
+    # of raising AttributeError on None.
+    authoritative_exists = bool((comparison.get("authoritative_output") or {}).get("exists"))
+    observer_exists = bool((comparison.get("observer_output") or {}).get("exists"))
+
+    # The first matching rule decides the verdict.
+    if not authoritative_exists:
+        verdict, reason = "fail", "Authoritative legacy output is missing."
+    elif not observer_exists:
+        verdict, reason = "fail", "Observer workflow output is missing."
+    elif exact_match:
+        verdict, reason = "pass", "Observer output matches the authoritative legacy output byte-for-byte."
+    elif dimensions_match is False:
+        verdict, reason = "fail", "Observer output dimensions differ from the authoritative legacy output."
+    elif mean_pixel_delta is None:
+        verdict, reason = "fail", f"Workflow comparison did not produce a pixel delta (status={status})."
+    elif mean_pixel_delta <= ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA:
+        verdict, reason = "pass", "Observer output is visually identical within the pass threshold."
+    elif mean_pixel_delta <= ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA:
+        verdict, reason = "warn", "Observer output differs slightly but remains within the warn threshold."
+    else:
+        verdict, reason = "fail", "Observer output exceeds the rollout parity threshold."
+
+    reasons: list[str] = [reason]
+    if mean_pixel_delta is not None and not exact_match:
+        # Report the measured delta next to the thresholds it was judged against.
+        reasons.append(
+            f"Mean pixel delta {mean_pixel_delta:.6f}; "
+            f"pass<={ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA:.6f}, "
+            f"warn<={ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA:.6f}."
+        )
+
+    return {"verdict": verdict, "ready": verdict == "pass", "reasons": reasons}
+
+
 class APIClient:
    def __init__(self, host: str, email: str, password: str):
        self.host = host.rstrip("/")
@@ -93,7 +145,7 @@ class APIClient:
    def _login(self, email: str, password: str):
        resp = self.session.post(
            f"{self.host}/api/auth/login",
-            data={"username": email, "password": password},
+            json={"email": email, "password": password},
        )
        resp.raise_for_status()
        data = resp.json()
@@ -110,6 +162,44 @@ class APIClient:
        return self.session.delete(f"{self.host}/api{path}", **kwargs)


+def build_graph_still_config() -> dict:
+    """Build the graph-mode workflow config for a single still render.
+
+    The result describes four nodes chained linearly
+    (setup -> template -> render -> output), laid out on one row.
+
+    Returns:
+        A new dict with ``version``, ``ui``, ``nodes`` and ``edges`` keys.
+    """
+    # (node id, step name, UI label, UI node type or None), in chain order.
+    node_specs = [
+        ("setup", "order_line_setup", "Order Line Setup", None),
+        ("template", "resolve_template", "Resolve Template", None),
+        ("render", "blender_still", "Still Render", "renderNode"),
+        ("output", "output_save", "Save Output", "outputNode"),
+    ]
+
+    nodes = []
+    for index, (node_id, step, label, ui_type) in enumerate(node_specs):
+        # Only the render and output nodes carry an explicit UI "type".
+        ui = {"type": ui_type} if ui_type else {}
+        ui["label"] = label
+        # x advances by 220 per node; every node shares y=100.
+        ui["position"] = {"x": 220 * index, "y": 100}
+        nodes.append({"id": node_id, "step": step, "params": {}, "ui": ui})
+
+    # Each node feeds the next one in the list.
+    node_ids = [node["id"] for node in nodes]
+    edges = [{"from": src, "to": dst} for src, dst in zip(node_ids, node_ids[1:])]
+
+    return {
+        "version": 1,
+        "ui": {"preset": "still_graph", "execution_mode": "graph"},
+        "nodes": nodes,
+        "edges": edges,
+    }
+
+
 # ---------------------------------------------------------------------------
 # Test: Render health endpoint
 # ---------------------------------------------------------------------------
@@ -174,24 +264,34 @@ def test_step_upload(client: APIClient, step_file: Path) -> str | None:
    cad_file_id = data["cad_file_id"]
    ok(f"STEP uploaded → cad_file_id={cad_file_id[:8]}... status={data.get('status')}")

-    # Poll for completed processing
+    # Poll the existing CAD endpoints. There is no GET /api/cad/{id}; the most
+    # reliable readiness signal is /objects returning 200 with processing_status.
    info(f"Waiting for CAD processing (timeout={CAD_PROCESSING_TIMEOUT}s)...")
    deadline = time.time() + CAD_PROCESSING_TIMEOUT
    last_status = None
    while time.time() < deadline:
-        resp = client.get(f"/cad/{cad_file_id}")
-        if resp.status_code == 200:
-            cad = resp.json()
+        resp_objects = client.get(f"/cad/{cad_file_id}/objects")
+        if resp_objects.status_code == 200:
+            cad = resp_objects.json()
            status = cad.get("processing_status")
            if status != last_status:
                info(f"  CAD status: {status}")
                last_status = status
            if status == "completed":
-                ok(f"CAD processing completed (thumbnail rendered)")
+                ok("CAD processing completed (parsed objects available)")
                return cad_file_id
            if status == "failed":
                fail(f"CAD processing FAILED: {cad.get('error_message', 'unknown error')}")
                return None
+
+        resp_thumb = client.get(f"/cad/{cad_file_id}/thumbnail")
+        if resp_thumb.status_code == 200:
+            if last_status != "completed":
+                info("  CAD status: completed")
+                last_status = "completed"
+            ok("CAD processing completed (thumbnail available)")
+            return cad_file_id
+
        time.sleep(POLL_INTERVAL_SECONDS)

    fail(f"CAD processing timed out after {CAD_PROCESSING_TIMEOUT}s (last status: {last_status})")
@@ -207,17 +307,14 @@ def test_order_render(
    cad_file_id: str,
    output_type_ids: list[str],
    test_label: str,
+    *,
+    use_graph_dispatch: bool = False,
 ) -> bool:
    """Create a minimal order, submit, dispatch renders, wait for completion."""
    section(f"3. Order Render — {test_label}")
    info(f"Output types: {len(output_type_ids)}")

    # Get a product that uses this CAD file
-    resp = client.get(f"/cad/{cad_file_id}")
-    if resp.status_code != 200:
-        fail(f"CAD file lookup failed: {resp.status_code}")
-        return False
-
    # Find or create a product linked to this CAD file
    product_id = None
    resp_products = client.get("/products/?limit=100")
@@ -245,46 +342,41 @@ def test_order_render(
        product_id = resp_create.json()["id"]
        ok(f"Created test product: {product_id[:8]}...")

-    # Build output_type_selections for one product
-    ot_selections = [{"product_id": product_id, "output_type_id": ot_id} for ot_id in output_type_ids]
-
-    # Create order via wizard endpoint
-    resp_order = client.post("/orders/product-order", json={
-        "product_id": product_id,
-        "output_type_selections": [
-            {"output_type_id": ot_id}
-            for ot_id in output_type_ids
-        ],
-    })
+    resp_order = client.post(
+        "/orders",
+        json={
+            "notes": f"Render pipeline integration test: {test_label}",
+            "items": [],
+            "lines": [
+                {"product_id": product_id, "output_type_id": ot_id}
+                for ot_id in output_type_ids
+            ],
+        },
+    )
    if resp_order.status_code not in (200, 201):
-        # Fallback: try to find existing submitted order
-        warn(f"Product order wizard not available ({resp_order.status_code}), looking for existing order lines...")
-        return _test_existing_renders(client, product_id, output_type_ids)
+        fail(f"Order creation failed: {resp_order.status_code} {resp_order.text[:300]}")
+        return False

    order = resp_order.json()
    order_id = order["id"]
    ok(f"Order created: {order.get('order_number')} (id={order_id[:8]}...)")

-    return _submit_and_wait(client, order_id, output_type_ids)
+    return _submit_and_wait(
+        client,
+        order,
+        output_type_ids,
+        use_graph_dispatch=use_graph_dispatch,
+    )


-def _test_existing_renders(client: APIClient, product_id: str, output_type_ids: list[str]) -> bool:
-    """Find existing order lines for a product and wait for completion."""
-    resp = client.get(f"/orders/?limit=20")
-    if resp.status_code != 200:
-        fail("Could not list orders")
-        return False
-    orders = resp.json()
-    if isinstance(orders, dict):
-        orders = orders.get("items", [])
-    for order in orders:
-        if order.get("status") in ("submitted", "processing", "rendering"):
-            return _submit_and_wait(client, order["id"], output_type_ids)
-    warn("No suitable existing orders found for render test")
-    return True  # non-blocking warning
-
-
-def _submit_and_wait(client: APIClient, order_id: str, output_type_ids: list[str]) -> bool:
+def _submit_and_wait(
+    client: APIClient,
+    order: dict,
+    output_type_ids: list[str],
+    *,
+    use_graph_dispatch: bool = False,
+) -> bool:
+    order_id = order["id"]
    # Submit
    resp_sub = client.post(f"/orders/{order_id}/submit")
    if resp_sub.status_code not in (200, 201, 204):
@@ -296,14 +388,34 @@ def _submit_and_wait(client: APIClient, order_id: str, output_type_ids: list[str
    else:
        ok("Order submitted")

-    # Dispatch renders
-    resp_disp = client.post(f"/orders/{order_id}/dispatch-renders")
-    if resp_disp.status_code not in (200, 201, 204):
-        fail(f"Dispatch renders failed: {resp_disp.status_code} {resp_disp.text[:200]}")
-        return False
-    dispatch_data = resp_disp.json() if resp_disp.content else {}
-    dispatched = dispatch_data.get("dispatched", "?")
-    ok(f"Renders dispatched ({dispatched} lines)")
+    dispatch_run_id = None
+    if use_graph_dispatch:
+        lines = order.get("lines", [])
+        if len(lines) != 1:
+            fail("Graph mode currently expects exactly one order line per test order")
+            return False
+        line_id = lines[0]["id"]
+        resp_disp = client.post(
+            "/workflows/dispatch",
+            json={
+                "context_id": line_id,
+                "config": build_graph_still_config(),
+            },
+        )
+        if resp_disp.status_code not in (200, 201):
+            fail(f"Workflow draft dispatch failed: {resp_disp.status_code} {resp_disp.text[:300]}")
+            return False
+        dispatch_data = resp_disp.json()
+        dispatch_run_id = dispatch_data["workflow_run"]["id"]
+        ok(f"Graph workflow dispatched (run={dispatch_run_id[:8]}..., tasks={dispatch_data.get('dispatched', '?')})")
+    else:
+        resp_disp = client.post(f"/orders/{order_id}/dispatch-renders")
+        if resp_disp.status_code not in (200, 201, 204):
+            fail(f"Dispatch renders failed: {resp_disp.status_code} {resp_disp.text[:200]}")
+            return False
+        dispatch_data = resp_disp.json() if resp_disp.content else {}
+        dispatched = dispatch_data.get("dispatched", "?")
+        ok(f"Renders dispatched ({dispatched} lines)")

    # Poll for order completion
    info(f"Waiting for renders to complete (timeout={RENDER_TIMEOUT_SECONDS}s per OT)...")
@@ -323,10 +435,20 @@ def _submit_and_wait(client: APIClient, order_id: str, output_type_ids: list[str
            info(f"  {summary}")
            last_summary = summary

-        if order_status == "completed":
-            ok(f"Order completed — all {len(lines)} render(s) done")
-            # Check individual line results
-            all_success = True
+        terminal_states = {"completed", "failed", "cancelled"}
+        line_states = [state for state in statuses if state]
+        if line_states and all(state in terminal_states for state in line_states):
+            all_success = all(state == "completed" for state in line_states)
+            if order_status == "completed":
+                ok(f"Order completed — all {len(lines)} render(s) done")
+            elif all_success:
+                ok(
+                    f"All {len(lines)} render line(s) completed "
+                    f"(order status remains {order_status})"
+                )
+            else:
+                fail(f"Order reached terminal line states with order={order_status}")
+
            for line in lines:
                rs = line.get("render_status")
                ot_name = line.get("output_type_name") or line.get("output_type", {}).get("name", "?")
@@ -334,13 +456,30 @@ def _submit_and_wait(client: APIClient, order_id: str, output_type_ids: list[str
                    ok(f"  Line [{ot_name}]: completed")
                elif rs == "failed":
                    fail(f"  Line [{ot_name}]: FAILED")
-                    all_success = False
                else:
                    warn(f"  Line [{ot_name}]: {rs}")
+
+            if all_success and dispatch_run_id:
+                resp_cmp = client.get(f"/workflows/runs/{dispatch_run_id}/comparison")
+                if resp_cmp.status_code == 200:
+                    comparison = resp_cmp.json()
+                    rollout_gate = evaluate_rollout_gate_from_comparison(comparison)
+                    verdict = rollout_gate["verdict"]
+                    if verdict == "pass":
+                        ok("  Rollout gate PASS — graph output is ready for workflow-first rollout")
+                    elif verdict == "warn":
+                        warn("  Rollout gate WARN — keep legacy authoritative and review drift")
+                    else:
+                        warn("  Rollout gate FAIL — keep legacy authoritative")
+                    info(f"  Comparison status: {comparison.get('status')}, verdict={verdict}")
+                    for reason in rollout_gate["reasons"]:
+                        info(f"    {reason}")
+                else:
+                    warn(f"  Comparison lookup failed: {resp_cmp.status_code}")
            return all_success

        if order_status == "failed":
-            fail(f"Order FAILED — check render logs")
+            fail("Order FAILED — check render logs")
            return False

        time.sleep(POLL_INTERVAL_SECONDS)
@@ -377,6 +516,7 @@ def main():
    parser.add_argument("--health", action="store_true", help="Only run health check")
    parser.add_argument("--sample", action="store_true", help="Quick sample test (1 STEP, 1 OT)")
    parser.add_argument("--full", action="store_true", help="Full test (all output types)")
+    parser.add_argument("--graph", action="store_true", help="Dispatch sample/full renders via /api/workflows/dispatch")
    parser.add_argument("--step", default=str(SAMPLE_STEP), help="Path to STEP file")
    args = parser.parse_args()

@@ -431,7 +571,13 @@ def main():
            output_types[0],
        )
        info(f"Sample test using output type: {ot['name']}")
-        test_order_render(client, cad_file_id, [ot["id"]], f"Sample [{ot['name']}]")
+        test_order_render(
+            client,
+            cad_file_id,
+            [ot["id"]],
+            f"Sample [{ot['name']}]",
+            use_graph_dispatch=args.graph,
+        )

    elif args.full:
        # Test each output type individually
@@ -439,7 +585,13 @@ def main():
            if ot.get("is_animation"):
                warn(f"Skipping animation output type: {ot['name']} (too slow for full test)")
                continue
-            test_order_render(client, cad_file_id, [ot["id"]], ot["name"])
+            test_order_render(
+                client,
+                cad_file_id,
+                [ot["id"]],
+                ot["name"],
+                use_graph_dispatch=args.graph,
+            )

    _print_summary()
    sys.exit(0 if not failed else 1)