feat: add workflow rollout gate signals

2026-04-08 21:44:02 +02:00
parent 8c9648d5dc
commit fe46dabfc5
4 changed files with 1624 additions and 101 deletions
+218 -14
@@ -17,6 +17,28 @@ import logging
logger = logging.getLogger(__name__)
def _build_rollout_signal(
*,
gate_status: str,
ready: bool,
reasons: list[str],
workflow_def_id=None,
output_type_id=None,
verdict: str | None = None,
) -> dict:
return {
"rollout_gate_status": gate_status,
"rollout_gate_verdict": verdict,
"rollout_gate_reasons": reasons,
"workflow_rollout_ready": ready,
"workflow_rollout_status": "ready_for_rollout" if ready else "hold_legacy_authoritative",
"output_type_rollout_ready": ready,
"output_type_rollout_status": "ready_for_rollout" if ready else "hold_legacy_authoritative",
"rollout_workflow_definition_id": str(workflow_def_id) if workflow_def_id is not None else None,
"rollout_output_type_id": str(output_type_id) if output_type_id is not None else None,
}
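Note (not part of the commit): a minimal sketch of the signal this helper emits while legacy stays authoritative; the UUIDs below are placeholders.

# Illustrative only: placeholder IDs, hold case with no comparison verdict yet.
signal = _build_rollout_signal(
    gate_status="pending_shadow_verdict",
    ready=False,
    reasons=["Legacy dispatch remains authoritative until the shadow comparison passes."],
    workflow_def_id="00000000-0000-0000-0000-000000000001",  # placeholder
    output_type_id="00000000-0000-0000-0000-000000000002",  # placeholder
)
assert signal["workflow_rollout_status"] == "hold_legacy_authoritative"
assert signal["rollout_gate_verdict"] is None  # no verdict until a comparison runs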
def dispatch_render_with_workflow(order_line_id: str) -> dict:
"""Dispatch a render for the given order line.
@@ -33,6 +55,7 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
from app.domains.orders.models import OrderLine
from app.domains.rendering.models import OutputType, WorkflowDefinition
from app.domains.rendering.workflow_config_utils import (
canonicalize_workflow_config,
extract_runtime_workflow,
get_workflow_execution_mode,
)
@@ -67,7 +90,16 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
"order_line %s: no workflow_definition_id, using legacy dispatch", "order_line %s: no workflow_definition_id, using legacy dispatch",
order_line_id, order_line_id,
) )
return _legacy_dispatch(order_line_id) legacy_result = _legacy_dispatch(order_line_id)
legacy_result.update(
_build_rollout_signal(
gate_status="legacy_only",
ready=False,
reasons=["No workflow definition is linked; legacy dispatch remains authoritative."],
output_type_id=getattr(output_type, "id", None),
)
)
return legacy_result
# Load the linked WorkflowDefinition
wf_def: WorkflowDefinition | None = session.execute(
@@ -84,13 +116,45 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
order_line_id,
output_type.workflow_definition_id,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result.update(
_build_rollout_signal(
gate_status="workflow_unavailable",
ready=False,
reasons=["Linked workflow definition is missing or inactive; legacy dispatch remains authoritative."],
workflow_def_id=output_type.workflow_definition_id,
output_type_id=output_type.id,
)
)
return legacy_result
try:
canonical_config = canonicalize_workflow_config(wf_def.config)
except Exception as exc:
logger.warning(
"order_line %s: workflow_definition_id %s has invalid config (%s), "
"falling back to legacy dispatch",
order_line_id,
wf_def.id,
exc,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result.update(
_build_rollout_signal(
gate_status="workflow_invalid",
ready=False,
reasons=[f"Workflow definition config is invalid: {exc}."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
execution_mode = get_workflow_execution_mode(canonical_config, default="legacy")
def _prepare_graph_context(target_mode: str):
workflow_context = prepare_workflow_context(
canonical_config,
context_id=order_line_id,
execution_mode=target_mode,
)
@@ -122,7 +186,18 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
wf_def.id,
exc,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result["fallback_from"] = "workflow_graph"
legacy_result.update(
_build_rollout_signal(
gate_status="graph_preparation_failed",
ready=False,
reasons=[f"Graph runtime preparation failed: {exc}."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
run = None
try:
@@ -136,7 +211,18 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
wf_def.id,
exc,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result["fallback_from"] = "workflow_graph"
legacy_result.update(
_build_rollout_signal(
gate_status="graph_run_creation_failed",
ready=False,
reasons=[f"Graph workflow run creation failed: {exc}."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
try:
dispatch_result = execute_graph_workflow(session, workflow_context)
@@ -154,15 +240,35 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
fallback_result = _legacy_dispatch(order_line_id)
fallback_result["fallback_from"] = "workflow_graph"
fallback_result["workflow_run_id"] = str(run.id)
fallback_result.update(
_build_rollout_signal(
gate_status="graph_execution_failed",
ready=False,
reasons=[f"Graph workflow execution failed: {exc}."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return fallback_result
result = {
"backend": "workflow_graph",
"execution_mode": "graph",
"workflow_run_id": str(run.id),
"celery_task_id": dispatch_result.task_ids[0] if dispatch_result.task_ids else None,
"task_ids": dispatch_result.task_ids,
}
result.update(
_build_rollout_signal(
gate_status="graph_authoritative",
ready=True,
verdict="pass",
reasons=["Workflow graph dispatch is authoritative for this output type."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return result
if execution_mode == "shadow": if execution_mode == "shadow":
legacy_result = _legacy_dispatch(order_line_id) legacy_result = _legacy_dispatch(order_line_id)
@@ -180,6 +286,18 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
legacy_result["execution_mode"] = "shadow" legacy_result["execution_mode"] = "shadow"
legacy_result["shadow_status"] = "skipped" legacy_result["shadow_status"] = "skipped"
legacy_result["shadow_error"] = str(exc) legacy_result["shadow_error"] = str(exc)
legacy_result.update(
_build_rollout_signal(
gate_status="shadow_skipped",
ready=False,
reasons=[
"Shadow workflow preparation failed; legacy dispatch remains authoritative.",
f"Preparation error: {exc}.",
],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
run = None
@@ -197,6 +315,18 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
legacy_result["execution_mode"] = "shadow" legacy_result["execution_mode"] = "shadow"
legacy_result["shadow_status"] = "failed" legacy_result["shadow_status"] = "failed"
legacy_result["shadow_error"] = str(exc) legacy_result["shadow_error"] = str(exc)
legacy_result.update(
_build_rollout_signal(
gate_status="shadow_run_creation_failed",
ready=False,
reasons=[
"Shadow workflow run could not be created; legacy dispatch remains authoritative.",
f"Run creation error: {exc}.",
],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
try:
@@ -216,15 +346,39 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
legacy_result["shadow_status"] = "failed" legacy_result["shadow_status"] = "failed"
legacy_result["shadow_error"] = str(exc) legacy_result["shadow_error"] = str(exc)
legacy_result["shadow_workflow_run_id"] = str(run.id) legacy_result["shadow_workflow_run_id"] = str(run.id)
legacy_result.update(
_build_rollout_signal(
gate_status="shadow_execution_failed",
ready=False,
reasons=[
"Shadow workflow execution failed; legacy dispatch remains authoritative.",
f"Execution error: {exc}.",
],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
legacy_result["execution_mode"] = "shadow"
legacy_result["shadow_status"] = "dispatched"
legacy_result["shadow_workflow_run_id"] = str(run.id)
legacy_result["shadow_task_ids"] = dispatch_result.task_ids
legacy_result.update(
_build_rollout_signal(
gate_status="pending_shadow_verdict",
ready=False,
reasons=[
"Legacy dispatch remains authoritative until the shadow workflow comparison returns pass.",
"A pass verdict is required before workflow-first rollout is ready.",
],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
workflow_type, params = extract_runtime_workflow(canonical_config)
if workflow_type is None or workflow_type == "custom":
logger.warning(
"order_line %s: workflow_definition_id %s has no supported preset runtime, "
@@ -232,7 +386,17 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
order_line_id,
wf_def.id,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result.update(
_build_rollout_signal(
gate_status="workflow_runtime_unsupported",
ready=False,
reasons=["Workflow definition has no supported preset runtime; legacy dispatch remains authoritative."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
logger.info(
"order_line %s: dispatching via WorkflowDefinition %s (type=%s)",
@@ -243,7 +407,7 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
try:
workflow_context = prepare_workflow_context(
canonical_config,
context_id=order_line_id,
execution_mode="legacy",
)
@@ -255,7 +419,17 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
wf_def.id,
exc,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result.update(
_build_rollout_signal(
gate_status="workflow_preparation_failed",
ready=False,
reasons=[f"Workflow runtime preparation failed: {exc}."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
# For turntable workflows: resolve step_path + output_dir from the order line at runtime
if workflow_type == "turntable" and ("step_path" not in params or "output_dir" not in params):
@@ -299,7 +473,17 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
wf_def.id,
exc,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result.update(
_build_rollout_signal(
gate_status="workflow_run_creation_failed",
ready=False,
reasons=[f"Workflow run creation failed: {exc}."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
from app.domains.rendering.workflow_builder import dispatch_workflow
@@ -317,15 +501,35 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
order_line_id,
wf_def.id,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result.update(
_build_rollout_signal(
gate_status="workflow_dispatch_failed",
ready=False,
reasons=[f"Workflow dispatch failed: {exc}."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
result = {
"backend": "workflow",
"workflow_type": workflow_type,
"execution_mode": "legacy",
"workflow_run_id": str(run.id),
"celery_task_id": celery_task_id,
}
result.update(
_build_rollout_signal(
gate_status="workflow_legacy_runtime",
ready=False,
reasons=["Workflow definition is active, but execution still uses the legacy runtime path."],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return result
def _legacy_dispatch(order_line_id: str) -> dict:
@@ -10,11 +10,16 @@ from PIL import Image, ImageChops, ImageStat
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.config import settings
from app.core.render_paths import resolve_result_path, result_path_to_storage_key
from app.domains.media.models import MediaAsset
from app.domains.orders.models import OrderLine
from app.domains.rendering.models import WorkflowRun
from app.domains.rendering.schemas import WorkflowComparisonArtifactOut, WorkflowRunComparisonOut
ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA = 0.0
ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA = 0.02
@dataclass(slots=True)
class _ArtifactComparison:
@@ -36,18 +41,78 @@ class _ArtifactComparison:
sha256=self.sha256,
mime_type=self.mime_type,
image_width=self.image_width,
image_height=self.image_height,
)
def evaluate_rollout_gate(
*,
authoritative_output: _ArtifactComparison,
observer_output: _ArtifactComparison,
exact_match: bool | None,
dimensions_match: bool | None,
mean_pixel_delta: float | None,
) -> dict[str, object]:
thresholds = {
"pass_max_mean_pixel_delta": ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA,
"warn_max_mean_pixel_delta": ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA,
}
reasons: list[str] = []
if not authoritative_output.exists:
verdict = "fail"
reasons.append("Authoritative legacy output is missing; keep legacy fallback active.")
elif not observer_output.exists:
verdict = "fail"
reasons.append("Observer workflow output is missing; rollout cannot be approved.")
elif exact_match:
verdict = "pass"
reasons.append("Observer output matches the authoritative legacy output byte-for-byte.")
elif dimensions_match is False:
verdict = "fail"
reasons.append("Observer output dimensions differ from the authoritative legacy output.")
elif mean_pixel_delta is None:
verdict = "fail"
reasons.append("Observer output could not be pixel-compared against the authoritative output.")
elif mean_pixel_delta <= ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA:
verdict = "pass"
reasons.append("Observer output is visually identical within the pass threshold.")
elif mean_pixel_delta <= ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA:
verdict = "warn"
reasons.append(
"Observer output differs slightly from the authoritative output but remains within the warn threshold."
)
else:
verdict = "fail"
reasons.append(
"Observer output exceeds the allowed parity threshold; keep legacy fallback active."
)
if mean_pixel_delta is not None and not exact_match:
reasons.append(
f"Mean pixel delta {mean_pixel_delta:.6f}; "
f"pass<={ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA:.6f}, "
f"warn<={ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA:.6f}."
)
rollout_ready = verdict == "pass"
rollout_status = "ready_for_rollout" if rollout_ready else "hold_legacy_authoritative"
return {
"verdict": verdict,
"ready": rollout_ready,
"status": rollout_status,
"reasons": reasons,
"thresholds": thresholds,
"workflow_rollout_ready": rollout_ready,
"workflow_rollout_status": rollout_status,
"output_type_rollout_ready": rollout_ready,
"output_type_rollout_status": rollout_status,
}
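Note (not part of the commit): when both outputs exist and dimensions match, the verdict tiers reduce to the delta thresholds defined above; a compact sketch for reference.

# Reduced sketch of the tiering, assuming both outputs exist and dimensions match.
def _sketch_verdict(mean_pixel_delta: float | None, exact_match: bool = False) -> str:
    if exact_match:
        return "pass"
    if mean_pixel_delta is None:
        return "fail"
    if mean_pixel_delta <= ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA:  # 0.0
        return "pass"
    if mean_pixel_delta <= ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA:  # 0.02
        return "warn"
    return "fail"

assert _sketch_verdict(0.0) == "pass"
assert _sketch_verdict(0.01) == "warn"
assert _sketch_verdict(0.05) == "fail"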
def _normalize_storage_key(path: str | None) -> str | None:
return result_path_to_storage_key(path)
def _build_artifact(path: str | None) -> _ArtifactComparison:
@@ -63,7 +128,8 @@ def _build_artifact(path: str | None) -> _ArtifactComparison:
image_height=None,
)
resolved_path = resolve_result_path(path)
file_path = resolved_path or Path(path)
exists = file_path.exists()
mime_type, _ = mimetypes.guess_type(str(file_path))
@@ -136,10 +202,8 @@ async def _load_shadow_asset_by_workflow_run(
if asset is None:
return None
resolved = resolve_result_path(asset.storage_key)
return str(resolved) if resolved is not None else None
def _find_shadow_file(order_line: OrderLine, workflow_run: WorkflowRun) -> str | None:
@@ -147,9 +211,13 @@ def _find_shadow_file(order_line: OrderLine, workflow_run: WorkflowRun) -> str |
candidate_roots: list[Path] = []
if order_line.result_path:
resolved_result = resolve_result_path(order_line.result_path)
if resolved_result is not None:
candidate_roots.append(resolved_result.parent)
upload_root = Path(settings.upload_dir)
candidate_roots.append(upload_root / "renders" / str(order_line.id))
candidate_roots.append(upload_root / "step_files" / "renders")
seen_roots: set[Path] = set()
candidates: list[Path] = []
@@ -215,6 +283,9 @@ async def build_workflow_run_comparison(
if exact_match:
status = "matched"
summary = "Observer output matches the authoritative legacy output byte-for-byte."
elif mean_pixel_delta == 0.0 and dimensions_match:
status = "matched"
summary = "Observer output matches the authoritative legacy output visually, but file metadata differs."
else:
status = "different"
if dimensions_match is False:
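Note (not part of the commit): the pixel-comparison helpers live in the part of this file's diff that is suppressed below, so here is a hedged sketch of how a normalized mean pixel delta can be computed with the PIL tools this module imports; the actual helper may differ.

# Sketch only: one way to derive a [0, 1] mean pixel delta for same-size images;
# 0.0 means identical pixels.
from PIL import Image, ImageChops, ImageStat

def sketch_mean_pixel_delta(path_a: str, path_b: str) -> float:
    with Image.open(path_a) as img_a, Image.open(path_b) as img_b:
        diff = ImageChops.difference(img_a.convert("RGB"), img_b.convert("RGB"))
        stat = ImageStat.Stat(diff)
        return sum(stat.mean) / (len(stat.mean) * 255.0)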
File diff suppressed because it is too large
+211 -59
@@ -53,6 +53,9 @@ passed = []
failed = []
warnings = []
ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA = 0.0
ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA = 0.02
# ---------------------------------------------------------------------------
# Helpers
@@ -83,6 +86,55 @@ def section(title: str):
print(f"{BLUE}{'='*60}{RESET}") print(f"{BLUE}{'='*60}{RESET}")
def evaluate_rollout_gate_from_comparison(comparison: dict) -> dict:
reasons: list[str] = []
mean_pixel_delta = comparison.get("mean_pixel_delta")
exact_match = comparison.get("exact_match")
dimensions_match = comparison.get("dimensions_match")
status = comparison.get("status")
authoritative_exists = bool(comparison.get("authoritative_output", {}).get("exists"))
observer_exists = bool(comparison.get("observer_output", {}).get("exists"))
if not authoritative_exists:
verdict = "fail"
reasons.append("Authoritative legacy output is missing.")
elif not observer_exists:
verdict = "fail"
reasons.append("Observer workflow output is missing.")
elif exact_match:
verdict = "pass"
reasons.append("Observer output matches the authoritative legacy output byte-for-byte.")
elif dimensions_match is False:
verdict = "fail"
reasons.append("Observer output dimensions differ from the authoritative legacy output.")
elif mean_pixel_delta is None:
verdict = "fail"
reasons.append(f"Workflow comparison did not produce a pixel delta (status={status}).")
elif mean_pixel_delta <= ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA:
verdict = "pass"
reasons.append("Observer output is visually identical within the pass threshold.")
elif mean_pixel_delta <= ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA:
verdict = "warn"
reasons.append("Observer output differs slightly but remains within the warn threshold.")
else:
verdict = "fail"
reasons.append("Observer output exceeds the rollout parity threshold.")
if mean_pixel_delta is not None and not exact_match:
reasons.append(
f"Mean pixel delta {mean_pixel_delta:.6f}; "
f"pass<={ROLLOUT_PASS_MAX_MEAN_PIXEL_DELTA:.6f}, "
f"warn<={ROLLOUT_WARN_MAX_MEAN_PIXEL_DELTA:.6f}."
)
return {
"verdict": verdict,
"ready": verdict == "pass",
"reasons": reasons,
}
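Note (not part of the commit): an illustrative comparison payload using only the keys the helper reads above, and the verdict it yields.

# Made-up values; keys match what evaluate_rollout_gate_from_comparison reads.
sample_comparison = {
    "status": "different",
    "exact_match": False,
    "dimensions_match": True,
    "mean_pixel_delta": 0.004,
    "authoritative_output": {"exists": True},
    "observer_output": {"exists": True},
}
gate = evaluate_rollout_gate_from_comparison(sample_comparison)
assert gate["verdict"] == "warn" and gate["ready"] is False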
class APIClient:
def __init__(self, host: str, email: str, password: str):
self.host = host.rstrip("/")
@@ -93,7 +145,7 @@ class APIClient:
def _login(self, email: str, password: str):
resp = self.session.post(
f"{self.host}/api/auth/login",
json={"email": email, "password": password},
)
resp.raise_for_status()
data = resp.json()
@@ -110,6 +162,44 @@ class APIClient:
return self.session.delete(f"{self.host}/api{path}", **kwargs)
def build_graph_still_config() -> dict:
return {
"version": 1,
"ui": {"preset": "still_graph", "execution_mode": "graph"},
"nodes": [
{
"id": "setup",
"step": "order_line_setup",
"params": {},
"ui": {"label": "Order Line Setup", "position": {"x": 0, "y": 100}},
},
{
"id": "template",
"step": "resolve_template",
"params": {},
"ui": {"label": "Resolve Template", "position": {"x": 220, "y": 100}},
},
{
"id": "render",
"step": "blender_still",
"params": {},
"ui": {"type": "renderNode", "label": "Still Render", "position": {"x": 440, "y": 100}},
},
{
"id": "output",
"step": "output_save",
"params": {},
"ui": {"type": "outputNode", "label": "Save Output", "position": {"x": 660, "y": 100}},
},
],
"edges": [
{"from": "setup", "to": "template"},
{"from": "template", "to": "render"},
{"from": "render", "to": "output"},
],
}
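Note (not part of the commit): this draft config is what the --graph path posts to /api/workflows/dispatch further down in _submit_and_wait; a compressed sketch of that call, with a placeholder order-line id.

# Placeholder id; the real call uses the first line of the freshly created test order.
payload = {
    "context_id": "11111111-1111-1111-1111-111111111111",
    "config": build_graph_still_config(),
}
# resp = client.post("/workflows/dispatch", json=payload)
# run_id = resp.json()["workflow_run"]["id"]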
# ---------------------------------------------------------------------------
# Test: Render health endpoint
# ---------------------------------------------------------------------------
@@ -174,24 +264,34 @@ def test_step_upload(client: APIClient, step_file: Path) -> str | None:
cad_file_id = data["cad_file_id"] cad_file_id = data["cad_file_id"]
ok(f"STEP uploaded → cad_file_id={cad_file_id[:8]}... status={data.get('status')}") ok(f"STEP uploaded → cad_file_id={cad_file_id[:8]}... status={data.get('status')}")
# Poll the existing CAD endpoints. There is no GET /api/cad/{id}; the most
# reliable readiness signal is /objects returning 200 with processing_status.
info(f"Waiting for CAD processing (timeout={CAD_PROCESSING_TIMEOUT}s)...")
deadline = time.time() + CAD_PROCESSING_TIMEOUT
last_status = None
while time.time() < deadline:
resp_objects = client.get(f"/cad/{cad_file_id}/objects")
if resp_objects.status_code == 200:
cad = resp_objects.json()
status = cad.get("processing_status")
if status != last_status:
info(f" CAD status: {status}")
last_status = status
if status == "completed":
ok("CAD processing completed (parsed objects available)")
return cad_file_id
if status == "failed":
fail(f"CAD processing FAILED: {cad.get('error_message', 'unknown error')}")
return None
resp_thumb = client.get(f"/cad/{cad_file_id}/thumbnail")
if resp_thumb.status_code == 200:
if last_status != "completed":
info(" CAD status: completed")
last_status = "completed"
ok("CAD processing completed (thumbnail available)")
return cad_file_id
time.sleep(POLL_INTERVAL_SECONDS)
fail(f"CAD processing timed out after {CAD_PROCESSING_TIMEOUT}s (last status: {last_status})")
@@ -207,17 +307,14 @@ def test_order_render(
cad_file_id: str,
output_type_ids: list[str],
test_label: str,
*,
use_graph_dispatch: bool = False,
) -> bool:
"""Create a minimal order, submit, dispatch renders, wait for completion."""
section(f"3. Order Render — {test_label}")
info(f"Output types: {len(output_type_ids)}")
# Get a product that uses this CAD file
resp = client.get(f"/cad/{cad_file_id}")
if resp.status_code != 200:
fail(f"CAD file lookup failed: {resp.status_code}")
return False
# Find or create a product linked to this CAD file
product_id = None
resp_products = client.get("/products/?limit=100")
@@ -245,46 +342,41 @@ def test_order_render(
product_id = resp_create.json()["id"] product_id = resp_create.json()["id"]
ok(f"Created test product: {product_id[:8]}...") ok(f"Created test product: {product_id[:8]}...")
resp_order = client.post(
"/orders",
json={
"notes": f"Render pipeline integration test: {test_label}",
"items": [],
"lines": [
{"product_id": product_id, "output_type_id": ot_id}
for ot_id in output_type_ids
],
},
)
if resp_order.status_code not in (200, 201):
fail(f"Order creation failed: {resp_order.status_code} {resp_order.text[:300]}")
return False
order = resp_order.json()
order_id = order["id"]
ok(f"Order created: {order.get('order_number')} (id={order_id[:8]}...)")
return _submit_and_wait(
client,
order,
output_type_ids,
use_graph_dispatch=use_graph_dispatch,
)
def _submit_and_wait(
client: APIClient,
order: dict,
output_type_ids: list[str],
*,
use_graph_dispatch: bool = False,
) -> bool:
order_id = order["id"]
# Submit
resp_sub = client.post(f"/orders/{order_id}/submit")
if resp_sub.status_code not in (200, 201, 204):
@@ -296,14 +388,34 @@ def _submit_and_wait(client: APIClient, order_id: str, output_type_ids: list[str
else:
ok("Order submitted")
dispatch_run_id = None
if use_graph_dispatch:
lines = order.get("lines", [])
if len(lines) != 1:
fail("Graph mode currently expects exactly one order line per test order")
return False
line_id = lines[0]["id"]
resp_disp = client.post(
"/workflows/dispatch",
json={
"context_id": line_id,
"config": build_graph_still_config(),
},
)
if resp_disp.status_code not in (200, 201):
fail(f"Workflow draft dispatch failed: {resp_disp.status_code} {resp_disp.text[:300]}")
return False
dispatch_data = resp_disp.json()
dispatch_run_id = dispatch_data["workflow_run"]["id"]
ok(f"Graph workflow dispatched (run={dispatch_run_id[:8]}..., tasks={dispatch_data.get('dispatched', '?')})")
else:
resp_disp = client.post(f"/orders/{order_id}/dispatch-renders")
if resp_disp.status_code not in (200, 201, 204):
fail(f"Dispatch renders failed: {resp_disp.status_code} {resp_disp.text[:200]}")
return False
dispatch_data = resp_disp.json() if resp_disp.content else {}
dispatched = dispatch_data.get("dispatched", "?")
ok(f"Renders dispatched ({dispatched} lines)")
# Poll for order completion
info(f"Waiting for renders to complete (timeout={RENDER_TIMEOUT_SECONDS}s per OT)...")
@@ -323,10 +435,20 @@ def _submit_and_wait(client: APIClient, order_id: str, output_type_ids: list[str
info(f" {summary}") info(f" {summary}")
last_summary = summary last_summary = summary
if order_status == "completed": terminal_states = {"completed", "failed", "cancelled"}
ok(f"Order completed — all {len(lines)} render(s) done") line_states = [state for state in statuses if state]
# Check individual line results if line_states and all(state in terminal_states for state in line_states):
all_success = True all_success = all(state == "completed" for state in line_states)
if order_status == "completed":
ok(f"Order completed — all {len(lines)} render(s) done")
elif all_success:
ok(
f"All {len(lines)} render line(s) completed "
f"(order status remains {order_status})"
)
else:
fail(f"Order reached terminal line states with order={order_status}")
for line in lines:
rs = line.get("render_status")
ot_name = line.get("output_type_name") or line.get("output_type", {}).get("name", "?")
@@ -334,13 +456,30 @@ def _submit_and_wait(client: APIClient, order_id: str, output_type_ids: list[str
ok(f" Line [{ot_name}]: completed") ok(f" Line [{ot_name}]: completed")
elif rs == "failed": elif rs == "failed":
fail(f" Line [{ot_name}]: FAILED") fail(f" Line [{ot_name}]: FAILED")
all_success = False
else: else:
warn(f" Line [{ot_name}]: {rs}") warn(f" Line [{ot_name}]: {rs}")
if all_success and dispatch_run_id:
resp_cmp = client.get(f"/workflows/runs/{dispatch_run_id}/comparison")
if resp_cmp.status_code == 200:
comparison = resp_cmp.json()
rollout_gate = evaluate_rollout_gate_from_comparison(comparison)
verdict = rollout_gate["verdict"]
if verdict == "pass":
ok(" Rollout gate PASS — graph output is ready for workflow-first rollout")
elif verdict == "warn":
warn(" Rollout gate WARN — keep legacy authoritative and review drift")
else:
warn(" Rollout gate FAIL — keep legacy authoritative")
info(f" Comparison status: {comparison.get('status')}, verdict={verdict}")
for reason in rollout_gate["reasons"]:
info(f" {reason}")
else:
warn(f" Comparison lookup failed: {resp_cmp.status_code}")
return all_success
if order_status == "failed":
fail("Order FAILED — check render logs")
return False
time.sleep(POLL_INTERVAL_SECONDS)
@@ -377,6 +516,7 @@ def main():
parser.add_argument("--health", action="store_true", help="Only run health check") parser.add_argument("--health", action="store_true", help="Only run health check")
parser.add_argument("--sample", action="store_true", help="Quick sample test (1 STEP, 1 OT)") parser.add_argument("--sample", action="store_true", help="Quick sample test (1 STEP, 1 OT)")
parser.add_argument("--full", action="store_true", help="Full test (all output types)") parser.add_argument("--full", action="store_true", help="Full test (all output types)")
parser.add_argument("--graph", action="store_true", help="Dispatch sample/full renders via /api/workflows/dispatch")
parser.add_argument("--step", default=str(SAMPLE_STEP), help="Path to STEP file") parser.add_argument("--step", default=str(SAMPLE_STEP), help="Path to STEP file")
args = parser.parse_args() args = parser.parse_args()
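Note (not part of the commit): typical invocations with the new flag; the script filename is assumed for illustration.

# Assumed filename; substitute the actual script path.
#   python render_pipeline_test.py --sample --graph
#   python render_pipeline_test.py --full --graph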
@@ -431,7 +571,13 @@ def main():
output_types[0],
)
info(f"Sample test using output type: {ot['name']}")
test_order_render(
client,
cad_file_id,
[ot["id"]],
f"Sample [{ot['name']}]",
use_graph_dispatch=args.graph,
)
elif args.full:
# Test each output type individually # Test each output type individually
@@ -439,7 +585,13 @@ def main():
if ot.get("is_animation"): if ot.get("is_animation"):
warn(f"Skipping animation output type: {ot['name']} (too slow for full test)") warn(f"Skipping animation output type: {ot['name']} (too slow for full test)")
continue continue
test_order_render(
client,
cad_file_id,
[ot["id"]],
ot["name"],
use_graph_dispatch=args.graph,
)
_print_summary()
sys.exit(0 if not failed else 1)