chore: snapshot workflow migration progress

This commit is contained in:
2026-04-12 11:49:04 +02:00
parent 0cd02513d5
commit 3e810c74a3
163 changed files with 31774 additions and 2753 deletions
+158 -10
View File
@@ -16,6 +16,8 @@ import logging
logger = logging.getLogger(__name__)
# Rollout modes recognised by _normalize_workflow_rollout_mode; any value
# outside this set is normalised to "legacy_only".
_WORKFLOW_ROLLOUT_MODES = {"legacy_only", "shadow", "graph"}
def _build_rollout_signal(
*,
@@ -39,6 +41,13 @@ def _build_rollout_signal(
}
def _normalize_workflow_rollout_mode(value: str | None) -> str:
    """Coerce a raw rollout-mode value to one of the known rollout modes.

    The input is stripped of surrounding whitespace and lower-cased before it
    is checked against ``_WORKFLOW_ROLLOUT_MODES``. ``None``, empty strings,
    and unrecognised values all resolve to ``"legacy_only"``.

    Args:
        value: Raw rollout mode, or ``None`` when nothing is configured.

    Returns:
        A member of ``_WORKFLOW_ROLLOUT_MODES``.
    """
    fallback = "legacy_only"
    candidate = (value if value else fallback).strip().lower()
    return candidate if candidate in _WORKFLOW_ROLLOUT_MODES else fallback
def dispatch_render_with_workflow(order_line_id: str) -> dict:
"""Dispatch a render for the given order line.
@@ -54,12 +63,19 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
from app.config import settings
from app.domains.orders.models import OrderLine
from app.domains.rendering.models import OutputType, WorkflowDefinition
from app.domains.rendering.output_type_contracts import (
derive_supported_artifact_kinds_from_workflow_config,
)
from app.domains.rendering.workflow_config_utils import (
canonicalize_workflow_config,
extract_runtime_workflow,
get_workflow_execution_mode,
)
from app.domains.rendering.workflow_executor import prepare_workflow_context
from app.domains.rendering.workflow_executor import (
WorkflowTaskSubmissionError,
prepare_workflow_context,
submit_prepared_workflow_tasks,
)
from app.domains.rendering.workflow_graph_runtime import (
execute_graph_workflow,
find_unsupported_graph_nodes,
@@ -150,7 +166,41 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
)
return legacy_result
execution_mode = get_workflow_execution_mode(canonical_config, default="legacy")
supported_artifact_kinds = derive_supported_artifact_kinds_from_workflow_config(canonical_config)
output_type_artifact_kind = getattr(output_type, "artifact_kind", None)
if output_type_artifact_kind and output_type_artifact_kind not in supported_artifact_kinds:
supported = ", ".join(supported_artifact_kinds) if supported_artifact_kinds else "none"
logger.warning(
"order_line %s: workflow_definition_id %s is incompatible with output_type %s artifact_kind %s; "
"falling back to legacy dispatch",
order_line_id,
wf_def.id,
output_type.id,
output_type_artifact_kind,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result.update(
_build_rollout_signal(
gate_status="workflow_contract_mismatch",
ready=False,
reasons=[
"Linked workflow does not produce the artifact kind required by the output type; legacy dispatch remains authoritative.",
f"Expected artifact kind: {output_type_artifact_kind}. Supported by workflow: [{supported}].",
],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
configured_execution_mode = get_workflow_execution_mode(canonical_config, default="legacy")
workflow_rollout_mode = _normalize_workflow_rollout_mode(
getattr(output_type, "workflow_rollout_mode", None)
)
legacy_runtime_gate_status = "workflow_legacy_runtime"
legacy_runtime_reasons = [
"Workflow definition is active, but execution still uses the legacy runtime path."
]
def _prepare_graph_context(target_mode: str):
workflow_context = prepare_workflow_context(
@@ -175,7 +225,38 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
session.commit()
return run
if execution_mode == "graph":
if configured_execution_mode in {"graph", "shadow"} and workflow_rollout_mode == "legacy_only":
logger.info(
"order_line %s: workflow_definition_id %s is graph-capable but output_type %s is pinned to legacy_only rollout",
order_line_id,
wf_def.id,
output_type.id,
)
legacy_result = _legacy_dispatch(order_line_id)
legacy_result["workflow_rollout_mode"] = workflow_rollout_mode
legacy_result["configured_execution_mode"] = configured_execution_mode
legacy_result.update(
_build_rollout_signal(
gate_status="rollout_legacy_only",
ready=False,
reasons=[
"Output type rollout mode is pinned to legacy_only; legacy dispatch remains authoritative.",
f"Linked workflow stays attached in configured execution mode '{configured_execution_mode}' until rollout is promoted.",
],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
if workflow_rollout_mode in {"graph", "shadow"} and configured_execution_mode not in {"graph", "shadow"}:
legacy_runtime_gate_status = "rollout_requires_graph_workflow"
legacy_runtime_reasons = [
f"Output type rollout mode '{workflow_rollout_mode}' requires a workflow configured for graph or shadow execution.",
f"Linked workflow is still configured for '{configured_execution_mode}', so legacy runtime remains authoritative.",
]
if configured_execution_mode in {"graph", "shadow"} and workflow_rollout_mode == "graph":
try:
workflow_context = _prepare_graph_context("graph")
except Exception as exc:
@@ -225,13 +306,44 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
return legacy_result
try:
dispatch_result = execute_graph_workflow(session, workflow_context)
dispatch_result = execute_graph_workflow(
session,
workflow_context,
dispatch_tasks=False,
)
session.commit()
submit_prepared_workflow_tasks(dispatch_result)
except Exception as exc:
session.rollback()
session.add(run)
mark_workflow_run_failed(run, str(exc))
session.commit()
if isinstance(exc, WorkflowTaskSubmissionError) and exc.submitted_task_ids:
logger.exception(
"order_line %s: graph workflow submission partially failed after %d task(s); "
"not falling back to legacy to avoid duplicate renders",
order_line_id,
len(exc.submitted_task_ids),
)
return {
"backend": "workflow_graph",
"execution_mode": "graph",
"workflow_run_id": str(run.id),
"workflow_rollout_mode": workflow_rollout_mode,
"configured_execution_mode": configured_execution_mode,
"submission_status": "partial_failure",
"submitted_task_ids": exc.submitted_task_ids,
**_build_rollout_signal(
gate_status="graph_submission_failed",
ready=False,
reasons=[
"Graph workflow task submission failed after some tasks were already queued.",
f"Submission error: {exc}.",
],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
),
}
logger.exception(
"order_line %s: graph workflow execution via definition %s failed, falling back to legacy dispatch",
order_line_id,
@@ -257,6 +369,8 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
"workflow_run_id": str(run.id),
"celery_task_id": dispatch_result.task_ids[0] if dispatch_result.task_ids else None,
"task_ids": dispatch_result.task_ids,
"workflow_rollout_mode": workflow_rollout_mode,
"configured_execution_mode": configured_execution_mode,
}
result.update(
_build_rollout_signal(
@@ -267,10 +381,10 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
)
return result
if execution_mode == "shadow":
if configured_execution_mode in {"graph", "shadow"} and workflow_rollout_mode == "shadow":
legacy_result = _legacy_dispatch(order_line_id)
try:
@@ -330,13 +444,43 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
return legacy_result
try:
dispatch_result = execute_graph_workflow(session, workflow_context)
dispatch_result = execute_graph_workflow(
session,
workflow_context,
dispatch_tasks=False,
)
session.commit()
submit_prepared_workflow_tasks(dispatch_result)
except Exception as exc:
session.rollback()
session.add(run)
mark_workflow_run_failed(run, str(exc))
session.commit()
if isinstance(exc, WorkflowTaskSubmissionError) and exc.submitted_task_ids:
logger.exception(
"order_line %s: shadow workflow submission partially failed after %d task(s); "
"legacy dispatch remains authoritative",
order_line_id,
len(exc.submitted_task_ids),
)
legacy_result["execution_mode"] = "shadow"
legacy_result["shadow_status"] = "partial_failure"
legacy_result["shadow_error"] = str(exc)
legacy_result["shadow_workflow_run_id"] = str(run.id)
legacy_result["shadow_submitted_task_ids"] = exc.submitted_task_ids
legacy_result.update(
_build_rollout_signal(
gate_status="shadow_submission_failed",
ready=False,
reasons=[
"Shadow workflow task submission failed after some tasks were already queued.",
f"Submission error: {exc}.",
],
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
return legacy_result
logger.exception(
"order_line %s: shadow workflow execution via definition %s failed; legacy dispatch remains authoritative",
order_line_id,
@@ -364,6 +508,8 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
legacy_result["shadow_status"] = "dispatched"
legacy_result["shadow_workflow_run_id"] = str(run.id)
legacy_result["shadow_task_ids"] = dispatch_result.task_ids
legacy_result["workflow_rollout_mode"] = workflow_rollout_mode
legacy_result["configured_execution_mode"] = configured_execution_mode
legacy_result.update(
_build_rollout_signal(
gate_status="pending_shadow_verdict",
@@ -375,7 +521,7 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)
)
)
return legacy_result
workflow_type, params = extract_runtime_workflow(canonical_config)
@@ -519,12 +665,14 @@ def dispatch_render_with_workflow(order_line_id: str) -> dict:
"execution_mode": "legacy",
"workflow_run_id": str(run.id),
"celery_task_id": celery_task_id,
"workflow_rollout_mode": workflow_rollout_mode,
"configured_execution_mode": configured_execution_mode,
}
result.update(
_build_rollout_signal(
gate_status="workflow_legacy_runtime",
gate_status=legacy_runtime_gate_status,
ready=False,
reasons=["Workflow definition is active, but execution still uses the legacy runtime path."],
reasons=legacy_runtime_reasons,
workflow_def_id=wf_def.id,
output_type_id=output_type.id,
)