HartOMat/backend/app/domains/rendering/dispatch_service.py

"""Workflow-aware render dispatch service.

C2: extends the legacy dispatch_render path with WorkflowDefinition support.

If an OutputType has workflow_definition_id set:
  - Loads the WorkflowDefinition
  - Calls dispatch_workflow() to build + submit a Celery Canvas
  - Creates a WorkflowRun record tracking the submission

If no workflow_definition_id is set, falls back to the existing direct
task-dispatch logic in app.services.render_dispatcher (legacy path).
"""
from __future__ import annotations

import logging

logger = logging.getLogger(__name__)

_WORKFLOW_ROLLOUT_MODES = {"legacy_only", "shadow", "graph"}


def _build_rollout_signal(
    *,
    gate_status: str,
    ready: bool,
    reasons: list[str],
    workflow_def_id=None,
    output_type_id=None,
    verdict: str | None = None,
) -> dict:
    return {
        "rollout_gate_status": gate_status,
        "rollout_gate_verdict": verdict,
        "rollout_gate_reasons": reasons,
        "workflow_rollout_ready": ready,
        "workflow_rollout_status": "ready_for_rollout" if ready else "hold_legacy_authoritative",
        "output_type_rollout_ready": ready,
        "output_type_rollout_status": "ready_for_rollout" if ready else "hold_legacy_authoritative",
        "rollout_workflow_definition_id": str(workflow_def_id) if workflow_def_id is not None else None,
        "rollout_output_type_id": str(output_type_id) if output_type_id is not None else None,
    }


def _normalize_workflow_rollout_mode(value: str | None) -> str:
    normalized = (value or "legacy_only").strip().lower()
    if normalized in _WORKFLOW_ROLLOUT_MODES:
        return normalized
    return "legacy_only"


def dispatch_render_with_workflow(order_line_id: str) -> dict:
    """Dispatch a render for the given order line.

    Checks whether the associated OutputType has a WorkflowDefinition linked.
    If yes, uses the Celery Canvas workflow builder.
    If no, falls back to the legacy direct-dispatch logic.

    This function is synchronous (Celery-task-safe).
    """
    from sqlalchemy import create_engine, select
    from sqlalchemy.orm import Session, selectinload

    from app.config import settings
    from app.domains.orders.models import OrderLine
    from app.domains.rendering.models import OutputType, WorkflowDefinition
    from app.domains.rendering.output_type_contracts import (
        derive_supported_artifact_kinds_from_workflow_config,
    )
    from app.domains.rendering.workflow_config_utils import (
        canonicalize_workflow_config,
        extract_runtime_workflow,
        get_workflow_execution_mode,
    )
    from app.domains.rendering.workflow_executor import (
        WorkflowTaskSubmissionError,
        prepare_workflow_context,
        submit_prepared_workflow_tasks,
    )
    from app.domains.rendering.workflow_graph_runtime import (
        execute_graph_workflow,
        find_unsupported_graph_nodes,
    )
    from app.domains.rendering.workflow_run_service import create_workflow_run, mark_workflow_run_failed

    engine = create_engine(
        settings.database_url.replace("+asyncpg", ""),
        pool_pre_ping=True,
    )

    with Session(engine) as session:
        # Load order line with its output_type
        line = session.execute(
            select(OrderLine)
            .where(OrderLine.id == order_line_id)
            .options(selectinload(OrderLine.output_type))
        ).scalar_one_or_none()

        if not line:
            raise ValueError(f"OrderLine {order_line_id} not found")

        output_type: OutputType | None = line.output_type

        if output_type is None or output_type.workflow_definition_id is None:
            # Legacy path — no workflow definition linked
            logger.info(
                "order_line %s: no workflow_definition_id, using legacy dispatch",
                order_line_id,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="legacy_only",
                    ready=False,
                    reasons=["No workflow definition is linked; legacy dispatch remains authoritative."],
                    output_type_id=getattr(output_type, "id", None),
                )
            )
            return legacy_result

        # Load the linked WorkflowDefinition
        wf_def: WorkflowDefinition | None = session.execute(
            select(WorkflowDefinition).where(
                WorkflowDefinition.id == output_type.workflow_definition_id,
                WorkflowDefinition.is_active.is_(True),
            )
        ).scalar_one_or_none()

        if wf_def is None:
            logger.warning(
                "order_line %s: workflow_definition_id %s not found or inactive, "
                "falling back to legacy dispatch",
                order_line_id,
                output_type.workflow_definition_id,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="workflow_unavailable",
                    ready=False,
                    reasons=["Linked workflow definition is missing or inactive; legacy dispatch remains authoritative."],
                    workflow_def_id=output_type.workflow_definition_id,
                    output_type_id=output_type.id,
                )
            )
            return legacy_result

        try:
            canonical_config = canonicalize_workflow_config(wf_def.config)
        except Exception as exc:
            logger.warning(
                "order_line %s: workflow_definition_id %s has invalid config (%s), "
                "falling back to legacy dispatch",
                order_line_id,
                wf_def.id,
                exc,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="workflow_invalid",
                    ready=False,
                    reasons=[f"Workflow definition config is invalid: {exc}."],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
            )
            return legacy_result

        supported_artifact_kinds = derive_supported_artifact_kinds_from_workflow_config(canonical_config)
        output_type_artifact_kind = getattr(output_type, "artifact_kind", None)
        if output_type_artifact_kind and output_type_artifact_kind not in supported_artifact_kinds:
            supported = ", ".join(supported_artifact_kinds) if supported_artifact_kinds else "none"
            logger.warning(
                "order_line %s: workflow_definition_id %s is incompatible with output_type %s artifact_kind %s; "
                "falling back to legacy dispatch",
                order_line_id,
                wf_def.id,
                output_type.id,
                output_type_artifact_kind,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="workflow_contract_mismatch",
                    ready=False,
                    reasons=[
                        "Linked workflow does not produce the artifact kind required by the output type; legacy dispatch remains authoritative.",
                        f"Expected artifact kind: {output_type_artifact_kind}. Supported by workflow: [{supported}].",
                    ],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
            )
            return legacy_result

        configured_execution_mode = get_workflow_execution_mode(canonical_config, default="legacy")
        workflow_rollout_mode = _normalize_workflow_rollout_mode(
            getattr(output_type, "workflow_rollout_mode", None)
        )
        legacy_runtime_gate_status = "workflow_legacy_runtime"
        legacy_runtime_reasons = [
            "Workflow definition is active, but execution still uses the legacy runtime path."
        ]

        def _prepare_graph_context(target_mode: str):
            workflow_context = prepare_workflow_context(
                canonical_config,
                context_id=order_line_id,
                execution_mode=target_mode,
            )
            unsupported_nodes = find_unsupported_graph_nodes(workflow_context)
            if unsupported_nodes:
                raise ValueError(
                    f"graph-unsupported nodes present: {', '.join(unsupported_nodes)}"
                )
            return workflow_context

        def _create_graph_run(workflow_context):
            run = create_workflow_run(
                session,
                workflow_def_id=wf_def.id,
                order_line_id=line.id,
                workflow_context=workflow_context,
            )
            session.commit()
            return run

        if configured_execution_mode in {"graph", "shadow"} and workflow_rollout_mode == "legacy_only":
            logger.info(
                "order_line %s: workflow_definition_id %s is graph-capable but output_type %s is pinned to legacy_only rollout",
                order_line_id,
                wf_def.id,
                output_type.id,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result["workflow_rollout_mode"] = workflow_rollout_mode
            legacy_result["configured_execution_mode"] = configured_execution_mode
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="rollout_legacy_only",
                    ready=False,
                    reasons=[
                        "Output type rollout mode is pinned to legacy_only; legacy dispatch remains authoritative.",
                        f"Linked workflow stays attached in configured execution mode '{configured_execution_mode}' until rollout is promoted.",
                    ],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
            )
            return legacy_result

        if workflow_rollout_mode in {"graph", "shadow"} and configured_execution_mode not in {"graph", "shadow"}:
            legacy_runtime_gate_status = "rollout_requires_graph_workflow"
            legacy_runtime_reasons = [
                f"Output type rollout mode '{workflow_rollout_mode}' requires a workflow configured for graph or shadow execution.",
                f"Linked workflow is still configured for '{configured_execution_mode}', so legacy runtime remains authoritative.",
            ]

        if configured_execution_mode in {"graph", "shadow"} and workflow_rollout_mode == "graph":
            try:
                workflow_context = _prepare_graph_context("graph")
            except Exception as exc:
                logger.warning(
                    "order_line %s: workflow_definition_id %s failed graph runtime preparation (%s), "
                    "falling back to legacy dispatch",
                    order_line_id,
                    wf_def.id,
                    exc,
                )
                legacy_result = _legacy_dispatch(order_line_id)
                legacy_result["fallback_from"] = "workflow_graph"
                legacy_result.update(
                    _build_rollout_signal(
                        gate_status="graph_preparation_failed",
                        ready=False,
                        reasons=[f"Graph runtime preparation failed: {exc}."],
                        workflow_def_id=wf_def.id,
                        output_type_id=output_type.id,
                    )
                )
                return legacy_result

            run = None
            try:
                run = _create_graph_run(workflow_context)
            except Exception as exc:
                session.rollback()
                logger.warning(
                    "order_line %s: failed to create graph workflow run for workflow_definition_id %s (%s), "
                    "falling back to legacy dispatch",
                    order_line_id,
                    wf_def.id,
                    exc,
                )
                legacy_result = _legacy_dispatch(order_line_id)
                legacy_result["fallback_from"] = "workflow_graph"
                legacy_result.update(
                    _build_rollout_signal(
                        gate_status="graph_run_creation_failed",
                        ready=False,
                        reasons=[f"Graph workflow run creation failed: {exc}."],
                        workflow_def_id=wf_def.id,
                        output_type_id=output_type.id,
                    )
                )
                return legacy_result

            try:
                dispatch_result = execute_graph_workflow(
                    session,
                    workflow_context,
                    dispatch_tasks=False,
                )
                session.commit()
                submit_prepared_workflow_tasks(dispatch_result)
            except Exception as exc:
                session.rollback()
                session.add(run)
                mark_workflow_run_failed(run, str(exc))
                session.commit()
                if isinstance(exc, WorkflowTaskSubmissionError) and exc.submitted_task_ids:
                    logger.exception(
                        "order_line %s: graph workflow submission partially failed after %d task(s); "
                        "not falling back to legacy to avoid duplicate renders",
                        order_line_id,
                        len(exc.submitted_task_ids),
                    )
                    return {
                        "backend": "workflow_graph",
                        "execution_mode": "graph",
                        "workflow_run_id": str(run.id),
                        "workflow_rollout_mode": workflow_rollout_mode,
                        "configured_execution_mode": configured_execution_mode,
                        "submission_status": "partial_failure",
                        "submitted_task_ids": exc.submitted_task_ids,
                        **_build_rollout_signal(
                            gate_status="graph_submission_failed",
                            ready=False,
                            reasons=[
                                "Graph workflow task submission failed after some tasks were already queued.",
                                f"Submission error: {exc}.",
                            ],
                            workflow_def_id=wf_def.id,
                            output_type_id=output_type.id,
                        ),
                    }
                logger.exception(
                    "order_line %s: graph workflow execution via definition %s failed, falling back to legacy dispatch",
                    order_line_id,
                    wf_def.id,
                )
                fallback_result = _legacy_dispatch(order_line_id)
                fallback_result["fallback_from"] = "workflow_graph"
                fallback_result["workflow_run_id"] = str(run.id)
                fallback_result.update(
                    _build_rollout_signal(
                        gate_status="graph_execution_failed",
                        ready=False,
                        reasons=[f"Graph workflow execution failed: {exc}."],
                        workflow_def_id=wf_def.id,
                        output_type_id=output_type.id,
                    )
                )
                return fallback_result

            result = {
                "backend": "workflow_graph",
                "execution_mode": "graph",
                "workflow_run_id": str(run.id),
                "celery_task_id": dispatch_result.task_ids[0] if dispatch_result.task_ids else None,
                "task_ids": dispatch_result.task_ids,
                "workflow_rollout_mode": workflow_rollout_mode,
                "configured_execution_mode": configured_execution_mode,
            }
            result.update(
                _build_rollout_signal(
                    gate_status="graph_authoritative",
                    ready=True,
                    verdict="pass",
                    reasons=["Workflow graph dispatch is authoritative for this output type."],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
                )
            return result

        if configured_execution_mode in {"graph", "shadow"} and workflow_rollout_mode == "shadow":
            legacy_result = _legacy_dispatch(order_line_id)

            try:
                workflow_context = _prepare_graph_context("shadow")
            except Exception as exc:
                logger.warning(
                    "order_line %s: shadow graph preparation for workflow_definition_id %s failed (%s), "
                    "continuing with authoritative legacy dispatch only",
                    order_line_id,
                    wf_def.id,
                    exc,
                )
                legacy_result["execution_mode"] = "shadow"
                legacy_result["shadow_status"] = "skipped"
                legacy_result["shadow_error"] = str(exc)
                legacy_result.update(
                    _build_rollout_signal(
                        gate_status="shadow_skipped",
                        ready=False,
                        reasons=[
                            "Shadow workflow preparation failed; legacy dispatch remains authoritative.",
                            f"Preparation error: {exc}.",
                        ],
                        workflow_def_id=wf_def.id,
                        output_type_id=output_type.id,
                    )
                )
                return legacy_result

            run = None
            try:
                run = _create_graph_run(workflow_context)
            except Exception as exc:
                session.rollback()
                logger.warning(
                    "order_line %s: failed to create shadow workflow run for workflow_definition_id %s (%s); "
                    "legacy dispatch remains authoritative",
                    order_line_id,
                    wf_def.id,
                    exc,
                )
                legacy_result["execution_mode"] = "shadow"
                legacy_result["shadow_status"] = "failed"
                legacy_result["shadow_error"] = str(exc)
                legacy_result.update(
                    _build_rollout_signal(
                        gate_status="shadow_run_creation_failed",
                        ready=False,
                        reasons=[
                            "Shadow workflow run could not be created; legacy dispatch remains authoritative.",
                            f"Run creation error: {exc}.",
                        ],
                        workflow_def_id=wf_def.id,
                        output_type_id=output_type.id,
                    )
                )
                return legacy_result

            try:
                dispatch_result = execute_graph_workflow(
                    session,
                    workflow_context,
                    dispatch_tasks=False,
                )
                session.commit()
                submit_prepared_workflow_tasks(dispatch_result)
            except Exception as exc:
                session.rollback()
                session.add(run)
                mark_workflow_run_failed(run, str(exc))
                session.commit()
                if isinstance(exc, WorkflowTaskSubmissionError) and exc.submitted_task_ids:
                    logger.exception(
                        "order_line %s: shadow workflow submission partially failed after %d task(s); "
                        "legacy dispatch remains authoritative",
                        order_line_id,
                        len(exc.submitted_task_ids),
                    )
                    legacy_result["execution_mode"] = "shadow"
                    legacy_result["shadow_status"] = "partial_failure"
                    legacy_result["shadow_error"] = str(exc)
                    legacy_result["shadow_workflow_run_id"] = str(run.id)
                    legacy_result["shadow_submitted_task_ids"] = exc.submitted_task_ids
                    legacy_result.update(
                        _build_rollout_signal(
                            gate_status="shadow_submission_failed",
                            ready=False,
                            reasons=[
                                "Shadow workflow task submission failed after some tasks were already queued.",
                                f"Submission error: {exc}.",
                            ],
                            workflow_def_id=wf_def.id,
                            output_type_id=output_type.id,
                        )
                    )
                    return legacy_result
                logger.exception(
                    "order_line %s: shadow workflow execution via definition %s failed; legacy dispatch remains authoritative",
                    order_line_id,
                    wf_def.id,
                )
                legacy_result["execution_mode"] = "shadow"
                legacy_result["shadow_status"] = "failed"
                legacy_result["shadow_error"] = str(exc)
                legacy_result["shadow_workflow_run_id"] = str(run.id)
                legacy_result.update(
                    _build_rollout_signal(
                        gate_status="shadow_execution_failed",
                        ready=False,
                        reasons=[
                            "Shadow workflow execution failed; legacy dispatch remains authoritative.",
                            f"Execution error: {exc}.",
                        ],
                        workflow_def_id=wf_def.id,
                        output_type_id=output_type.id,
                    )
                )
                return legacy_result

            legacy_result["execution_mode"] = "shadow"
            legacy_result["shadow_status"] = "dispatched"
            legacy_result["shadow_workflow_run_id"] = str(run.id)
            legacy_result["shadow_task_ids"] = dispatch_result.task_ids
            legacy_result["workflow_rollout_mode"] = workflow_rollout_mode
            legacy_result["configured_execution_mode"] = configured_execution_mode
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="pending_shadow_verdict",
                    ready=False,
                    reasons=[
                        "Legacy dispatch remains authoritative until the shadow workflow comparison returns pass.",
                        "A pass verdict is required before workflow-first rollout is ready.",
                    ],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
                )
            return legacy_result

        workflow_type, params = extract_runtime_workflow(canonical_config)
        if workflow_type is None or workflow_type == "custom":
            logger.warning(
                "order_line %s: workflow_definition_id %s has no supported preset runtime, "
                "falling back to legacy dispatch",
                order_line_id,
                wf_def.id,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="workflow_runtime_unsupported",
                    ready=False,
                    reasons=["Workflow definition has no supported preset runtime; legacy dispatch remains authoritative."],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
            )
            return legacy_result

        logger.info(
            "order_line %s: dispatching via WorkflowDefinition %s (type=%s)",
            order_line_id,
            wf_def.id,
            workflow_type,
        )

        try:
            workflow_context = prepare_workflow_context(
                canonical_config,
                context_id=order_line_id,
                execution_mode="legacy",
            )
        except Exception as exc:
            logger.warning(
                "order_line %s: workflow_definition_id %s failed runtime preparation (%s), "
                "falling back to legacy dispatch",
                order_line_id,
                wf_def.id,
                exc,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="workflow_preparation_failed",
                    ready=False,
                    reasons=[f"Workflow runtime preparation failed: {exc}."],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
            )
            return legacy_result

        # For turntable workflows: resolve step_path + output_dir from the order line at runtime
        if workflow_type == "turntable" and ("step_path" not in params or "output_dir" not in params):
            from app.domains.products.models import CadFile as _CadFile
            from pathlib import Path as _Path
            from app.config import settings as _cfg
            _product = line.product if hasattr(line, "product") else None
            if _product is None:
                from sqlalchemy.orm import selectinload as _si
                from app.domains.orders.models import OrderLine as _OL
                _line_full = session.execute(
                    select(_OL).where(_OL.id == line.id).options(_si(_OL.product))
                ).scalar_one_or_none()
                _product = _line_full.product if _line_full else None
            if _product and _product.cad_file_id:
                _cad = session.execute(
                    select(_CadFile).where(_CadFile.id == _product.cad_file_id)
                ).scalar_one_or_none()
                if _cad and _cad.stored_path:
                    params.setdefault("step_path", _cad.stored_path)
                    params.setdefault(
                        "output_dir",
                        str(_Path(_cfg.upload_dir) / "renders" / str(line.id)),
                    )

        run = None
        try:
            run = create_workflow_run(
                session,
                workflow_def_id=wf_def.id,
                order_line_id=line.id,
                workflow_context=workflow_context,
            )
            session.commit()
        except Exception as exc:
            session.rollback()
            logger.warning(
                "order_line %s: failed to create workflow run for workflow_definition_id %s (%s), "
                "falling back to legacy dispatch",
                order_line_id,
                wf_def.id,
                exc,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="workflow_run_creation_failed",
                    ready=False,
                    reasons=[f"Workflow run creation failed: {exc}."],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
            )
            return legacy_result

        from app.domains.rendering.workflow_builder import dispatch_workflow

        try:
            celery_task_id = dispatch_workflow(workflow_type, order_line_id, params)
            run.celery_task_id = celery_task_id
            session.commit()
        except Exception as exc:
            session.rollback()
            session.add(run)
            mark_workflow_run_failed(run, str(exc))
            session.commit()
            logger.exception(
                "order_line %s: workflow dispatch via definition %s failed, falling back to legacy dispatch",
                order_line_id,
                wf_def.id,
            )
            legacy_result = _legacy_dispatch(order_line_id)
            legacy_result.update(
                _build_rollout_signal(
                    gate_status="workflow_dispatch_failed",
                    ready=False,
                    reasons=[f"Workflow dispatch failed: {exc}."],
                    workflow_def_id=wf_def.id,
                    output_type_id=output_type.id,
                )
            )
            return legacy_result

        result = {
            "backend": "workflow",
            "workflow_type": workflow_type,
            "execution_mode": "legacy",
            "workflow_run_id": str(run.id),
            "celery_task_id": celery_task_id,
            "workflow_rollout_mode": workflow_rollout_mode,
            "configured_execution_mode": configured_execution_mode,
        }
        result.update(
            _build_rollout_signal(
                gate_status=legacy_runtime_gate_status,
                ready=False,
                reasons=legacy_runtime_reasons,
                workflow_def_id=wf_def.id,
                output_type_id=output_type.id,
            )
        )
        return result


def _legacy_dispatch(order_line_id: str) -> dict:
    """Queue render_order_line_task (the working Celery render implementation)."""
    from app.tasks.step_tasks import render_order_line_task
    render_order_line_task.delay(order_line_id)
    return {"backend": "celery", "queued": True}