feat: add workflow output comparison tooling

This commit is contained in:
2026-04-07 11:45:28 +02:00
parent f43f1e7420
commit ffcaef4659
6 changed files with 368 additions and 1 deletions
+25
View File
@@ -188,3 +188,28 @@ class WorkflowRunOut(BaseModel):
created_at: datetime created_at: datetime
node_results: list[WorkflowNodeResultOut] = [] node_results: list[WorkflowNodeResultOut] = []
model_config = {"from_attributes": True} model_config = {"from_attributes": True}
class WorkflowComparisonArtifactOut(BaseModel):
    """API schema describing one output file examined during a workflow-run comparison."""

    # Filesystem path of the artifact; None when no artifact was resolved.
    path: str | None
    # Uploads-relative storage key derived from the path; None when there is no path.
    storage_key: str | None
    # Whether the file was found on disk at comparison time.
    exists: bool
    # Size in bytes; None when the file is missing.
    file_size_bytes: int | None
    # Hex SHA-256 digest of the file contents; None when the file is missing.
    sha256: str | None
    # MIME type guessed from the filename; None when it cannot be guessed.
    mime_type: str | None
    # Pixel dimensions when the file is a readable image; None otherwise.
    image_width: int | None
    image_height: int | None
class WorkflowRunComparisonOut(BaseModel):
    """API schema for the authoritative-vs-observer output comparison of a workflow run."""

    workflow_run_id: uuid.UUID
    workflow_def_id: uuid.UUID | None
    order_line_id: uuid.UUID | None
    # Execution mode copied from the workflow run (e.g. "shadow").
    execution_mode: str
    # One of: "matched", "different", "missing_authoritative", "missing_observer".
    status: str
    # Human-readable explanation of the status.
    summary: str
    # Metadata for the legacy (authoritative) output file.
    authoritative_output: WorkflowComparisonArtifactOut
    # Metadata for the shadow/observer output file.
    observer_output: WorkflowComparisonArtifactOut
    # True when both files share the same SHA-256; None when either file is missing.
    exact_match: bool | None
    # True when both sides decoded as images with identical dimensions; None when either file is missing.
    dimensions_match: bool | None
    # Mean per-channel pixel difference normalized to [0, 1]; None when not comparable.
    mean_pixel_delta: float | None
@@ -0,0 +1,239 @@
from __future__ import annotations
import hashlib
import mimetypes
import uuid
from dataclasses import dataclass
from pathlib import Path
from PIL import Image, ImageChops, ImageStat
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from app.domains.media.models import MediaAsset
from app.domains.orders.models import OrderLine
from app.domains.rendering.models import WorkflowRun
from app.domains.rendering.schemas import WorkflowComparisonArtifactOut, WorkflowRunComparisonOut
@dataclass(slots=True)
class _ArtifactComparison:
    """Internal carrier for the per-file facts gathered about one artifact."""

    path: str | None
    storage_key: str | None
    exists: bool
    file_size_bytes: int | None
    sha256: str | None
    mime_type: str | None
    image_width: int | None
    image_height: int | None

    def to_schema(self) -> WorkflowComparisonArtifactOut:
        """Project this record onto its public API schema counterpart."""
        payload = {
            "path": self.path,
            "storage_key": self.storage_key,
            "exists": self.exists,
            "file_size_bytes": self.file_size_bytes,
            "sha256": self.sha256,
            "mime_type": self.mime_type,
            "image_width": self.image_width,
            "image_height": self.image_height,
        }
        return WorkflowComparisonArtifactOut(**payload)
def _normalize_storage_key(path: str | None) -> str | None:
if not path:
return None
normalized = path.replace("\\", "/")
marker = "/uploads/"
if marker in normalized:
return normalized.split(marker, 1)[1]
return normalized.lstrip("/")
def _build_artifact(path: str | None) -> _ArtifactComparison:
    """Gather filesystem and image metadata for a single artifact path.

    For an existing regular file this records size, SHA-256 digest, a
    filename-guessed MIME type, and — when Pillow can open it — the pixel
    dimensions. A falsy path yields an all-empty, non-existent record.
    """
    if not path:
        return _ArtifactComparison(
            path=None,
            storage_key=None,
            exists=False,
            file_size_bytes=None,
            sha256=None,
            mime_type=None,
            image_width=None,
            image_height=None,
        )
    target = Path(path)
    present = target.exists()
    guessed_mime, _encoding = mimetypes.guess_type(str(target))
    size: int | None = None
    checksum: str | None = None
    width: int | None = None
    height: int | None = None
    if present and target.is_file():
        size = target.stat().st_size
        hasher = hashlib.sha256()
        # Hash in 1 MiB chunks so large renders never load fully into memory.
        with target.open("rb") as stream:
            while chunk := stream.read(1024 * 1024):
                hasher.update(chunk)
        checksum = hasher.hexdigest()
        try:
            with Image.open(target) as picture:
                width, height = picture.size
        except Exception:
            # Non-image or unreadable file: dimensions stay unknown.
            width = None
            height = None
    return _ArtifactComparison(
        path=str(target),
        storage_key=_normalize_storage_key(str(target)),
        exists=present,
        file_size_bytes=size,
        sha256=checksum,
        mime_type=guessed_mime,
        image_width=width,
        image_height=height,
    )
def _compute_mean_pixel_delta(
authoritative_path: str | None,
observer_path: str | None,
) -> float | None:
if not authoritative_path or not observer_path:
return None
authoritative_file = Path(authoritative_path)
observer_file = Path(observer_path)
if not authoritative_file.exists() or not observer_file.exists():
return None
try:
with Image.open(authoritative_file) as authoritative_image, Image.open(observer_file) as observer_image:
authoritative_rgba = authoritative_image.convert("RGBA")
observer_rgba = observer_image.convert("RGBA")
if authoritative_rgba.size != observer_rgba.size:
return None
diff = ImageChops.difference(authoritative_rgba, observer_rgba)
mean_channels = ImageStat.Stat(diff).mean
return sum(mean_channels) / (len(mean_channels) * 255.0)
except Exception:
return None
async def _load_shadow_asset_by_workflow_run(
    db: AsyncSession,
    workflow_run_id: uuid.UUID,
) -> str | None:
    """Resolve the newest media asset linked to a workflow run to a local path.

    Returns None when the run has no recorded media asset.
    """
    query = (
        select(MediaAsset)
        .where(MediaAsset.workflow_run_id == workflow_run_id)
        .order_by(MediaAsset.created_at.desc())
        .limit(1)
    )
    latest = (await db.execute(query)).scalar_one_or_none()
    if latest is None:
        return None
    key = latest.storage_key.lstrip("/")
    # Storage keys may already carry the uploads prefix; avoid doubling it.
    if key.startswith("app/uploads/"):
        return f"/{key}"
    return f"/app/uploads/{key}"
def _find_shadow_file(order_line: OrderLine, workflow_run: WorkflowRun) -> str | None:
shadow_suffix = f"shadow-{str(workflow_run.id)[:8]}"
candidate_roots: list[Path] = []
if order_line.result_path:
candidate_roots.append(Path(order_line.result_path).parent)
candidate_roots.append(Path("/app/uploads/renders") / str(order_line.id))
seen_roots: set[Path] = set()
candidates: list[Path] = []
for root in candidate_roots:
if root in seen_roots:
continue
seen_roots.add(root)
if not root.exists():
continue
matches = [path for path in root.iterdir() if path.is_file() and shadow_suffix in path.name]
candidates.extend(matches)
if not candidates:
return None
candidates.sort(key=lambda path: (path.stat().st_mtime, path.name), reverse=True)
return str(candidates[0])
async def build_workflow_run_comparison(
    db: AsyncSession,
    workflow_run_id: uuid.UUID,
) -> WorkflowRunComparisonOut | None:
    """Compare a workflow run's observer output against the authoritative legacy output.

    The authoritative artifact comes from the run's order line
    (``result_path``); the observer artifact comes from the newest media asset
    linked to the run, falling back to a filesystem scan for files carrying
    the run's ``shadow-…`` suffix. Returns None when the run does not exist.
    """
    run_result = await db.execute(select(WorkflowRun).where(WorkflowRun.id == workflow_run_id))
    workflow_run = run_result.scalar_one_or_none()
    if workflow_run is None:
        return None
    order_line = None
    if workflow_run.order_line_id is not None:
        order_line = await db.get(OrderLine, workflow_run.order_line_id)
    # Authoritative output: the legacy render recorded on the order line.
    authoritative_path = order_line.result_path if order_line is not None else None
    # Observer output: prefer the linked media asset, then scan the render dirs.
    observer_path = await _load_shadow_asset_by_workflow_run(db, workflow_run.id)
    if observer_path is None and order_line is not None:
        observer_path = _find_shadow_file(order_line, workflow_run)
    authoritative_output = _build_artifact(authoritative_path)
    observer_output = _build_artifact(observer_path)
    if not authoritative_output.exists:
        # With a side missing, the comparison metrics are undefined (None), not False.
        status = "missing_authoritative"
        summary = "Authoritative legacy output is missing."
        exact_match = None
        dimensions_match = None
        mean_pixel_delta = None
    elif not observer_output.exists:
        status = "missing_observer"
        summary = "Observer workflow output is missing."
        exact_match = None
        dimensions_match = None
        mean_pixel_delta = None
    else:
        # Byte-for-byte equality via the content hashes computed above.
        exact_match = authoritative_output.sha256 == observer_output.sha256
        # True only when both sides decoded as images and the sizes agree.
        dimensions_match = (
            authoritative_output.image_width == observer_output.image_width
            and authoritative_output.image_height == observer_output.image_height
            and authoritative_output.image_width is not None
            and observer_output.image_width is not None
        )
        mean_pixel_delta = _compute_mean_pixel_delta(authoritative_output.path, observer_output.path)
        if exact_match:
            status = "matched"
            summary = "Observer output matches the authoritative legacy output byte-for-byte."
        else:
            status = "different"
            if dimensions_match is False:
                summary = "Observer output differs from the authoritative output and the image dimensions changed."
            elif mean_pixel_delta is not None:
                summary = "Observer output differs from the authoritative output."
            else:
                summary = "Observer output differs from the authoritative output and could not be pixel-compared."
    return WorkflowRunComparisonOut(
        workflow_run_id=workflow_run.id,
        workflow_def_id=workflow_run.workflow_def_id,
        order_line_id=workflow_run.order_line_id,
        execution_mode=workflow_run.execution_mode,
        status=status,
        summary=summary,
        authoritative_output=authoritative_output.to_schema(),
        observer_output=observer_output.to_schema(),
        exact_match=exact_match,
        dimensions_match=dimensions_match,
        mean_pixel_delta=mean_pixel_delta,
    )
@@ -15,8 +15,10 @@ from app.domains.rendering.schemas import (
WorkflowDefinitionCreate, WorkflowDefinitionCreate,
WorkflowDefinitionUpdate, WorkflowDefinitionUpdate,
WorkflowDefinitionOut, WorkflowDefinitionOut,
WorkflowRunComparisonOut,
WorkflowRunOut, WorkflowRunOut,
) )
from app.domains.rendering.workflow_comparison_service import build_workflow_run_comparison
from app.domains.rendering.workflow_config_utils import canonicalize_workflow_config from app.domains.rendering.workflow_config_utils import canonicalize_workflow_config
from app.domains.rendering.workflow_node_registry import ( from app.domains.rendering.workflow_node_registry import (
StepCategory, StepCategory,
@@ -199,6 +201,18 @@ async def list_workflow_runs(
return result.scalars().all() return result.scalars().all()
@router.get("/runs/{run_id}/comparison", response_model=WorkflowRunComparisonOut)
async def get_workflow_run_comparison(
    run_id: uuid.UUID,
    _user: User = Depends(require_admin_or_pm),  # auth guard only; value unused
    db: AsyncSession = Depends(get_db),
):
    """Return the authoritative-vs-observer output comparison for a workflow run.

    Responds 404 when no workflow run exists for ``run_id``.
    """
    comparison = await build_workflow_run_comparison(db, run_id)
    if comparison is None:
        raise HTTPException(status_code=404, detail="Workflow run not found")
    return comparison
class WorkflowDispatchResponse(BaseModel): class WorkflowDispatchResponse(BaseModel):
workflow_run: WorkflowRunOut workflow_run: WorkflowRunOut
context_id: str context_id: str
@@ -4,6 +4,7 @@ import uuid
from pathlib import Path from pathlib import Path
import pytest import pytest
from PIL import Image
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.orm import selectinload from sqlalchemy.orm import selectinload
@@ -545,3 +546,90 @@ async def test_workflow_dispatch_endpoint_returns_workflow_run_with_node_results
assert node_results["template"]["status"] == "completed" assert node_results["template"]["status"] == "completed"
assert node_results["template"]["output"]["use_materials"] is False assert node_results["template"]["output"]["use_materials"] is False
assert node_results["output"]["status"] == "skipped" assert node_results["output"]["status"] == "skipped"
@pytest.mark.asyncio
async def test_workflow_run_comparison_endpoint_reports_identical_shadow_output(
    client,
    db,
    admin_user,
    auth_headers,
    tmp_path,
):
    """Byte-identical authoritative and shadow files must report status 'matched'."""
    order_line = await _seed_renderable_order_line(db, admin_user, tmp_path)
    workflow_run = WorkflowRun(
        order_line_id=order_line.id,
        execution_mode="shadow",
        status="completed",
    )
    db.add(workflow_run)
    await db.flush()  # assigns workflow_run.id, needed for the shadow filename below
    render_dir = tmp_path / "comparison" / str(order_line.id)
    render_dir.mkdir(parents=True, exist_ok=True)
    authoritative_path = render_dir / "authoritative.png"
    # Shadow artifacts are discovered via the "shadow-<first 8 chars of run id>" fragment.
    shadow_path = render_dir / f"authoritative_shadow-{str(workflow_run.id)[:8]}.png"
    # Save the same 8x8 solid-color image twice so the files compare equal.
    Image.new("RGBA", (8, 8), (0, 128, 255, 255)).save(authoritative_path)
    Image.new("RGBA", (8, 8), (0, 128, 255, 255)).save(shadow_path)
    order_line.result_path = str(authoritative_path)
    order_line.render_status = "completed"
    await db.commit()
    response = await client.get(
        f"/api/workflows/runs/{workflow_run.id}/comparison",
        headers=auth_headers,
    )
    assert response.status_code == 200
    body = response.json()
    assert body["workflow_run_id"] == str(workflow_run.id)
    assert body["execution_mode"] == "shadow"
    assert body["status"] == "matched"
    assert body["exact_match"] is True
    assert body["dimensions_match"] is True
    assert body["mean_pixel_delta"] == 0.0
    assert body["authoritative_output"]["path"] == str(authoritative_path)
    assert body["observer_output"]["path"] == str(shadow_path)
    assert body["authoritative_output"]["image_width"] == 8
    assert body["observer_output"]["image_height"] == 8
@pytest.mark.asyncio
async def test_workflow_run_comparison_endpoint_reports_missing_shadow_output(
    client,
    db,
    admin_user,
    auth_headers,
    tmp_path,
):
    """An authoritative file without any shadow artifact must report 'missing_observer'."""
    order_line = await _seed_renderable_order_line(db, admin_user, tmp_path)
    workflow_run = WorkflowRun(
        order_line_id=order_line.id,
        execution_mode="shadow",
        status="completed",
    )
    db.add(workflow_run)
    await db.flush()
    render_dir = tmp_path / "comparison-missing" / str(order_line.id)
    render_dir.mkdir(parents=True, exist_ok=True)
    # Only the authoritative render exists; no shadow file is written.
    authoritative_path = render_dir / "authoritative.png"
    Image.new("RGBA", (4, 4), (255, 64, 64, 255)).save(authoritative_path)
    order_line.result_path = str(authoritative_path)
    order_line.render_status = "completed"
    await db.commit()
    response = await client.get(
        f"/api/workflows/runs/{workflow_run.id}/comparison",
        headers=auth_headers,
    )
    assert response.status_code == 200
    body = response.json()
    assert body["status"] == "missing_observer"
    # Comparison metrics are undefined (None) when one side is missing.
    assert body["exact_match"] is None
    assert body["observer_output"]["exists"] is False
    assert body["authoritative_output"]["exists"] is True
@@ -43,6 +43,7 @@
### Phase 6 ### Phase 6
- [x] Shadow mode parity execution dispatches real graph observer runs alongside authoritative legacy dispatch - [x] Shadow mode parity execution dispatches real graph observer runs alongside authoritative legacy dispatch
- Progress: Workflow runs now expose a comparison endpoint that resolves authoritative legacy outputs and matching shadow artifacts, including file hashes, image dimensions, and mean pixel delta for parity inspection.
- [ ] Golden cases pass against legacy outputs - [ ] Golden cases pass against legacy outputs
- [ ] Rollout can be enabled per workflow or output type - [ ] Rollout can be enabled per workflow or output type
- [ ] Rollback to legacy is immediate - [ ] Rollback to legacy is immediate
@@ -93,7 +93,7 @@
### Tickets ### Tickets
- `E6-T1` Add shadow mode parity execution. `completed` - `E6-T1` Add shadow mode parity execution. `completed`
- `E6-T2` Build output comparison tooling. `completed`
- `E6-T3` Define golden test cases. - `E6-T3` Define golden test cases.
- `E6-T4` Roll out per workflow or output type. - `E6-T4` Roll out per workflow or output type.
- `E6-T5` Keep legacy fallback after rollout. - `E6-T5` Keep legacy fallback after rollout.