feat: render health endpoint + test script + pipeline fixes

- GET /api/worker/health/render: checks render-worker (thumbnail_rendering queue), Blender availability via active_queues inspect, queue depth, last render recency — returns ok/degraded/down status
- scripts/test_render_pipeline.py: integration test for full pipeline (--health, --sample, --full modes)
- PLAN.md: appended Render Pipeline Fixes section with all B-Fixes
- LEARNINGS.md: documented 5 new learnings (queue mismatch, circular import, 307 redirect, worker capability detection)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-06 19:34:12 +01:00
parent 979b0082ec
commit 381f44bc8b
4 changed files with 703 additions and 1 deletions
@@ -0,0 +1,464 @@
+#!/usr/bin/env python3
+"""Render pipeline integration test.
+
+Tests the full pipeline: STEP upload → CAD processing → thumbnail rendering →
+order creation → submit → dispatch renders → wait for completed.
+
+Usage:
+    # Quick smoke test (1 STEP file, 1 output type)
+    python scripts/test_render_pipeline.py --sample
+
+    # Full test — all output types, waits for all renders
+    python scripts/test_render_pipeline.py --full
+
+    # Only check render health endpoint
+    python scripts/test_render_pipeline.py --health
+
+    # Custom credentials / host
+    python scripts/test_render_pipeline.py --sample --host http://localhost:8888 \
+        --email admin@schaeffler.com --password Admin1234!
+
+Environment variables (alternative to flags):
+    TEST_HOST, TEST_EMAIL, TEST_PASSWORD
+"""
+import argparse
+import os
+import sys
+import time
+import json
+import requests
+from pathlib import Path
+
+# ---------------------------------------------------------------------------
+# Config
+# ---------------------------------------------------------------------------
+
+# Connection defaults. Each TEST_* environment variable, when set, replaces
+# the literal fallback; the --host/--email/--password flags override both.
+# NOTE(review): the fallback e-mail/password are committed in source —
+# confirm they are only valid on local development stacks.
+DEFAULT_HOST = os.environ.get("TEST_HOST", "http://localhost:8888")
+DEFAULT_EMAIL = os.environ.get("TEST_EMAIL", "admin@schaeffler.com")
+DEFAULT_PASSWORD = os.environ.get("TEST_PASSWORD", "Admin1234!")
+
+# Default STEP file: <parent of this script's directory>/step-sample-file/81113-l_cut.stp
+SAMPLE_STEP = Path(__file__).parent.parent / "step-sample-file" / "81113-l_cut.stp"
+
+# Polling budgets, all in seconds.
+RENDER_TIMEOUT_SECONDS = 300  # 5 minutes per render
+POLL_INTERVAL_SECONDS = 5
+CAD_PROCESSING_TIMEOUT = 120  # 2 minutes for STEP processing
+
+# ANSI escape sequences used to colour console output; RESET restores the default.
+GREEN = "\033[92m"
+RED = "\033[91m"
+YELLOW = "\033[93m"
+BLUE = "\033[94m"
+RESET = "\033[0m"
+
+# Module-level result accumulators: ok()/fail()/warn() append their message
+# here, and the final summary and exit code are derived from them.
+passed = []
+failed = []
+warnings = []
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def ok(msg: str):
+    """Report a passed check: print it with a green tick, then record it."""
+    line = f"  {GREEN}✓{RESET} {msg}"
+    print(line)
+    passed.append(msg)
+
+
+def fail(msg: str):
+    """Report a failed check: print it with a red cross, then record it."""
+    line = f"  {RED}✗{RESET} {msg}"
+    print(line)
+    failed.append(msg)
+
+
+def warn(msg: str):
+    """Report a non-fatal problem: print it with a yellow sign, then record it."""
+    line = f"  {YELLOW}⚠{RESET} {msg}"
+    print(line)
+    warnings.append(msg)
+
+
+def info(msg: str):
+    """Print an informational line with a blue arrow; nothing is recorded."""
+    line = f"  {BLUE}→{RESET} {msg}"
+    print(line)
+
+
+def section(title: str):
+    """Print a blue banner: a 60-character rule, the indented title, a rule."""
+    rule = f"{BLUE}{'=' * 60}{RESET}"
+    print(f"\n{rule}")
+    print(f"{BLUE}  {title}{RESET}")
+    print(rule)
+
+
+class APIClient:
+    """Thin authenticated wrapper around a ``requests.Session``.
+
+    Logs in on construction and afterwards sends every request with a bearer
+    token. Paths given to :meth:`get`, :meth:`post` and :meth:`delete` are
+    appended to ``<host>/api``.
+
+    Args:
+        host: Base URL of the backend; a trailing slash is stripped.
+        email: Login name, sent as the ``username`` form field.
+        password: Login password.
+        timeout: Default per-request timeout in seconds. ``requests`` applies
+            no timeout of its own, so without one a stalled backend would
+            block the script forever and the polling deadlines would never
+            fire. Individual calls may override it with ``timeout=...``.
+
+    Raises:
+        requests.HTTPError: If the login request returns an error status.
+        requests.Timeout: If the login request exceeds ``timeout``.
+        KeyError: If the login response has no ``access_token`` field.
+    """
+
+    def __init__(self, host: str, email: str, password: str, timeout: float = 60.0):
+        self.host = host.rstrip("/")
+        self.timeout = timeout
+        self.session = requests.Session()
+        self.token: str | None = None
+        self._login(email, password)
+
+    def _login(self, email: str, password: str):
+        """Authenticate with form-encoded credentials and install the bearer token."""
+        resp = self.session.post(
+            f"{self.host}/api/auth/login",
+            data={"username": email, "password": password},
+            timeout=self.timeout,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+        self.token = data["access_token"]
+        self.session.headers["Authorization"] = f"Bearer {self.token}"
+
+    def _url(self, path: str) -> str:
+        """Return the absolute URL for an API path such as ``/orders/``."""
+        return f"{self.host}/api{path}"
+
+    def get(self, path: str, **kwargs) -> requests.Response:
+        """Send a GET to ``<host>/api<path>``; extra kwargs go to ``requests``."""
+        kwargs.setdefault("timeout", self.timeout)
+        return self.session.get(self._url(path), **kwargs)
+
+    def post(self, path: str, **kwargs) -> requests.Response:
+        """Send a POST to ``<host>/api<path>``; extra kwargs go to ``requests``."""
+        kwargs.setdefault("timeout", self.timeout)
+        return self.session.post(self._url(path), **kwargs)
+
+    def delete(self, path: str, **kwargs) -> requests.Response:
+        """Send a DELETE to ``<host>/api<path>``; extra kwargs go to ``requests``."""
+        kwargs.setdefault("timeout", self.timeout)
+        return self.session.delete(self._url(path), **kwargs)
+
+
+# ---------------------------------------------------------------------------
+# Test: Render health endpoint
+# ---------------------------------------------------------------------------
+
+def test_health(client: APIClient) -> bool:
+    """Query the render health endpoint and report each component's state.
+
+    Prints the overall status, worker connectivity, Blender availability,
+    queue depth and (when present) the last render, recording a pass,
+    failure or warning for each component.
+
+    Returns:
+        ``False`` if the endpoint does not answer 200 or reports the overall
+        status ``"down"``; ``True`` for any other status.
+    """
+    section("1. Render Health Check")
+    resp = client.get("/worker/health/render")
+    if resp.status_code != 200:
+        fail(f"GET /worker/health/render → {resp.status_code}: {resp.text[:200]}")
+        return False
+
+    data = resp.json()
+
+    # Each field is read right before it is printed so that a missing key
+    # surfaces at the same point in the output.
+    overall = data["status"]
+    info(f"Overall status: {overall}")
+    worker_up = data["render_worker_connected"]
+    info(f"Render worker connected: {worker_up}")
+    blender_up = data["blender_available"]
+    info(f"Blender available: {blender_up}")
+    depth = data["thumbnail_queue_depth"]
+    info(f"thumbnail_rendering queue depth: {depth}")
+
+    last_at = data.get("last_render_at")
+    if last_at:
+        outcome = "success" if data["last_render_success"] else "FAILED"
+        age = data["last_render_age_minutes"]
+        info(f"Last render: {last_at} ({outcome}, {age}m ago)")
+
+    if not worker_up:
+        fail("Render worker NOT connected — renders will fail")
+    else:
+        ok("Render worker connected")
+
+    if not blender_up:
+        fail("Blender renderer NOT reachable — thumbnail/order renders will fail")
+    else:
+        ok("Blender renderer reachable (port 8100)")
+
+    if not data["thumbnail_queue_ok"]:
+        warn(f"thumbnail_rendering queue DEEP ({depth} tasks) — renders may be slow")
+    else:
+        ok(f"thumbnail_rendering queue healthy (depth={depth})")
+
+    return overall != "down"
+
+
+# ---------------------------------------------------------------------------
+# Test: STEP upload + CAD processing
+# ---------------------------------------------------------------------------
+
+def test_step_upload(client: APIClient, step_file: Path) -> str | None:
+    """Upload a STEP file and wait until the backend has finished processing it.
+
+    Args:
+        client: Authenticated API client.
+        step_file: Path of the STEP file to upload.
+
+    Returns:
+        The ``cad_file_id`` from the upload response once the CAD file's
+        ``processing_status`` is ``"completed"``; ``None`` if the file is
+        missing, the upload is rejected, processing fails, or
+        ``CAD_PROCESSING_TIMEOUT`` elapses.
+    """
+    section("2. STEP Upload + CAD Processing")
+
+    if not step_file.exists():
+        fail(f"Sample STEP file not found: {step_file}")
+        return None
+
+    info(f"Uploading {step_file.name} ({step_file.stat().st_size // 1024} KB)")
+    with open(step_file, "rb") as f:
+        resp = client.post(
+            "/uploads/step",
+            files={"file": (step_file.name, f, "application/octet-stream")},
+        )
+
+    if resp.status_code not in (200, 201):
+        fail(f"STEP upload failed: {resp.status_code} {resp.text[:300]}")
+        return None
+
+    data = resp.json()
+    # Stringify so the slice below and the declared return type hold even if
+    # the backend serialises ids as non-strings.
+    cad_file_id = str(data["cad_file_id"])
+    ok(f"STEP uploaded → cad_file_id={cad_file_id[:8]}... status={data.get('status')}")
+
+    # Poll for completed processing. A monotonic clock keeps the deadline
+    # immune to wall-clock adjustments while waiting.
+    info(f"Waiting for CAD processing (timeout={CAD_PROCESSING_TIMEOUT}s)...")
+    deadline = time.monotonic() + CAD_PROCESSING_TIMEOUT
+    last_status = None
+    last_http_error = None  # status code of the latest non-200 poll, if any
+    while time.monotonic() < deadline:
+        resp = client.get(f"/cad/{cad_file_id}")
+        if resp.status_code == 200:
+            last_http_error = None
+            cad = resp.json()
+            status = cad.get("processing_status")
+            if status != last_status:
+                info(f"  CAD status: {status}")
+                last_status = status
+            if status == "completed":
+                ok("CAD processing completed (thumbnail rendered)")
+                return cad_file_id
+            if status == "failed":
+                fail(f"CAD processing FAILED: {cad.get('error_message', 'unknown error')}")
+                return None
+        elif resp.status_code != last_http_error:
+            # Keep retrying, but make the failing poll visible (once per
+            # distinct status) instead of swallowing it silently.
+            last_http_error = resp.status_code
+            info(f"  CAD status poll returned HTTP {resp.status_code} — retrying")
+        time.sleep(POLL_INTERVAL_SECONDS)
+
+    http_note = f", last poll HTTP {last_http_error}" if last_http_error is not None else ""
+    fail(f"CAD processing timed out after {CAD_PROCESSING_TIMEOUT}s (last status: {last_status}{http_note})")
+    return None
+
+
+# ---------------------------------------------------------------------------
+# Test: Order creation + submit + dispatch + wait
+# ---------------------------------------------------------------------------
+
+def test_order_render(
+    client: APIClient,
+    cad_file_id: str,
+    output_type_ids: list[str],
+    test_label: str,
+) -> bool:
+    """Create a minimal order, submit, dispatch renders, wait for completion.
+
+    Checks that the CAD file is retrievable, reuses the first product (among
+    the first 100 listed) linked to it or creates a test product, creates an
+    order through ``/orders/product-order`` and hands it to
+    ``_submit_and_wait``. If order creation is rejected, it falls back to
+    ``_test_existing_renders``.
+
+    Args:
+        client: Authenticated API client.
+        cad_file_id: Id of an already processed CAD file.
+        output_type_ids: Output types to request, one order line each.
+        test_label: Text shown in the section banner.
+
+    Returns:
+        ``True`` if the render flow succeeded (or the fallback found nothing
+        to test), ``False`` on any recorded failure.
+    """
+    section(f"3. Order Render — {test_label}")
+    info(f"Output types: {len(output_type_ids)}")
+
+    # Only the status code is used: make sure the CAD file is retrievable.
+    resp = client.get(f"/cad/{cad_file_id}")
+    if resp.status_code != 200:
+        fail(f"CAD file lookup failed: {resp.status_code}")
+        return False
+
+    # Find or create a product linked to this CAD file
+    product_id = None
+    resp_products = client.get("/products/?limit=100")
+    if resp_products.status_code == 200:
+        products = resp_products.json()
+        if isinstance(products, dict):
+            products = products.get("items", [])
+        for p in products:
+            if str(p.get("cad_file_id")) == cad_file_id:
+                product_id = str(p["id"])
+                # ``name`` may be absent or null; fall back to the id and
+                # stringify so the slice cannot fail on a non-string value.
+                label = str(p.get("name") or p["id"])[:40]
+                info(f"Using existing product: {label}")
+                break
+
+    if not product_id:
+        # Create a minimal test product
+        resp_create = client.post("/products/", json={
+            "name": f"Test Product {cad_file_id[:8]}",
+            "pim_id": f"TEST-{cad_file_id[:8]}",
+            "is_active": True,
+            "cad_file_id": cad_file_id,
+        })
+        if resp_create.status_code not in (200, 201):
+            fail(f"Product creation failed: {resp_create.status_code} {resp_create.text[:200]}")
+            return False
+        # Stringified like the existing-product branch above.
+        product_id = str(resp_create.json()["id"])
+        ok(f"Created test product: {product_id[:8]}...")
+
+    # Create order via wizard endpoint
+    resp_order = client.post("/orders/product-order", json={
+        "product_id": product_id,
+        "output_type_selections": [
+            {"output_type_id": ot_id}
+            for ot_id in output_type_ids
+        ],
+    })
+    if resp_order.status_code not in (200, 201):
+        # Fallback: try to find existing submitted order
+        warn(f"Product order wizard not available ({resp_order.status_code}), looking for existing order lines...")
+        return _test_existing_renders(client, product_id, output_type_ids)
+
+    order = resp_order.json()
+    order_id = str(order["id"])
+    ok(f"Order created: {order.get('order_number')} (id={order_id[:8]}...)")
+
+    return _submit_and_wait(client, order_id, output_type_ids)
+
+
+def _test_existing_renders(client: APIClient, product_id: str, output_type_ids: list[str]) -> bool:
+    """Fallback: run the submit/dispatch/wait flow on an already existing order.
+
+    Lists up to 20 orders and passes the first one whose status is
+    "submitted", "processing" or "rendering" to ``_submit_and_wait``.
+
+    NOTE(review): ``product_id`` is accepted but never used, so the chosen
+    order is not necessarily related to the product under test, and it will
+    be re-submitted and re-dispatched — confirm whether filtering by product
+    was intended.
+
+    Returns:
+        The result of ``_submit_and_wait`` for the chosen order; ``False`` if
+        the order list cannot be fetched; ``True`` (with a warning) if no
+        order qualifies.
+    """
+    resp = client.get("/orders/?limit=20")
+    if resp.status_code != 200:
+        fail("Could not list orders")
+        return False
+
+    payload = resp.json()
+    candidates = payload.get("items", []) if isinstance(payload, dict) else payload
+    in_flight = ("submitted", "processing", "rendering")
+    chosen = next((o for o in candidates if o.get("status") in in_flight), None)
+    if chosen is None:
+        warn("No suitable existing orders found for render test")
+        return True  # non-blocking warning
+    return _submit_and_wait(client, chosen["id"], output_type_ids)
+
+
+def _submit_and_wait(client: APIClient, order_id: str, output_type_ids: list[str]) -> bool:
+    """Submit an order, dispatch its renders and poll until it finishes.
+
+    Args:
+        client: Authenticated API client.
+        order_id: Order to submit and render.
+        output_type_ids: Requested output types; only their count is used,
+            to scale the overall timeout (``RENDER_TIMEOUT_SECONDS`` each,
+            at least one).
+
+    Returns:
+        ``True`` if the order reaches "completed" and no line reports
+        "failed"; ``False`` if submit, dispatch or polling fails, the order
+        reports "failed", or the timeout elapses.
+    """
+    # Submit — a 409 is reported as "already submitted", not as a failure.
+    resp_sub = client.post(f"/orders/{order_id}/submit")
+    if resp_sub.status_code not in (200, 201, 204):
+        if resp_sub.status_code == 409:
+            info("Order already submitted")
+        else:
+            fail(f"Order submit failed: {resp_sub.status_code} {resp_sub.text[:200]}")
+            return False
+    else:
+        ok("Order submitted")
+
+    # Dispatch renders
+    resp_disp = client.post(f"/orders/{order_id}/dispatch-renders")
+    if resp_disp.status_code not in (200, 201, 204):
+        fail(f"Dispatch renders failed: {resp_disp.status_code} {resp_disp.text[:200]}")
+        return False
+    # The body may be empty (e.g. 204) or not a JSON object; the count is
+    # purely informational, so fall back to "?" rather than crash.
+    dispatch_data = resp_disp.json() if resp_disp.content else {}
+    dispatched = dispatch_data.get("dispatched", "?") if isinstance(dispatch_data, dict) else "?"
+    ok(f"Renders dispatched ({dispatched} lines)")
+
+    # Poll for order completion. A monotonic clock keeps the deadline immune
+    # to wall-clock adjustments while waiting.
+    info(f"Waiting for renders to complete (timeout={RENDER_TIMEOUT_SECONDS}s per OT)...")
+    started = time.monotonic()
+    deadline = started + RENDER_TIMEOUT_SECONDS * max(len(output_type_ids), 1)
+    last_summary = ""
+    while time.monotonic() < deadline:
+        resp_ord = client.get(f"/orders/{order_id}")
+        if resp_ord.status_code != 200:
+            fail(f"Order poll failed: {resp_ord.status_code}")
+            return False
+        order = resp_ord.json()
+        order_status = order.get("status")
+        # Accept either key, and treat an explicit null like a missing list.
+        lines = order.get("lines") or order.get("order_lines") or []
+        statuses = [line.get("render_status") for line in lines]
+        summary = f"order={order_status} lines={statuses}"
+        if summary != last_summary:
+            # Log only on change to keep the poll output readable.
+            info(f"  {summary}")
+            last_summary = summary
+
+        if order_status == "completed":
+            ok(f"Order completed — all {len(lines)} render(s) done")
+            # Check individual line results
+            all_success = True
+            for line in lines:
+                rs = line.get("render_status")
+                # ``output_type`` may be present but null, so guard before .get().
+                ot_name = line.get("output_type_name") or (line.get("output_type") or {}).get("name", "?")
+                if rs == "completed":
+                    ok(f"  Line [{ot_name}]: completed")
+                elif rs == "failed":
+                    fail(f"  Line [{ot_name}]: FAILED")
+                    all_success = False
+                else:
+                    # Any other line status is surfaced but does not fail the run.
+                    warn(f"  Line [{ot_name}]: {rs}")
+            return all_success
+
+        if order_status == "failed":
+            fail("Order FAILED — check render logs")
+            return False
+
+        time.sleep(POLL_INTERVAL_SECONDS)
+
+    fail(f"Render timed out after {time.monotonic() - started:.0f}s")
+    return False
+
+
+# ---------------------------------------------------------------------------
+# Get output types
+# ---------------------------------------------------------------------------
+
+def get_output_types(client: APIClient) -> list[dict]:
+    """Return the active output types, or ``[]`` if they cannot be fetched.
+
+    Tries ``/output-types/`` first and ``/output-types`` second, using the
+    first response with status 200. The payload may be a bare list or a dict
+    with an ``items`` list. Entries without an ``is_active`` field count as
+    active.
+    """
+    for path in ("/output-types/", "/output-types"):
+        resp = client.get(path)
+        if resp.status_code == 200:
+            break
+    else:
+        return []
+
+    payload = resp.json()
+    entries = payload.get("items", []) if isinstance(payload, dict) else payload
+    return list(filter(lambda entry: entry.get("is_active", True), entries))
+
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+def main():
+    """Parse the command line, run the selected tests and exit with a status.
+
+    With none of ``--health``/``--sample``/``--full`` the help text is
+    printed and the process exits 0. The health check always runs first;
+    ``--health`` stops after it. Otherwise the STEP file is uploaded and one
+    order render is run per selected output type. When both ``--sample`` and
+    ``--full`` are given, only the sample test runs. The exit code is 0 when
+    no failure was recorded, 1 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Render pipeline integration tests")
+    parser.add_argument("--host", default=DEFAULT_HOST)
+    parser.add_argument("--email", default=DEFAULT_EMAIL)
+    parser.add_argument("--password", default=DEFAULT_PASSWORD)
+    parser.add_argument("--health", action="store_true", help="Only run health check")
+    parser.add_argument("--sample", action="store_true", help="Quick sample test (1 STEP, 1 OT)")
+    parser.add_argument("--full", action="store_true", help="Full test (all output types)")
+    parser.add_argument("--step", default=str(SAMPLE_STEP), help="Path to STEP file")
+    args = parser.parse_args()
+
+    if not any([args.health, args.sample, args.full]):
+        parser.print_help()
+        sys.exit(0)
+
+    print(f"\n{BLUE}Render Pipeline Test{RESET}")
+    print(f"Host: {args.host}")
+    print(f"Mode: {'health' if args.health else 'sample' if args.sample else 'full'}")
+
+    # Login — top-level boundary, so any error is reported as a test failure.
+    try:
+        client = APIClient(args.host, args.email, args.password)
+        ok(f"Authenticated as {args.email}")
+    except Exception as exc:
+        fail(f"Authentication failed: {exc}")
+        sys.exit(1)
+
+    # Health check
+    health_ok = test_health(client)
+
+    if args.health:
+        _print_summary()
+        sys.exit(0 if not failed else 1)
+
+    if not health_ok:
+        warn("Health check failed — render tests may not work. Continuing anyway...")
+
+    # STEP upload
+    step_path = Path(args.step)
+    cad_file_id = test_step_upload(client, step_path)
+
+    if not cad_file_id:
+        fail("STEP processing failed — cannot proceed to render tests")
+        _print_summary()
+        sys.exit(1)
+
+    # Get output types
+    output_types = get_output_types(client)
+    if not output_types:
+        fail("No active output types found")
+        _print_summary()
+        sys.exit(1)
+
+    info(f"Found {len(output_types)} active output types: {[ot['name'] for ot in output_types]}")
+
+    if args.sample:
+        # Prefer a non-animation "LQ" type (fastest); failing that, any
+        # non-animation type; only as a last resort the first type listed.
+        stills = [t for t in output_types if not t.get("is_animation")]
+        ot = next(
+            (t for t in stills if "LQ" in t["name"].upper()),
+            stills[0] if stills else output_types[0],
+        )
+        info(f"Sample test using output type: {ot['name']}")
+        test_order_render(client, cad_file_id, [ot["id"]], f"Sample [{ot['name']}]")
+
+    elif args.full:
+        # Test each output type individually
+        for ot in output_types:
+            if ot.get("is_animation"):
+                warn(f"Skipping animation output type: {ot['name']} (too slow for full test)")
+                continue
+            test_order_render(client, cad_file_id, [ot["id"]], ot["name"])
+
+    # Results of the render tests reach the exit code through ``failed``.
+    _print_summary()
+    sys.exit(0 if not failed else 1)
+
+
+def _print_summary():
+    """Print the pass/fail/warning counts, any failures, and a final verdict."""
+    section("Test Summary")
+    print(f"  {GREEN}Passed:{RESET} {len(passed)}")
+    print(f"  {RED}Failed:{RESET} {len(failed)}")
+    print(f"  {YELLOW}Warnings:{RESET} {len(warnings)}")
+
+    if not failed:
+        print(f"\n{GREEN}All tests passed!{RESET}")
+        return
+
+    print(f"\n{RED}FAILURES:{RESET}")
+    for message in failed:
+        print(f"  - {message}")
+    print(f"\n{RED}Tests FAILED{RESET}")
+
+
+if __name__ == "__main__":
+    main()