fix: revert dual-queue routing on single-GPU setups — light worker caused 2x render regression

Root cause: render-worker and render-worker-light shared the same GPU,
causing contention. Complex TRB renders went from 17s → 36s (2x slower).

Changes:
- Thumbnails back to asset_pipeline queue (not asset_pipeline_light)
- Dispatch routing always uses asset_pipeline (no queue splitting)
- render-worker-light gated behind "multi-gpu" profile — only starts with:
  docker compose --profile multi-gpu up -d
- For single-GPU setups: all rendering is sequential on one worker

The dual-queue approach is still correct for multi-GPU machines where each
worker gets its own GPU. On a single-GPU machine, running renders serially
is faster than running them concurrently and having them contend for the GPU.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-15 12:33:26 +01:00
parent b892f72f7e
commit daad2c64f3
3 changed files with 9 additions and 30 deletions
@@ -40,35 +40,13 @@ def dispatch_order_line_render(order_line_id: str):
logger.info(f"OrderLine {order_line_id}: order {order.status.value} — not dispatching") logger.info(f"OrderLine {order_line_id}: order {order.status.value} — not dispatching")
return return
# Route light renders (small stills) to asset_pipeline_light, # All renders go to asset_pipeline (single-GPU default).
# heavy renders (HQ stills, animations) stay on asset_pipeline. # For multi-GPU setups: enable render-worker-light in docker-compose
is_animation = False # and change target_queue logic below to route small stills to
max_dim = 0 # asset_pipeline_light for concurrent rendering.
if line: pass
from app.models.output_type import OutputType
ot = session.execute(
select(OutputType).where(OutputType.id == line.output_type_id)
).scalar_one_or_none() if line.output_type_id else None
if ot:
is_animation = bool(getattr(ot, 'is_animation', False))
rs = ot.render_settings or {}
w = int(rs.get("width", 0) or 0)
h = int(rs.get("height", 0) or 0)
max_dim = max(w, h)
# Apply render_overrides for routing decisions
ro = getattr(line, 'render_overrides', None) or {}
if ro.get("width"):
w = int(ro["width"])
if ro.get("height"):
h = int(ro["height"])
if w or h:
max_dim = max(w, h)
if max_dim > 0 and max_dim <= 1024 and not is_animation:
target_queue = "asset_pipeline_light"
else:
target_queue = "asset_pipeline" target_queue = "asset_pipeline"
logger.info(f"Dispatching render for order line: {order_line_id} -> queue={target_queue}") logger.info(f"Dispatching render for order line: {order_line_id} -> queue={target_queue}")
render_order_line_task.apply_async(args=[order_line_id], queue=target_queue) render_order_line_task.apply_async(args=[order_line_id], queue=target_queue)
@@ -72,7 +72,7 @@ def _pipeline_session(tenant_id: str | None = None):
engine.dispose() engine.dispose()
@celery_app.task(bind=True, name="app.tasks.step_tasks.render_step_thumbnail", queue="asset_pipeline_light") @celery_app.task(bind=True, name="app.tasks.step_tasks.render_step_thumbnail", queue="asset_pipeline")
def render_step_thumbnail(self, cad_file_id: str): def render_step_thumbnail(self, cad_file_id: str):
"""Render the thumbnail for a freshly-processed STEP file. """Render the thumbnail for a freshly-processed STEP file.
@@ -188,7 +188,7 @@ def render_step_thumbnail(self, cad_file_id: str):
pl.step_done("render_step_thumbnail") pl.step_done("render_step_thumbnail")
@celery_app.task(bind=True, name="app.tasks.step_tasks.regenerate_thumbnail", queue="asset_pipeline_light") @celery_app.task(bind=True, name="app.tasks.step_tasks.regenerate_thumbnail", queue="asset_pipeline")
def regenerate_thumbnail(self, cad_file_id: str, part_colors: dict): def regenerate_thumbnail(self, cad_file_id: str, part_colors: dict):
"""Regenerate thumbnail with per-part colours.""" """Regenerate thumbnail with per-part colours."""
pl = PipelineLogger(task_id=self.request.id) pl = PipelineLogger(task_id=self.request.id)
+1
View File
@@ -157,6 +157,7 @@ services:
capabilities: [gpu, compute, utility, graphics] capabilities: [gpu, compute, utility, graphics]
render-worker-light: render-worker-light:
profiles: ["multi-gpu"] # Only start with: docker compose --profile multi-gpu up -d
build: build:
context: . context: .
dockerfile: render-worker/Dockerfile dockerfile: render-worker/Dockerfile