"""STEP metadata extraction tasks.

Covers:
- process_step_file — extract OCC objects + queue thumbnail
- reextract_rich_metadata_task — batch re-extract rich metadata for completed files
- reextract_cad_metadata — re-compute bounding-box for completed files
- _auto_populate_materials_for_cad — helper: fill cad_part_materials from Excel
- _persist_rich_metadata — helper: write rich metadata to cad_files + products
- _bbox_from_glb / _bbox_from_step_cadquery — bbox helpers
"""
import logging
from pathlib import Path

from app.tasks.celery_app import celery_app
from app.core.task_logs import log_task_event
from app.core.pipeline_logger import PipelineLogger

logger = logging.getLogger(__name__)

# TTL of the per-file Redis lock taken by process_step_file (10 minutes).
_STEP_LOCK_TTL_SECONDS = 600


def _bbox_from_glb(glb_path: str) -> dict | None:
    """Backward-compatible wrapper for GLB bbox extraction."""
    from app.domains.rendering.workflow_runtime_services import extract_bbox_from_glb

    return extract_bbox_from_glb(glb_path)


def _bbox_from_step_cadquery(step_path: str) -> dict | None:
    """Backward-compatible wrapper for STEP bbox fallback extraction."""
    from app.domains.rendering.workflow_runtime_services import extract_bbox_from_step_cadquery

    return extract_bbox_from_step_cadquery(step_path)


def _persist_rich_metadata(engine, cad_file_id: str, tenant_id, rich_meta: dict) -> None:
    """Store ``rich_meta`` on the CAD file and on every active product linked to it.

    Opens a short-lived sync session on ``engine``, applies the tenant context,
    writes ``rich_meta`` under the ``"rich_metadata"`` key of
    ``cad_files.mesh_attributes`` (when the CAD file row is found) and sets
    ``products.cad_metadata`` for all active products referencing the file,
    then commits.

    Args:
        engine: SQLAlchemy engine to open the session on. The caller owns it
            and is responsible for disposing it.
        cad_file_id: Primary key of the CAD file.
        tenant_id: Tenant identifier handed to ``set_tenant_context_sync``.
        rich_meta: Metadata dict as returned by ``extract_rich_metadata``.
    """
    from sqlalchemy import update as sql_update
    from sqlalchemy.orm import Session as SyncSession

    from app.core.tenant_context import set_tenant_context_sync
    from app.models.cad_file import CadFile
    from app.models.product import Product

    with SyncSession(engine) as session:
        set_tenant_context_sync(session, tenant_id)

        # Merge into cad_files.mesh_attributes. A new dict is built rather than
        # mutating the ORM-loaded value in place.
        cad_file = session.get(CadFile, cad_file_id)
        if cad_file:
            merged_attrs = {**(cad_file.mesh_attributes or {}), "rich_metadata": rich_meta}
            session.execute(
                sql_update(CadFile)
                .where(CadFile.id == cad_file_id)
                .values(mesh_attributes=merged_attrs)
            )

        # Update all active products linked to this CAD file
        session.execute(
            sql_update(Product)
            .where(Product.cad_file_id == cad_file_id, Product.is_active.is_(True))
            .values(cad_metadata=rich_meta)
        )
        session.commit()


@celery_app.task(bind=True, name="app.tasks.step_tasks.process_step_file", queue="step_processing")
def process_step_file(
    self,
    cad_file_id: str,
    workflow_run_id: str | None = None,
    workflow_node_id: str | None = None,
    **_: object,
):
    """Process a STEP file: extract CAD metadata, then (legacy flow) queue a thumbnail.

    Steps performed while holding the per-file lock:

    1. ``extract_cad_metadata`` — on failure the task is retried
       (60 s countdown, at most 3 retries).
    2. ``extract_rich_metadata`` (volume, surface area, complexity, etc.) —
       non-fatal; failures are logged and otherwise ignored.

    After the lock is released the workflow run is marked ``"completed"``
    (best effort) and, when ``workflow_run_id`` is ``None``, the legacy
    thumbnail render task is queued.

    A per-file Redis lock (TTL = 10 min) prevents duplicate tasks from
    processing the same file concurrently — e.g. when 'Process Unprocessed' is
    clicked while a file is already being processed. A duplicate task logs a
    warning and returns without doing any work.

    NOTE(review): earlier documentation of this task also mentioned glTF
    conversion and auto-populating cad_part_materials from Excel data; neither
    is invoked from this function body — presumably handled elsewhere; confirm.

    Args:
        cad_file_id: Primary key of the CAD file to process.
        workflow_run_id: Workflow run to update; ``None`` selects the legacy flow.
        workflow_node_id: Workflow node to update, passed through unchanged.
        **_: Extra keyword arguments are accepted and ignored.
    """
    import redis as redis_lib

    from app.config import settings as app_settings

    pl = PipelineLogger(task_id=self.request.id)
    pl.step_start("process_step_file", {"cad_file_id": cad_file_id})

    # Resolve tenant context at task start (required for RLS)
    from app.core.tenant_context import resolve_tenant_id_for_cad

    _tenant_id = resolve_tenant_id_for_cad(cad_file_id)

    lock_key = f"step_processing_lock:{cad_file_id}"
    r = redis_lib.from_url(app_settings.redis_url)
    acquired = r.set(lock_key, "1", nx=True, ex=_STEP_LOCK_TTL_SECONDS)
    if not acquired:
        logger.warning(f"STEP file {cad_file_id} is already being processed — skipping duplicate task")
        return

    try:
        pl.info("process_step_file", f"Processing STEP file (metadata only): {cad_file_id}")
        try:
            from app.services.step_processor import extract_cad_metadata

            extract_cad_metadata(cad_file_id, tenant_id=_tenant_id)
        except Exception as exc:
            pl.step_error("process_step_file", f"STEP metadata extraction failed: {exc}", exc)
            # The lock is released exactly once, by the outer ``finally``, as
            # the retry exception propagates. Deleting it here as well would
            # open a window in which another task could acquire the lock only
            # to have it removed by that ``finally``.
            raise self.retry(exc=exc, countdown=60, max_retries=3)

        # Extract rich metadata (volume, surface area, complexity, etc.) — non-fatal
        try:
            from sqlalchemy import create_engine
            from sqlalchemy.orm import Session as SyncSession

            from app.config import settings as cfg
            from app.services.step_processor import extract_rich_metadata
            from app.models.cad_file import CadFile
            from app.core.tenant_context import set_tenant_context_sync

            eng = create_engine(cfg.database_url_sync)
            try:
                # Load stored_path for the cad file
                with SyncSession(eng) as session:
                    set_tenant_context_sync(session, _tenant_id)
                    cad_file = session.get(CadFile, cad_file_id)
                    step_path = cad_file.stored_path if cad_file else None

                if step_path:
                    rich_meta = extract_rich_metadata(str(step_path))
                    # Only persist results that actually describe at least one part.
                    if rich_meta and rich_meta.get("part_count", 0) > 0:
                        _persist_rich_metadata(eng, cad_file_id, _tenant_id, rich_meta)
                        logger.info(
                            f"Rich metadata extracted for cad_file {cad_file_id}: "
                            f"{rich_meta.get('part_count')} parts, "
                            f"{rich_meta.get('total_volume_cm3', 0):.1f} cm³"
                        )
            finally:
                eng.dispose()
        except Exception:
            logger.exception(f"Rich metadata extraction failed for cad_file {cad_file_id} (non-fatal)")
    finally:
        r.delete(lock_key)  # always release on completion or unhandled error

    pl.step_done("process_step_file")

    # Workflow bookkeeping is best effort: a failure here must not fail the task.
    try:
        from app.domains.rendering.tasks import _update_workflow_run_status

        _update_workflow_run_status(
            cad_file_id,
            "completed",
            workflow_run_id=workflow_run_id,
            workflow_node_id=workflow_node_id,
        )
    except Exception:
        logger.exception("Failed to update workflow state for process_step_file %s", cad_file_id)

    # Legacy flow still auto-queues thumbnail generation here.
    # Graph-mode workflows dispatch explicit thumbnail save/render nodes instead.
    if workflow_run_id is None:
        from app.domains.pipeline.tasks.render_thumbnail import render_step_thumbnail

        render_step_thumbnail.delay(cad_file_id)


def _auto_populate_materials_for_cad(cad_file_id: str, tenant_id: str | None = None) -> None:
    """Sync helper: auto-populate cad_part_materials from Excel for newly-processed CAD files.

    Creates a throw-away sync engine, applies the tenant context and delegates
    to ``auto_populate_materials_for_cad``. The engine is disposed even when
    the delegate raises.

    Only fills products where cad_part_materials is empty or all-blank,
    preventing overwrites of manually assigned materials (behaviour of the
    delegate as described by the original documentation; not visible here).

    Args:
        cad_file_id: Primary key of the CAD file whose products are filled.
        tenant_id: Tenant identifier handed to ``set_tenant_context_sync``.
    """
    from sqlalchemy import create_engine
    from sqlalchemy.orm import Session

    from app.config import settings as app_settings
    from app.core.tenant_context import set_tenant_context_sync
    from app.domains.rendering.workflow_runtime_services import auto_populate_materials_for_cad

    sync_url = app_settings.database_url.replace("+asyncpg", "")
    eng = create_engine(sync_url)
    try:
        with Session(eng) as session:
            set_tenant_context_sync(session, tenant_id)
            auto_populate_materials_for_cad(session, cad_file_id)
    finally:
        # Previously skipped when the work above raised, leaking the engine.
        eng.dispose()


@celery_app.task(name="app.tasks.step_tasks.reextract_rich_metadata_task", queue="asset_pipeline")
def reextract_rich_metadata_task():
    """Batch re-extract rich metadata (volume, surface area, complexity) for all completed CAD files.

    Selects every CAD file whose processing status is ``completed`` and that
    has a stored path, re-runs ``extract_rich_metadata`` on each and persists
    results that report at least one part. Failures are isolated per file:
    they are logged and counted, and the batch continues. A summary line with
    the updated/failed counts is logged at the end.
    """
    from sqlalchemy import create_engine, select as sql_select
    from sqlalchemy.orm import Session as SyncSession

    from app.config import settings as cfg
    from app.models.cad_file import CadFile, ProcessingStatus

    sync_url = cfg.database_url.replace("+asyncpg", "")
    eng = create_engine(sync_url)
    updated = 0
    failed = 0
    try:
        # NOTE(review): this listing query runs without a tenant context being
        # set on the session — confirm that is intended under RLS.
        with SyncSession(eng) as session:
            cad_files = session.execute(
                sql_select(CadFile).where(
                    CadFile.processing_status == ProcessingStatus.completed,
                    CadFile.stored_path.isnot(None),
                )
            ).scalars().all()
            # Copy out plain values while the session is still open.
            cad_entries = [(str(cf.id), cf.stored_path, cf.tenant_id) for cf in cad_files]

        for cad_file_id, step_path, tenant_id in cad_entries:
            try:
                # Imported inside the per-file try so that an import failure is
                # logged and counted like any other per-file failure.
                from app.services.step_processor import extract_rich_metadata

                rich_meta = extract_rich_metadata(str(step_path))
                if rich_meta and rich_meta.get("part_count", 0) > 0:
                    _persist_rich_metadata(eng, cad_file_id, tenant_id, rich_meta)
                    updated += 1
                    logger.info(
                        f"reextract_rich_metadata: {cad_file_id} -> "
                        f"{rich_meta.get('part_count')} parts, "
                        f"{rich_meta.get('total_volume_cm3', 0):.1f} cm3"
                    )
            except Exception:
                failed += 1
                logger.exception(f"reextract_rich_metadata failed for cad_file {cad_file_id}")
    finally:
        eng.dispose()

    logger.info(f"reextract_rich_metadata_task complete: {updated} updated, {failed} failed")


@celery_app.task(name="app.tasks.step_tasks.reextract_cad_metadata", queue="asset_pipeline")
def reextract_cad_metadata(cad_file_id: str):
    """Re-extract bounding-box dimensions for an already-completed CAD file.

    Resolves the bounding box via ``resolve_cad_bbox`` (given the STEP path
    and the sibling ``<stem>_thumbnail.glb`` path) and merges the resulting
    patch into ``mesh_attributes``. Does not touch processing_status and does
    not trigger re-rendering. Safe to run on completed files.

    Errors during bbox resolution or persistence are logged and swallowed;
    errors while loading the CAD file row propagate. The engine is disposed
    on every exit path.

    Args:
        cad_file_id: Primary key of the CAD file to update.
    """
    from sqlalchemy import create_engine
    from sqlalchemy.orm import Session

    from app.config import settings as app_settings
    from app.models.cad_file import CadFile
    from app.domains.rendering.workflow_runtime_services import resolve_cad_bbox

    pl = PipelineLogger(task_id=None)
    pl.step_start("reextract_cad_metadata", {"cad_file_id": cad_file_id})

    # Resolve tenant context at task start (required for RLS)
    from app.core.tenant_context import resolve_tenant_id_for_cad, set_tenant_context_sync

    _tenant_id = resolve_tenant_id_for_cad(cad_file_id)

    sync_url = app_settings.database_url.replace("+asyncpg", "")
    eng = create_engine(sync_url)
    try:
        with Session(eng) as session:
            set_tenant_context_sync(session, _tenant_id)
            cad_file = session.get(CadFile, cad_file_id)
            if not cad_file or not cad_file.stored_path:
                logger.warning(f"reextract_cad_metadata: file not found {cad_file_id}")
                return
            step_path = cad_file.stored_path

        try:
            p = Path(step_path)
            glb_path = p.parent / f"{p.stem}_thumbnail.glb"
            bbox_result = resolve_cad_bbox(step_path, glb_path=str(glb_path))
            patch = bbox_result.bbox_data

            if patch:
                with Session(eng) as session:
                    set_tenant_context_sync(session, _tenant_id)
                    cad_file = session.get(CadFile, cad_file_id)
                    if cad_file:
                        # Reassign a fresh dict so the ORM sees the attribute change.
                        cad_file.mesh_attributes = {**(cad_file.mesh_attributes or {}), **patch}
                        session.commit()
                dims = patch["dimensions_mm"]
                pl.step_done("reextract_cad_metadata", result={
                    "dimensions_mm": f"{dims['x']}×{dims['y']}×{dims['z']} mm"
                })
                logger.info(
                    f"reextract_cad_metadata: {cad_file_id} → "
                    f"{dims['x']}×{dims['y']}×{dims['z']} mm"
                )
            else:
                logger.warning(f"reextract_cad_metadata: no bbox data for {cad_file_id}")
        except Exception as exc:
            pl.step_error("reextract_cad_metadata", str(exc), exc)
            logger.error(f"reextract_cad_metadata failed for {cad_file_id}: {exc}")
    finally:
        # Covers the initial lookup too, which previously leaked the engine on error.
        eng.dispose()