feat: rich product metadata extraction from STEP files

Extract volume, surface area, part count, assembly hierarchy, and
complexity from STEP files via OCC B-rep analysis.

Backend:
- extract_rich_metadata() in step_processor.py: computes per-part volume
  (BRepGProp), surface area, triangle/vertex count, assembly depth,
  instance count, complexity score, largest part identification
- cad_metadata JSONB column on Product model (DB migration)
- Auto-populated during STEP processing (non-fatal, 10s timeout)
- Also stored in cad_files.mesh_attributes["rich_metadata"]
- Batch re-extract endpoint: POST /admin/settings/reextract-rich-metadata

AI Agent:
- search_products returns part_count, volume_cm3, complexity, largest_part
- query_database tool description documents cad_metadata schema

Frontend:
- ProductDetail page: CAD Metadata section with stat cards
  (parts, volume, surface area, complexity, triangles, assembly depth)
- Admin System Tools: "Re-extract Rich Metadata" button for backfill

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-15 18:49:50 +01:00
parent 0ffc86589a
commit cfccdd5397
12 changed files with 645 additions and 170 deletions
+10
View File
@@ -532,6 +532,16 @@ async def reextract_all_metadata(
return {"queued": queued, "message": f"Queued {queued} CAD file(s) for metadata re-extraction"}
@router.post("/settings/reextract-rich-metadata", status_code=status.HTTP_202_ACCEPTED)
async def reextract_rich_metadata(
    admin: User = Depends(require_global_admin),
):
    """Queue the batch Celery task that recomputes volume, surface area, and
    complexity metrics for every product backed by a STEP file.

    Global-admin only; returns 202 immediately — the work happens in the
    step-processing worker.
    """
    # Lazy import: keeps the web process from needing the worker's task module
    # (and its heavy OCC dependency chain) at startup.
    from app.tasks.step_tasks import reextract_rich_metadata_task

    reextract_rich_metadata_task.delay()

    return {"queued": True, "message": "Rich metadata re-extraction task queued"}
@router.post("/settings/generate-missing-canonical-scenes", status_code=status.HTTP_202_ACCEPTED)
async def generate_missing_canonical_scenes(
admin: User = Depends(require_global_admin),
@@ -109,6 +109,57 @@ def process_step_file(self, cad_file_id: str):
pl.step_error("process_step_file", f"STEP metadata extraction failed: {exc}", exc)
r.delete(lock_key) # release lock so a retry can proceed
raise self.retry(exc=exc, countdown=60, max_retries=3)
# Extract rich metadata (volume, surface area, complexity, etc.) — non-fatal
try:
from sqlalchemy import create_engine, update as sql_update
from sqlalchemy.orm import Session as SyncSession
from app.config import settings as cfg
from app.services.step_processor import extract_rich_metadata
from app.models.cad_file import CadFile
from app.models.product import Product
from app.core.tenant_context import set_tenant_context_sync
eng = create_engine(cfg.database_url_sync)
try:
# Load stored_path for the cad file
with SyncSession(eng) as session:
set_tenant_context_sync(session, _tenant_id)
cad_file = session.get(CadFile, cad_file_id)
step_path = cad_file.stored_path if cad_file else None
if step_path:
rich_meta = extract_rich_metadata(str(step_path))
if rich_meta and rich_meta.get("part_count", 0) > 0:
with SyncSession(eng) as session:
set_tenant_context_sync(session, _tenant_id)
# Merge into cad_files.mesh_attributes
cad_file = session.get(CadFile, cad_file_id)
if cad_file:
existing_attrs = cad_file.mesh_attributes or {}
existing_attrs["rich_metadata"] = rich_meta
session.execute(
sql_update(CadFile)
.where(CadFile.id == cad_file_id)
.values(mesh_attributes=existing_attrs)
)
# Update all active products linked to this CAD file
session.execute(
sql_update(Product)
.where(Product.cad_file_id == cad_file_id, Product.is_active.is_(True))
.values(cad_metadata=rich_meta)
)
session.commit()
logger.info(
f"Rich metadata extracted for cad_file {cad_file_id}: "
f"{rich_meta.get('part_count')} parts, "
f"{rich_meta.get('total_volume_cm3', 0):.1f} cm³"
)
finally:
eng.dispose()
except Exception:
logger.exception(f"Rich metadata extraction failed for cad_file {cad_file_id} (non-fatal)")
finally:
r.delete(lock_key) # always release on completion or unhandled error
@@ -203,6 +254,70 @@ def _auto_populate_materials_for_cad(cad_file_id: str, tenant_id: str | None = N
eng.dispose()
@celery_app.task(name="app.tasks.step_tasks.reextract_rich_metadata_task", queue="step_processing")
def reextract_rich_metadata_task():
    """Batch re-extract rich metadata (volume, surface area, complexity) for all completed CAD files.

    For every CadFile that finished processing and still has a stored STEP
    path, recompute the rich geometric metadata and write it to both
    ``cad_files.mesh_attributes["rich_metadata"]`` and the ``cad_metadata``
    column of every active Product linked to that file.  Failures are
    per-file: one bad STEP never aborts the batch.
    """
    from sqlalchemy import create_engine, select as sql_select, update as sql_update
    from sqlalchemy.orm import Session as SyncSession

    from app.config import settings as cfg
    from app.core.tenant_context import set_tenant_context_sync
    from app.models.cad_file import CadFile, ProcessingStatus
    from app.models.product import Product
    # Hoisted out of the per-file loop: the import is invariant.
    from app.services.step_processor import extract_rich_metadata

    # Use the same sync engine URL as the rest of this module (see
    # process_step_file) instead of hand-stripping "+asyncpg" from the async URL.
    eng = create_engine(cfg.database_url_sync)
    updated = 0
    failed = 0
    try:
        # NOTE(review): this scan runs without a tenant context so it can see
        # every tenant's files — confirm the worker role's RLS policy permits
        # this cross-tenant read.
        with SyncSession(eng) as session:
            cad_files = session.execute(
                sql_select(CadFile).where(
                    CadFile.processing_status == ProcessingStatus.completed,
                    CadFile.stored_path.isnot(None),
                )
            ).scalars().all()
            # Detach plain values so the session can close before the
            # (potentially long) extraction loop below.
            cad_entries = [(str(cf.id), cf.stored_path, cf.tenant_id) for cf in cad_files]

        for cad_file_id, step_path, tenant_id in cad_entries:
            try:
                rich_meta = extract_rich_metadata(str(step_path))
                if not rich_meta or rich_meta.get("part_count", 0) <= 0:
                    # Extraction yielded nothing usable; not counted as a failure.
                    continue
                with SyncSession(eng) as session:
                    set_tenant_context_sync(session, tenant_id)
                    # Merge into cad_files.mesh_attributes without mutating the
                    # ORM-tracked dict in place (copy, then write explicitly).
                    cad_file = session.get(CadFile, cad_file_id)
                    if cad_file:
                        existing_attrs = dict(cad_file.mesh_attributes or {})
                        existing_attrs["rich_metadata"] = rich_meta
                        session.execute(
                            sql_update(CadFile)
                            .where(CadFile.id == cad_file_id)
                            .values(mesh_attributes=existing_attrs)
                        )
                    # Mirror the metadata onto all active products linked to
                    # this CAD file.
                    session.execute(
                        sql_update(Product)
                        .where(Product.cad_file_id == cad_file_id, Product.is_active.is_(True))
                        .values(cad_metadata=rich_meta)
                    )
                    session.commit()
                updated += 1
                logger.info(
                    f"reextract_rich_metadata: {cad_file_id} -> "
                    f"{rich_meta.get('part_count')} parts, "
                    f"{rich_meta.get('total_volume_cm3', 0):.1f} cm3"
                )
            except Exception:
                failed += 1
                logger.exception(f"reextract_rich_metadata failed for cad_file {cad_file_id}")
    finally:
        eng.dispose()
    logger.info(f"reextract_rich_metadata_task complete: {updated} updated, {failed} failed")
@celery_app.task(name="app.tasks.step_tasks.reextract_cad_metadata", queue="asset_pipeline")
def reextract_cad_metadata(cad_file_id: str):
"""Re-extract bounding-box dimensions for an already-completed CAD file.
+1
View File
@@ -70,6 +70,7 @@ class Product(Base):
is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True)
arbeitspaket: Mapped[str | None] = mapped_column(String(500), nullable=True)
source_excel: Mapped[str | None] = mapped_column(String(1000), nullable=True)
cad_metadata: Mapped[dict | None] = mapped_column(JSONB, nullable=True, default=None)
tenant_id: Mapped[uuid.UUID | None] = mapped_column(
UUID(as_uuid=True), ForeignKey("tenants.id"), nullable=True, index=True
)
+1
View File
@@ -64,6 +64,7 @@ class ProductOut(BaseModel):
cad_mesh_attributes: dict | None = None
arbeitspaket: str | None = None
cad_render_log: dict | None = None
cad_metadata: dict | None = None
notes: str | None
is_active: bool
source_excel: str | None
+7 -2
View File
@@ -219,7 +219,7 @@ TOOLS = [
"type": "function",
"function": {
"name": "query_database",
"description": "Execute a read-only SQL SELECT query against the database. Key tables/columns: products(id, name, pim_id, category_key, cad_file_id, is_active, tenant_id), orders(id, order_number, status, tenant_id), order_lines(id, order_id, product_id, render_status, material_override, render_overrides), cad_files(id, mesh_attributes->'dimensions_mm' with {x,y,z} in mm, parsed_objects, processing_status). To get product dimensions: JOIN cad_files cf ON cf.id = p.cad_file_id and use cf.mesh_attributes->'dimensions_mm'. Use :tenant_id parameter for tenant filtering. Category is 'category_key' not 'category'.",
"description": "Execute a read-only SQL SELECT query against the database. Key tables/columns: products(id, name, pim_id, category_key, cad_file_id, is_active, tenant_id, cad_metadata JSONB), orders(id, order_number, status, tenant_id), order_lines(id, order_id, product_id, render_status, material_override, render_overrides), cad_files(id, mesh_attributes->'dimensions_mm' with {x,y,z} in mm, parsed_objects, processing_status). products.cad_metadata JSONB contains: part_count, unique_part_count, instance_count, assembly_depth, total_volume_cm3, total_surface_area_cm2, total_triangle_count, complexity_score, largest_part (name + volume_cm3). To get product dimensions: JOIN cad_files cf ON cf.id = p.cad_file_id and use cf.mesh_attributes->'dimensions_mm'. Use :tenant_id parameter for tenant filtering. Category is 'category_key' not 'category'.",
"parameters": {
"type": "object",
"properties": {
@@ -330,7 +330,12 @@ async def _tool_search_products(db: AsyncSession, tenant_id: str, query: str = "
cf.processing_status,
cf.mesh_attributes->'dimensions_mm'->>'x' AS dim_x_mm,
cf.mesh_attributes->'dimensions_mm'->>'y' AS dim_y_mm,
cf.mesh_attributes->'dimensions_mm'->>'z' AS dim_z_mm
cf.mesh_attributes->'dimensions_mm'->>'z' AS dim_z_mm,
p.cad_metadata->>'part_count' AS part_count,
p.cad_metadata->>'total_volume_cm3' AS volume_cm3,
p.cad_metadata->>'complexity_score' AS complexity,
p.cad_metadata->'largest_part'->>'name' AS largest_part_name,
p.cad_metadata->'largest_part'->>'volume_cm3' AS largest_part_volume
FROM products p
LEFT JOIN cad_files cf ON cf.id = p.cad_file_id
WHERE p.tenant_id = :tenant_id
+326
View File
@@ -632,6 +632,332 @@ def extract_step_metadata(step_path: str) -> StepMetadata:
return StepMetadata()
def extract_rich_metadata(step_path: str) -> dict:
    """Extract rich geometric metadata from a STEP file.

    Opens the STEP file via XCAF, walks the assembly tree, and computes
    volume, surface area, hierarchy depth, triangle/vertex counts, and
    complexity metrics for each unique part.

    Runs in the worker container (step_processing queue) which has OCP installed.
    Processing is capped at 10 seconds; volume computation is skipped for files
    with more than 200 parts.

    All errors (including a missing OCC/OCP install) are swallowed: on any
    failure the zeroed ``result`` skeleton is returned, so callers can treat
    this as best-effort.

    Returns a dict with keys: part_count, unique_part_count, instance_count,
    assembly_depth, total_volume_cm3, total_surface_area_cm2,
    total_triangle_count, total_vertex_count, largest_part,
    smallest_dimension_mm, complexity_score.
    """
    import time

    t_start = time.monotonic()
    TIME_LIMIT = 10.0  # seconds — hard wall-clock budget for the whole extraction

    # Zeroed skeleton; also the fallback return value on any failure.
    result: dict = {
        "part_count": 0,
        "unique_part_count": 0,
        "instance_count": 0,
        "assembly_depth": 0,
        "total_volume_cm3": 0.0,
        "total_surface_area_cm2": 0.0,
        "total_triangle_count": 0,
        "total_vertex_count": 0,
        "largest_part": {"name": "", "volume_cm3": 0.0},
        "smallest_dimension_mm": 0.0,
        "complexity_score": "low",
    }
    try:
        # Import OCC — try OCC.Core (pythonocc) first, fall back to OCP.
        # The two bindings expose the same classes but differ in how static
        # methods are named (OCP suffixes them with "_s"), so a small set of
        # shim functions is defined per branch to hide that difference.
        _using_ocp = False
        try:
            from OCC.Core.STEPCAFControl import STEPCAFControl_Reader
            from OCC.Core.XCAFDoc import XCAFDoc_DocumentTool
            from OCC.Core.TDocStd import TDocStd_Document
            from OCC.Core.TDataStd import TDataStd_Name
            from OCC.Core.TCollection import TCollection_ExtendedString
            from OCC.Core.TDF import TDF_LabelSequence
            from OCC.Core.XCAFDoc import XCAFDoc_ShapeTool
            from OCC.Core.BRepGProp import brepgprop
            from OCC.Core.GProp import GProp_GProps
            from OCC.Core.BRepMesh import BRepMesh_IncrementalMesh
            from OCC.Core.TopExp import TopExp_Explorer
            from OCC.Core.TopAbs import TopAbs_FACE
            from OCC.Core.TopoDS import TopoDS as _TopoDS
            from OCC.Core.BRep import BRep_Tool
            from OCC.Core.TopLoc import TopLoc_Location
            from OCC.Core.Bnd import Bnd_Box
            from OCC.Core.BRepBndLib import brepbndlib as _brepbndlib_mod

            # pythonocc shims: plain static-method names.
            def _get_components(label, seq):
                XCAFDoc_ShapeTool.GetComponents(label, seq)

            def _is_reference(label):
                return XCAFDoc_ShapeTool.IsReference(label)

            def _get_referred(label, ref):
                return XCAFDoc_ShapeTool.GetReferredShape(label, ref)

            def _get_shape(st, label):
                return st.GetShape(label)

            def _get_name_id():
                return TDataStd_Name.GetID()

            def _brepbndlib_add(shape, bbox):
                _brepbndlib_mod.Add(shape, bbox)
        except ImportError:
            from OCP.STEPCAFControl import STEPCAFControl_Reader  # type: ignore[no-redef]
            from OCP.XCAFDoc import XCAFDoc_DocumentTool  # type: ignore[no-redef]
            from OCP.TDocStd import TDocStd_Document  # type: ignore[no-redef]
            from OCP.TDataStd import TDataStd_Name  # type: ignore[no-redef]
            from OCP.TCollection import TCollection_ExtendedString  # type: ignore[no-redef]
            from OCP.TDF import TDF_LabelSequence, TDF_Label  # type: ignore[no-redef]
            from OCP.XCAFDoc import XCAFDoc_ShapeTool  # type: ignore[no-redef]
            from OCP.BRepGProp import brepgprop  # type: ignore[no-redef]
            from OCP.GProp import GProp_GProps  # type: ignore[no-redef]
            from OCP.BRepMesh import BRepMesh_IncrementalMesh  # type: ignore[no-redef]
            from OCP.TopExp import TopExp_Explorer  # type: ignore[no-redef]
            from OCP.TopAbs import TopAbs_FACE  # type: ignore[no-redef]
            from OCP.TopoDS import TopoDS as _TopoDS  # type: ignore[no-redef]
            from OCP.BRep import BRep_Tool  # type: ignore[no-redef]
            from OCP.TopLoc import TopLoc_Location  # type: ignore[no-redef]
            from OCP.Bnd import Bnd_Box  # type: ignore[no-redef]
            from OCP.BRepBndLib import BRepBndLib as _brepbndlib_mod  # type: ignore[no-redef]
            _using_ocp = True

            # OCP shims: static methods carry an "_s" suffix.
            def _get_components(label, seq):
                XCAFDoc_ShapeTool.GetComponents_s(label, seq)

            def _is_reference(label):
                return XCAFDoc_ShapeTool.IsReference_s(label)

            def _get_referred(label, ref):
                return XCAFDoc_ShapeTool.GetReferredShape_s(label, ref)

            def _get_shape(st, label):
                return st.GetShape_s(label)

            def _get_name_id():
                return TDataStd_Name.GetID_s()

            def _brepbndlib_add(shape, bbox):
                _brepbndlib_mod.Add_s(shape, bbox)

        # ── Read STEP file ────────────────────────────────────────────────
        doc = TDocStd_Document(TCollection_ExtendedString("MDTV-CAF"))
        reader = STEPCAFControl_Reader()
        reader.SetColorMode(True)
        reader.SetNameMode(True)
        # NOTE(review): the IFSelect return code in `status` is never checked —
        # a failed read only surfaces indirectly via Transfer() below. Consider
        # verifying status == IFSelect_RetDone before transferring.
        status = reader.ReadFile(str(step_path))
        if not reader.Transfer(doc):
            logger.warning("extract_rich_metadata: XCAF transfer failed for %s", step_path)
            return result
        if _using_ocp:
            shape_tool = XCAFDoc_DocumentTool.ShapeTool_s(doc.Main())
        else:
            shape_tool = XCAFDoc_DocumentTool.ShapeTool(doc.Main())
        # NOTE(review): in the pythonocc (non-OCP) branch this is a plain
        # Python list, yet it is passed to GetFreeShapes below, which expects a
        # TDF_LabelSequence — verify the OCC.Core path actually works, or use
        # TDF_LabelSequence unconditionally (it is imported in both branches).
        # Also note the if/else below calls the same thing in both arms.
        free_labels = TDF_LabelSequence() if _using_ocp else []
        if _using_ocp:
            shape_tool.GetFreeShapes(free_labels)
        else:
            shape_tool.GetFreeShapes(free_labels)

        # ── Walk the XCAF assembly tree ───────────────────────────────────
        # Collect all leaf shapes with their names, tracking unique shapes via IsSame()
        leaf_shapes: list[tuple] = []  # (name, shape) — one entry per instance
        unique_shapes: list = []  # list of (name, shape) for distinct shapes
        max_depth = 0

        def _label_name(label) -> str:
            # Read the TDataStd_Name attribute off the label, if present.
            name_attr = TDataStd_Name()
            if label.FindAttribute(_get_name_id(), name_attr):
                return name_attr.Get().ToExtString()
            return ""

        def _walk(label, depth: int) -> None:
            # Depth-first traversal; records leaves and tracks max depth.
            nonlocal max_depth
            if depth > max_depth:
                max_depth = depth
            # Dereference component references (an instance label points at
            # the shared definition label).
            actual_label = label
            if _is_reference(label):
                if _using_ocp:
                    ref_label = TDF_Label()
                    if _get_referred(label, ref_label):
                        actual_label = ref_label
                else:
                    # TDF_Label was not imported in the OCC.Core branch above,
                    # so pull it in locally here.
                    from OCC.Core.TDF import TDF_Label as _TDF_Label
                    ref_label = _TDF_Label()
                    if _get_referred(label, ref_label):
                        actual_label = ref_label
            components = TDF_LabelSequence() if _using_ocp else []
            _get_components(actual_label, components)
            n_components = components.Length() if _using_ocp else len(components)
            if n_components == 0:
                # Leaf node: prefer the instance name, fall back to the
                # referred definition's name.
                name = _label_name(label) or _label_name(actual_label)
                shape = _get_shape(shape_tool, actual_label)
                if shape is not None and not shape.IsNull():
                    leaf_shapes.append((name, shape))
                    # Check uniqueness via IsSame (O(n²) pairwise scan —
                    # acceptable given the 200-part cap further down).
                    is_unique = True
                    for _, existing_shape in unique_shapes:
                        if shape.IsSame(existing_shape):
                            is_unique = False
                            break
                    if is_unique:
                        unique_shapes.append((name, shape))
            else:
                # Assembly node: recurse into each component.
                if _using_ocp:
                    for i in range(1, n_components + 1):
                        _walk(components.Value(i), depth + 1)
                else:
                    for child in components:
                        _walk(child, depth + 1)

        # Start the walk from every free (root) shape. OCC sequences are
        # 1-based; the list fallback is 0-based, hence the dual range.
        n_free = free_labels.Length() if _using_ocp else len(free_labels)
        for i in range(1, n_free + 1) if _using_ocp else range(len(free_labels)):
            label = free_labels.Value(i) if _using_ocp else free_labels[i]
            _walk(label, 0)
        result["part_count"] = len(leaf_shapes)
        result["unique_part_count"] = len(unique_shapes)
        result["instance_count"] = len(leaf_shapes)
        result["assembly_depth"] = max_depth

        # ── Volume and surface area per unique shape ──────────────────────
        skip_volume = len(leaf_shapes) > 200
        if skip_volume:
            logger.info(
                "extract_rich_metadata: %d parts > 200, skipping volume computation",
                len(leaf_shapes),
            )
        total_volume = 0.0  # mm³
        total_area = 0.0  # mm²
        largest_name = ""
        largest_volume = 0.0  # mm³
        # Build a count of how many instances each unique shape has, so the
        # per-shape volume/area is computed once and multiplied by its count.
        instance_counts: dict[int, int] = {}  # index in unique_shapes → count
        for _, leaf_shape in leaf_shapes:
            for idx, (_, u_shape) in enumerate(unique_shapes):
                if leaf_shape.IsSame(u_shape):
                    instance_counts[idx] = instance_counts.get(idx, 0) + 1
                    break
        if not skip_volume:
            for idx, (name, shape) in enumerate(unique_shapes):
                if time.monotonic() - t_start > TIME_LIMIT:
                    logger.warning("extract_rich_metadata: time limit reached, stopping volume computation")
                    break
                count = instance_counts.get(idx, 1)
                try:
                    props = GProp_GProps()
                    if _using_ocp:
                        brepgprop.VolumeProperties_s(shape, props)
                    else:
                        brepgprop.VolumeProperties(shape, props)
                    vol = abs(props.Mass())  # mm³, abs() for reversed shapes
                    total_volume += vol * count
                    if vol > largest_volume:
                        largest_volume = vol
                        largest_name = name
                except Exception:
                    # Per-shape best effort: a bad solid must not kill the batch.
                    pass
                try:
                    props = GProp_GProps()
                    if _using_ocp:
                        brepgprop.SurfaceProperties_s(shape, props)
                    else:
                        brepgprop.SurfaceProperties(shape, props)
                    area = abs(props.Mass())  # mm²
                    total_area += area * count
                except Exception:
                    pass
        result["total_volume_cm3"] = round(total_volume / 1000.0, 2)  # mm³ → cm³
        result["total_surface_area_cm2"] = round(total_area / 100.0, 2)  # mm² → cm²
        result["largest_part"] = {
            "name": largest_name,
            "volume_cm3": round(largest_volume / 1000.0, 2),
        }

        # ── Smallest dimension across all parts ──────────────────────────
        smallest_dim = float("inf")
        for _, shape in unique_shapes:
            if time.monotonic() - t_start > TIME_LIMIT:
                break
            try:
                bbox = Bnd_Box()
                _brepbndlib_add(shape, bbox)
                xmin, ymin, zmin, xmax, ymax, zmax = bbox.Get()
                dims = [abs(xmax - xmin), abs(ymax - ymin), abs(zmax - zmin)]
                # min() raises ValueError when every dim is degenerate; that is
                # caught by the except below and the shape is simply skipped.
                min_dim = min(d for d in dims if d > 1e-6)  # skip degenerate
                if min_dim < smallest_dim:
                    smallest_dim = min_dim
            except Exception:
                pass
        result["smallest_dimension_mm"] = round(smallest_dim, 2) if smallest_dim < float("inf") else 0.0

        # ── Triangle and vertex counts from tessellation ──────────────────
        # Tessellate all root shapes first (coarse, for counting only)
        total_triangles = 0
        total_vertices = 0
        for i in range(1, n_free + 1) if _using_ocp else range(len(free_labels)):
            label = free_labels.Value(i) if _using_ocp else free_labels[i]
            shape = _get_shape(shape_tool, label)
            if shape is not None and not shape.IsNull():
                # Coarse deflection (0.5) keeps meshing cheap; counts only.
                BRepMesh_IncrementalMesh(shape, 0.5, False, 0.5)
                # Walk faces and sum Poly_Triangulation data
                explorer = TopExp_Explorer(shape, TopAbs_FACE)
                while explorer.More():
                    face = _TopoDS.Face_s(explorer.Current()) if _using_ocp \
                        else _TopoDS.Face(explorer.Current())
                    try:
                        loc = TopLoc_Location()
                        if _using_ocp:
                            tri = BRep_Tool.Triangulation_s(face, loc)
                        else:
                            tri = BRep_Tool.Triangulation(face, loc)
                        if tri is not None:
                            total_triangles += tri.NbTriangles()
                            total_vertices += tri.NbNodes()
                    except Exception:
                        pass
                    explorer.Next()
        result["total_triangle_count"] = total_triangles
        result["total_vertex_count"] = total_vertices

        # ── Complexity score ──────────────────────────────────────────────
        # Bucketed purely on triangle count: <5000 low, 5000–50000 medium,
        # >50000 high.
        if total_triangles < 5000:
            result["complexity_score"] = "low"
        elif total_triangles <= 50000:
            result["complexity_score"] = "medium"
        else:
            result["complexity_score"] = "high"
        elapsed = time.monotonic() - t_start
        logger.info(
            "extract_rich_metadata: %d parts (%d unique), %.1f cm³, %d tris, "
            "complexity=%s, %.2fs",
            result["part_count"],
            result["unique_part_count"],
            result["total_volume_cm3"],
            result["total_triangle_count"],
            result["complexity_score"],
            elapsed,
        )
        return result
    except ImportError:
        # Neither OCC.Core nor OCP is installed (e.g. running outside the
        # worker container) — return the zeroed skeleton.
        logger.warning("OCC not available for extract_rich_metadata")
        return result
    except Exception as exc:
        # Best-effort contract: never raise to the caller.
        logger.warning("extract_rich_metadata failed: %s", exc)
        return result
def _extract_step_objects(step_path: Path) -> list[str]:
"""Extract part names from STEP file using pythonocc."""
try:
+1
View File
@@ -8,6 +8,7 @@ Celery discovers tasks by import path, so these re-exports are required.
from app.domains.pipeline.tasks.extract_metadata import ( # noqa: F401
process_step_file,
reextract_cad_metadata,
reextract_rich_metadata_task,
)
from app.domains.pipeline.tasks.render_thumbnail import ( # noqa: F401
render_step_thumbnail,