diff --git a/backend/alembic/versions/a68e8c6fb61b_add_cad_metadata_to_products.py b/backend/alembic/versions/a68e8c6fb61b_add_cad_metadata_to_products.py new file mode 100644 index 0000000..66ab7db --- /dev/null +++ b/backend/alembic/versions/a68e8c6fb61b_add_cad_metadata_to_products.py @@ -0,0 +1,30 @@ +"""add cad_metadata to products + +Revision ID: a68e8c6fb61b +Revises: 69964e910545 +Create Date: 2026-03-15 17:44:08.184376 + +""" +from typing import Sequence, Union + +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision: str = 'a68e8c6fb61b' +down_revision: Union[str, None] = '69964e910545' +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('products', sa.Column('cad_metadata', postgresql.JSONB(astext_type=sa.Text()), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('products', 'cad_metadata') + # ### end Alembic commands ### diff --git a/backend/app/api/routers/admin.py b/backend/app/api/routers/admin.py index c7e52fb..9872458 100644 --- a/backend/app/api/routers/admin.py +++ b/backend/app/api/routers/admin.py @@ -532,6 +532,16 @@ async def reextract_all_metadata( return {"queued": queued, "message": f"Queued {queued} CAD file(s) for metadata re-extraction"} +@router.post("/settings/reextract-rich-metadata", status_code=status.HTTP_202_ACCEPTED) +async def reextract_rich_metadata( + admin: User = Depends(require_global_admin), +): + """Queue a batch task to re-compute volume, surface area, complexity for all products with STEP files.""" + from app.tasks.step_tasks import reextract_rich_metadata_task + reextract_rich_metadata_task.delay() + return {"queued": True, "message": "Rich metadata re-extraction task queued"} + + @router.post("/settings/generate-missing-canonical-scenes", status_code=status.HTTP_202_ACCEPTED) async def generate_missing_canonical_scenes( admin: User = Depends(require_global_admin), diff --git a/backend/app/domains/pipeline/tasks/extract_metadata.py b/backend/app/domains/pipeline/tasks/extract_metadata.py index 98df43e..ffc96aa 100644 --- a/backend/app/domains/pipeline/tasks/extract_metadata.py +++ b/backend/app/domains/pipeline/tasks/extract_metadata.py @@ -109,6 +109,57 @@ def process_step_file(self, cad_file_id: str): pl.step_error("process_step_file", f"STEP metadata extraction failed: {exc}", exc) r.delete(lock_key) # release lock so a retry can proceed raise self.retry(exc=exc, countdown=60, max_retries=3) + + # Extract rich metadata (volume, surface area, complexity, etc.) 
— non-fatal + try: + from sqlalchemy import create_engine, update as sql_update + from sqlalchemy.orm import Session as SyncSession + from app.config import settings as cfg + from app.services.step_processor import extract_rich_metadata + from app.models.cad_file import CadFile + from app.models.product import Product + from app.core.tenant_context import set_tenant_context_sync + + eng = create_engine(cfg.database_url_sync) + try: + # Load stored_path for the cad file + with SyncSession(eng) as session: + set_tenant_context_sync(session, _tenant_id) + cad_file = session.get(CadFile, cad_file_id) + step_path = cad_file.stored_path if cad_file else None + + if step_path: + rich_meta = extract_rich_metadata(str(step_path)) + if rich_meta and rich_meta.get("part_count", 0) > 0: + with SyncSession(eng) as session: + set_tenant_context_sync(session, _tenant_id) + # Merge into cad_files.mesh_attributes + cad_file = session.get(CadFile, cad_file_id) + if cad_file: + existing_attrs = cad_file.mesh_attributes or {} + existing_attrs["rich_metadata"] = rich_meta + session.execute( + sql_update(CadFile) + .where(CadFile.id == cad_file_id) + .values(mesh_attributes=existing_attrs) + ) + # Update all active products linked to this CAD file + session.execute( + sql_update(Product) + .where(Product.cad_file_id == cad_file_id, Product.is_active.is_(True)) + .values(cad_metadata=rich_meta) + ) + session.commit() + logger.info( + f"Rich metadata extracted for cad_file {cad_file_id}: " + f"{rich_meta.get('part_count')} parts, " + f"{rich_meta.get('total_volume_cm3', 0):.1f} cm³" + ) + finally: + eng.dispose() + except Exception: + logger.exception(f"Rich metadata extraction failed for cad_file {cad_file_id} (non-fatal)") + finally: r.delete(lock_key) # always release on completion or unhandled error @@ -203,6 +254,70 @@ def _auto_populate_materials_for_cad(cad_file_id: str, tenant_id: str | None = N eng.dispose() 
+@celery_app.task(name="app.tasks.step_tasks.reextract_rich_metadata_task", queue="step_processing") +def reextract_rich_metadata_task(): + """Batch re-extract rich metadata (volume, surface area, complexity) for all completed CAD files.""" + from sqlalchemy import create_engine, select as sql_select, update as sql_update + from sqlalchemy.orm import Session as SyncSession + from app.config import settings as cfg + from app.models.cad_file import CadFile, ProcessingStatus + from app.models.product import Product + from app.core.tenant_context import set_tenant_context_sync + + sync_url = cfg.database_url.replace("+asyncpg", "") + eng = create_engine(sync_url) + updated = 0 + failed = 0 + + try: + with SyncSession(eng) as session: + cad_files = session.execute( + sql_select(CadFile).where( + CadFile.processing_status == ProcessingStatus.completed, + CadFile.stored_path.isnot(None), + ) + ).scalars().all() + cad_entries = [(str(cf.id), cf.stored_path, cf.tenant_id) for cf in cad_files] + + for cad_file_id, step_path, tenant_id in cad_entries: + try: + from app.services.step_processor import extract_rich_metadata + rich_meta = extract_rich_metadata(str(step_path)) + if rich_meta and rich_meta.get("part_count", 0) > 0: + with SyncSession(eng) as session: + set_tenant_context_sync(session, tenant_id) + # Update mesh_attributes on cad_file + cad_file = session.get(CadFile, cad_file_id) + if cad_file: + existing_attrs = cad_file.mesh_attributes or {} + existing_attrs["rich_metadata"] = rich_meta + session.execute( + sql_update(CadFile) + .where(CadFile.id == cad_file_id) + .values(mesh_attributes=existing_attrs) + ) + # Update all active products linked to this CAD file + session.execute( + sql_update(Product) + .where(Product.cad_file_id == cad_file_id, Product.is_active.is_(True)) + .values(cad_metadata=rich_meta) + ) + session.commit() + updated += 1 + logger.info( + f"reextract_rich_metadata: {cad_file_id} -> " + f"{rich_meta.get('part_count')} parts, " + 
f"{rich_meta.get('total_volume_cm3', 0):.1f} cm3" + ) + except Exception: + failed += 1 + logger.exception(f"reextract_rich_metadata failed for cad_file {cad_file_id}") + finally: + eng.dispose() + + logger.info(f"reextract_rich_metadata_task complete: {updated} updated, {failed} failed") + + @celery_app.task(name="app.tasks.step_tasks.reextract_cad_metadata", queue="asset_pipeline") def reextract_cad_metadata(cad_file_id: str): """Re-extract bounding-box dimensions for an already-completed CAD file. diff --git a/backend/app/domains/products/models.py b/backend/app/domains/products/models.py index 684c561..e7db153 100644 --- a/backend/app/domains/products/models.py +++ b/backend/app/domains/products/models.py @@ -70,6 +70,7 @@ class Product(Base): is_active: Mapped[bool] = mapped_column(Boolean, nullable=False, default=True) arbeitspaket: Mapped[str | None] = mapped_column(String(500), nullable=True) source_excel: Mapped[str | None] = mapped_column(String(1000), nullable=True) + cad_metadata: Mapped[dict | None] = mapped_column(JSONB, nullable=True, default=None) tenant_id: Mapped[uuid.UUID | None] = mapped_column( UUID(as_uuid=True), ForeignKey("tenants.id"), nullable=True, index=True ) diff --git a/backend/app/domains/products/schemas.py b/backend/app/domains/products/schemas.py index 5b8d83d..b5e64cf 100644 --- a/backend/app/domains/products/schemas.py +++ b/backend/app/domains/products/schemas.py @@ -64,6 +64,7 @@ class ProductOut(BaseModel): cad_mesh_attributes: dict | None = None arbeitspaket: str | None = None cad_render_log: dict | None = None + cad_metadata: dict | None = None notes: str | None is_active: bool source_excel: str | None diff --git a/backend/app/services/chat_service.py b/backend/app/services/chat_service.py index 671278c..f910e77 100644 --- a/backend/app/services/chat_service.py +++ b/backend/app/services/chat_service.py @@ -219,7 +219,7 @@ TOOLS = [ "type": "function", "function": { "name": "query_database", - "description": "Execute a 
read-only SQL SELECT query against the database. Key tables/columns: products(id, name, pim_id, category_key, cad_file_id, is_active, tenant_id), orders(id, order_number, status, tenant_id), order_lines(id, order_id, product_id, render_status, material_override, render_overrides), cad_files(id, mesh_attributes->'dimensions_mm' with {x,y,z} in mm, parsed_objects, processing_status). To get product dimensions: JOIN cad_files cf ON cf.id = p.cad_file_id and use cf.mesh_attributes->'dimensions_mm'. Use :tenant_id parameter for tenant filtering. Category is 'category_key' not 'category'.", + "description": "Execute a read-only SQL SELECT query against the database. Key tables/columns: products(id, name, pim_id, category_key, cad_file_id, is_active, tenant_id, cad_metadata JSONB), orders(id, order_number, status, tenant_id), order_lines(id, order_id, product_id, render_status, material_override, render_overrides), cad_files(id, mesh_attributes->'dimensions_mm' with {x,y,z} in mm, parsed_objects, processing_status). products.cad_metadata JSONB contains: part_count, unique_part_count, instance_count, assembly_depth, total_volume_cm3, total_surface_area_cm2, total_triangle_count, complexity_score, largest_part (name + volume_cm3). To get product dimensions: JOIN cad_files cf ON cf.id = p.cad_file_id and use cf.mesh_attributes->'dimensions_mm'. Use :tenant_id parameter for tenant filtering. 
Category is 'category_key' not 'category'.", "parameters": { "type": "object", "properties": { @@ -330,7 +330,12 @@ async def _tool_search_products(db: AsyncSession, tenant_id: str, query: str = " cf.processing_status, cf.mesh_attributes->'dimensions_mm'->>'x' AS dim_x_mm, cf.mesh_attributes->'dimensions_mm'->>'y' AS dim_y_mm, - cf.mesh_attributes->'dimensions_mm'->>'z' AS dim_z_mm + cf.mesh_attributes->'dimensions_mm'->>'z' AS dim_z_mm, + p.cad_metadata->>'part_count' AS part_count, + p.cad_metadata->>'total_volume_cm3' AS volume_cm3, + p.cad_metadata->>'complexity_score' AS complexity, + p.cad_metadata->'largest_part'->>'name' AS largest_part_name, + p.cad_metadata->'largest_part'->>'volume_cm3' AS largest_part_volume FROM products p LEFT JOIN cad_files cf ON cf.id = p.cad_file_id WHERE p.tenant_id = :tenant_id diff --git a/backend/app/services/step_processor.py b/backend/app/services/step_processor.py index 0178054..ea5b019 100644 --- a/backend/app/services/step_processor.py +++ b/backend/app/services/step_processor.py @@ -632,6 +632,332 @@ def extract_step_metadata(step_path: str) -> StepMetadata: return StepMetadata() +def extract_rich_metadata(step_path: str) -> dict: + """Extract rich geometric metadata from a STEP file. + + Opens the STEP file via XCAF, walks the assembly tree, and computes + volume, surface area, hierarchy depth, triangle/vertex counts, and + complexity metrics for each unique part. + + Runs in the worker container (step_processing queue) which has OCP installed. + Processing is capped at 10 seconds; volume computation is skipped for files + with more than 200 parts. + + Returns a dict with keys: part_count, unique_part_count, instance_count, + assembly_depth, total_volume_cm3, total_surface_area_cm2, + total_triangle_count, total_vertex_count, largest_part, + smallest_dimension_mm, complexity_score. 
+ """ + import time + + t_start = time.monotonic() + TIME_LIMIT = 10.0 # seconds + + result: dict = { + "part_count": 0, + "unique_part_count": 0, + "instance_count": 0, + "assembly_depth": 0, + "total_volume_cm3": 0.0, + "total_surface_area_cm2": 0.0, + "total_triangle_count": 0, + "total_vertex_count": 0, + "largest_part": {"name": "", "volume_cm3": 0.0}, + "smallest_dimension_mm": 0.0, + "complexity_score": "low", + } + + try: + # Import OCC — try OCC.Core first, fall back to OCP + _using_ocp = False + try: + from OCC.Core.STEPCAFControl import STEPCAFControl_Reader + from OCC.Core.XCAFDoc import XCAFDoc_DocumentTool + from OCC.Core.TDocStd import TDocStd_Document + from OCC.Core.TDataStd import TDataStd_Name + from OCC.Core.TCollection import TCollection_ExtendedString + from OCC.Core.TDF import TDF_LabelSequence + from OCC.Core.XCAFDoc import XCAFDoc_ShapeTool + from OCC.Core.BRepGProp import brepgprop + from OCC.Core.GProp import GProp_GProps + from OCC.Core.BRepMesh import BRepMesh_IncrementalMesh + from OCC.Core.TopExp import TopExp_Explorer + from OCC.Core.TopAbs import TopAbs_FACE + from OCC.Core.TopoDS import TopoDS as _TopoDS + from OCC.Core.BRep import BRep_Tool + from OCC.Core.TopLoc import TopLoc_Location + from OCC.Core.Bnd import Bnd_Box + from OCC.Core.BRepBndLib import brepbndlib as _brepbndlib_mod + + def _get_components(label, seq): + XCAFDoc_ShapeTool.GetComponents(label, seq) + def _is_reference(label): + return XCAFDoc_ShapeTool.IsReference(label) + def _get_referred(label, ref): + return XCAFDoc_ShapeTool.GetReferredShape(label, ref) + def _get_shape(st, label): + return st.GetShape(label) + def _get_name_id(): + return TDataStd_Name.GetID() + def _brepbndlib_add(shape, bbox): + _brepbndlib_mod.Add(shape, bbox) + except ImportError: + from OCP.STEPCAFControl import STEPCAFControl_Reader # type: ignore[no-redef] + from OCP.XCAFDoc import XCAFDoc_DocumentTool # type: ignore[no-redef] + from OCP.TDocStd import TDocStd_Document # type: 
ignore[no-redef] + from OCP.TDataStd import TDataStd_Name # type: ignore[no-redef] + from OCP.TCollection import TCollection_ExtendedString # type: ignore[no-redef] + from OCP.TDF import TDF_LabelSequence, TDF_Label # type: ignore[no-redef] + from OCP.XCAFDoc import XCAFDoc_ShapeTool # type: ignore[no-redef] + from OCP.BRepGProp import brepgprop # type: ignore[no-redef] + from OCP.GProp import GProp_GProps # type: ignore[no-redef] + from OCP.BRepMesh import BRepMesh_IncrementalMesh # type: ignore[no-redef] + from OCP.TopExp import TopExp_Explorer # type: ignore[no-redef] + from OCP.TopAbs import TopAbs_FACE # type: ignore[no-redef] + from OCP.TopoDS import TopoDS as _TopoDS # type: ignore[no-redef] + from OCP.BRep import BRep_Tool # type: ignore[no-redef] + from OCP.TopLoc import TopLoc_Location # type: ignore[no-redef] + from OCP.Bnd import Bnd_Box # type: ignore[no-redef] + from OCP.BRepBndLib import BRepBndLib as _brepbndlib_mod # type: ignore[no-redef] + _using_ocp = True + + def _get_components(label, seq): + XCAFDoc_ShapeTool.GetComponents_s(label, seq) + def _is_reference(label): + return XCAFDoc_ShapeTool.IsReference_s(label) + def _get_referred(label, ref): + return XCAFDoc_ShapeTool.GetReferredShape_s(label, ref) + def _get_shape(st, label): + return st.GetShape_s(label) + def _get_name_id(): + return TDataStd_Name.GetID_s() + def _brepbndlib_add(shape, bbox): + _brepbndlib_mod.Add_s(shape, bbox) + + # ── Read STEP file ──────────────────────────────────────────────── + doc = TDocStd_Document(TCollection_ExtendedString("MDTV-CAF")) + reader = STEPCAFControl_Reader() + reader.SetColorMode(True) + reader.SetNameMode(True) + status = reader.ReadFile(str(step_path)) + if not reader.Transfer(doc): + logger.warning("extract_rich_metadata: XCAF transfer failed for %s", step_path) + return result + + if _using_ocp: + shape_tool = XCAFDoc_DocumentTool.ShapeTool_s(doc.Main()) + else: + shape_tool = XCAFDoc_DocumentTool.ShapeTool(doc.Main()) + + free_labels = 
TDF_LabelSequence() if _using_ocp else [] + if _using_ocp: + shape_tool.GetFreeShapes(free_labels) + else: + shape_tool.GetFreeShapes(free_labels) + + # ── Walk the XCAF assembly tree ─────────────────────────────────── + # Collect all leaf shapes with their names, tracking unique shapes via IsSame() + leaf_shapes: list[tuple] = [] # (name, shape) + unique_shapes: list = [] # list of (name, shape) for distinct shapes + max_depth = 0 + + def _label_name(label) -> str: + name_attr = TDataStd_Name() + if label.FindAttribute(_get_name_id(), name_attr): + return name_attr.Get().ToExtString() + return "" + + def _walk(label, depth: int) -> None: + nonlocal max_depth + if depth > max_depth: + max_depth = depth + + # Dereference component references + actual_label = label + if _is_reference(label): + if _using_ocp: + ref_label = TDF_Label() + if _get_referred(label, ref_label): + actual_label = ref_label + else: + from OCC.Core.TDF import TDF_Label as _TDF_Label + ref_label = _TDF_Label() + if _get_referred(label, ref_label): + actual_label = ref_label + + components = TDF_LabelSequence() if _using_ocp else [] + _get_components(actual_label, components) + + n_components = components.Length() if _using_ocp else len(components) + if n_components == 0: + # Leaf node + name = _label_name(label) or _label_name(actual_label) + shape = _get_shape(shape_tool, actual_label) + if shape is not None and not shape.IsNull(): + leaf_shapes.append((name, shape)) + # Check uniqueness via IsSame + is_unique = True + for _, existing_shape in unique_shapes: + if shape.IsSame(existing_shape): + is_unique = False + break + if is_unique: + unique_shapes.append((name, shape)) + else: + if _using_ocp: + for i in range(1, n_components + 1): + _walk(components.Value(i), depth + 1) + else: + for child in components: + _walk(child, depth + 1) + + n_free = free_labels.Length() if _using_ocp else len(free_labels) + for i in range(1, n_free + 1) if _using_ocp else range(len(free_labels)): + label = 
free_labels.Value(i) if _using_ocp else free_labels[i] + _walk(label, 0) + + result["part_count"] = len(leaf_shapes) + result["unique_part_count"] = len(unique_shapes) + result["instance_count"] = len(leaf_shapes) + result["assembly_depth"] = max_depth + + # ── Volume and surface area per unique shape ────────────────────── + skip_volume = len(leaf_shapes) > 200 + if skip_volume: + logger.info( + "extract_rich_metadata: %d parts > 200, skipping volume computation", + len(leaf_shapes), + ) + + total_volume = 0.0 # mm³ + total_area = 0.0 # mm² + largest_name = "" + largest_volume = 0.0 # mm³ + + # Build a count of how many instances each unique shape has + instance_counts: dict[int, int] = {} # index in unique_shapes → count + for _, leaf_shape in leaf_shapes: + for idx, (_, u_shape) in enumerate(unique_shapes): + if leaf_shape.IsSame(u_shape): + instance_counts[idx] = instance_counts.get(idx, 0) + 1 + break + + if not skip_volume: + for idx, (name, shape) in enumerate(unique_shapes): + if time.monotonic() - t_start > TIME_LIMIT: + logger.warning("extract_rich_metadata: time limit reached, stopping volume computation") + break + + count = instance_counts.get(idx, 1) + try: + props = GProp_GProps() + if _using_ocp: + brepgprop.VolumeProperties_s(shape, props) + else: + brepgprop.VolumeProperties(shape, props) + vol = abs(props.Mass()) # mm³, abs() for reversed shapes + total_volume += vol * count + if vol > largest_volume: + largest_volume = vol + largest_name = name + except Exception: + pass + + try: + props = GProp_GProps() + if _using_ocp: + brepgprop.SurfaceProperties_s(shape, props) + else: + brepgprop.SurfaceProperties(shape, props) + area = abs(props.Mass()) # mm² + total_area += area * count + except Exception: + pass + + result["total_volume_cm3"] = round(total_volume / 1000.0, 2) # mm³ → cm³ + result["total_surface_area_cm2"] = round(total_area / 100.0, 2) # mm² → cm² + result["largest_part"] = { + "name": largest_name, + "volume_cm3": round(largest_volume 
/ 1000.0, 2), + } + + # ── Smallest dimension across all parts ────────────────────────── + smallest_dim = float("inf") + for _, shape in unique_shapes: + if time.monotonic() - t_start > TIME_LIMIT: + break + try: + bbox = Bnd_Box() + _brepbndlib_add(shape, bbox) + xmin, ymin, zmin, xmax, ymax, zmax = bbox.Get() + dims = [abs(xmax - xmin), abs(ymax - ymin), abs(zmax - zmin)] + min_dim = min(d for d in dims if d > 1e-6) # skip degenerate + if min_dim < smallest_dim: + smallest_dim = min_dim + except Exception: + pass + result["smallest_dimension_mm"] = round(smallest_dim, 2) if smallest_dim < float("inf") else 0.0 + + # ── Triangle and vertex counts from tessellation ────────────────── + # Tessellate all root shapes first (coarse, for counting only) + total_triangles = 0 + total_vertices = 0 + for i in range(1, n_free + 1) if _using_ocp else range(len(free_labels)): + label = free_labels.Value(i) if _using_ocp else free_labels[i] + shape = _get_shape(shape_tool, label) + if shape is not None and not shape.IsNull(): + BRepMesh_IncrementalMesh(shape, 0.5, False, 0.5) + + # Walk faces and sum Poly_Triangulation data + explorer = TopExp_Explorer(shape, TopAbs_FACE) + while explorer.More(): + face = _TopoDS.Face_s(explorer.Current()) if _using_ocp \ + else _TopoDS.Face(explorer.Current()) + try: + loc = TopLoc_Location() + if _using_ocp: + tri = BRep_Tool.Triangulation_s(face, loc) + else: + tri = BRep_Tool.Triangulation(face, loc) + if tri is not None: + total_triangles += tri.NbTriangles() + total_vertices += tri.NbNodes() + except Exception: + pass + explorer.Next() + + result["total_triangle_count"] = total_triangles + result["total_vertex_count"] = total_vertices + + # ── Complexity score ────────────────────────────────────────────── + if total_triangles < 5000: + result["complexity_score"] = "low" + elif total_triangles <= 50000: + result["complexity_score"] = "medium" + else: + result["complexity_score"] = "high" + + elapsed = time.monotonic() - t_start + 
logger.info( + "extract_rich_metadata: %d parts (%d unique), %.1f cm³, %d tris, " + "complexity=%s, %.2fs", + result["part_count"], + result["unique_part_count"], + result["total_volume_cm3"], + result["total_triangle_count"], + result["complexity_score"], + elapsed, + ) + return result + + except ImportError: + logger.warning("OCC not available for extract_rich_metadata") + return result + except Exception as exc: + logger.warning("extract_rich_metadata failed: %s", exc) + return result + + def _extract_step_objects(step_path: Path) -> list[str]: """Extract part names from STEP file using pythonocc.""" try: diff --git a/backend/app/tasks/step_tasks.py b/backend/app/tasks/step_tasks.py index 8a4e5c5..79ec6e3 100644 --- a/backend/app/tasks/step_tasks.py +++ b/backend/app/tasks/step_tasks.py @@ -8,6 +8,7 @@ Celery discovers tasks by import path, so these re-exports are required. from app.domains.pipeline.tasks.extract_metadata import ( # noqa: F401 process_step_file, reextract_cad_metadata, + reextract_rich_metadata_task, ) from app.domains.pipeline.tasks.render_thumbnail import ( # noqa: F401 render_step_thumbnail, diff --git a/frontend/src/api/products.ts b/frontend/src/api/products.ts index c661f13..5f8dc3d 100644 --- a/frontend/src/api/products.ts +++ b/frontend/src/api/products.ts @@ -67,6 +67,7 @@ export interface Product { } | null arbeitspaket: string | null cad_render_log?: RenderLog | null + cad_metadata: Record | null notes: string | null is_active: boolean source_excel: string | null diff --git a/frontend/src/pages/Admin.tsx b/frontend/src/pages/Admin.tsx index e09b985..e9372b2 100644 --- a/frontend/src/pages/Admin.tsx +++ b/frontend/src/pages/Admin.tsx @@ -179,6 +179,14 @@ export default function AdminPage() { onError: (e: any) => toast.error(e.response?.data?.detail || 'Failed'), }) + const reextractRichMetadataMut = useMutation({ + mutationFn: () => api.post('/admin/settings/reextract-rich-metadata'), + onSuccess: (res) => { + 
toast.success(res.data.message || 'Rich metadata re-extraction queued') + }, + onError: (e: any) => toast.error(e.response?.data?.detail || 'Failed'), + }) + const cleanupOrphanedCadMut = useMutation({ mutationFn: () => api.post('/admin/settings/cleanup-orphaned-cad-files'), onSuccess: (res) => { @@ -1228,6 +1236,21 @@ export default function AdminPage() { + +
+

Re-extract Rich Metadata

+

Re-compute volume, surface area, complexity for all products with STEP files.

+
+ +
+
diff --git a/frontend/src/pages/ProductDetail.tsx b/frontend/src/pages/ProductDetail.tsx index 2b59944..d8d2ec1 100644 --- a/frontend/src/pages/ProductDetail.tsx +++ b/frontend/src/pages/ProductDetail.tsx @@ -626,6 +626,54 @@ export default function ProductDetailPage() {

)} + {product.cad_metadata && (() => { + const meta = product.cad_metadata as any + return ( +
+ +
+ {meta.part_count != null && ( +
+

{meta.part_count}

+

Parts

+
+ )} + {meta.total_volume_cm3 != null && ( +
+

{Number(meta.total_volume_cm3).toFixed(1)}

+

Volume (cm³)

+
+ )} + {meta.total_surface_area_cm2 != null && ( +
+

{Number(meta.total_surface_area_cm2).toFixed(1)}

+

Surface (cm²)

+
+ )} + {meta.complexity_score != null && ( +
+

{meta.complexity_score}

+

Complexity

+
+ )} + {meta.total_triangle_count != null && ( +
+

{Number(meta.total_triangle_count).toLocaleString()}

+

Triangles

+
+ )} + {meta.assembly_depth != null && ( +
+

{meta.assembly_depth}

+

Assembly Depth

+
+ )} +
+
+ ) + })()} {editMode && isPrivileged && ( diff --git a/plan.md b/plan.md index ec2676d..bc5b706 100644 --- a/plan.md +++ b/plan.md @@ -1,208 +1,122 @@ -# Plan: Tenant AI Chat Agent (Actionable) +# Plan: Rich Product Metadata Extraction from STEP Files ## Context -Each tenant has Azure OpenAI credentials stored in `tenant_config` JSONB. The goal is an **actionable AI agent** where users can type natural language commands to control the render pipeline — create orders, dispatch renders, check status, set overrides — scoped to their tenant. +The AI chat agent was asked "What is the biggest product from my order?" and couldn't answer because dimensional data wasn't available in tool results. While `cad_files.mesh_attributes` already stores bounding box dimensions, much more metadata is extractable from STEP files via OCC that would make the AI agent and the product library significantly more useful. -Example interactions: -- "Render all Kugellager products as WebP at 1024x1024" -- "What's the status of my last order?" -- "Set material override to Steel-Bare on order SA-2026-00160" -- "How many renders failed this week?" +**Currently extracted**: part names, bounding box (xyz), sharp edges, smooth angle +**Available but not extracted**: per-part volume, surface area, assembly hierarchy, instance counts, embedded colors, triangle counts, geometric complexity -The agent uses **function calling** (Azure OpenAI tool use) — the LLM decides which API action to execute, the backend executes it, and returns the result. Tenants are fully isolated — each uses their own Azure API key and only sees their own data. 
- -**What exists:** -- Per-tenant Azure OpenAI credentials in `tenant_config` JSONB -- WebSocket system scoped by tenant for real-time events -- `ai_validation` Celery queue (concurrency=8) -- Azure OpenAI integration boilerplate in `azure_ai.py` +**Goal**: Expand the STEP metadata extraction to compute richer product characteristics and store them in a structured `cad_metadata` JSONB field, accessible to the AI agent, product search, and frontend. ## Affected Files | File | Change | |------|--------| -| `backend/app/models/chat.py` | **NEW** — ChatMessage model | -| `backend/app/models/__init__.py` | Import ChatMessage | -| `backend/app/api/routers/chat.py` | **NEW** — Chat API endpoints | -| `backend/app/services/chat_service.py` | **NEW** — Azure OpenAI chat + DB context | -| `backend/app/main.py` | Register chat router | -| `backend/alembic/versions/XXX_add_chat_messages.py` | Migration | -| `frontend/src/api/chat.ts` | **NEW** — Chat API types + functions | -| `frontend/src/components/chat/ChatPanel.tsx` | **NEW** — Chat UI component | -| `frontend/src/components/layout/Layout.tsx` | Add chat toggle button | +| `backend/app/services/step_processor.py` | Expand `extract_step_metadata()` with volume, surface area, hierarchy, complexity | +| `backend/app/domains/products/models.py` | Add `cad_metadata` JSONB column to Product | +| `backend/alembic/versions/XXX_add_cad_metadata.py` | Migration | +| `backend/app/domains/pipeline/tasks/extract_metadata.py` | Populate `cad_metadata` after STEP processing | +| `backend/app/domains/products/schemas.py` | Expose `cad_metadata` in ProductOut | +| `backend/app/services/chat_service.py` | Include metadata in search_products and system prompt | +| `frontend/src/pages/ProductDetail.tsx` | Display rich metadata (volume, part count, complexity) | ## Tasks (in order) -### [ ] Task 1: ChatMessage model + migration +### [ ] Task 1: Expand STEP metadata extraction -- **File**: `backend/app/models/chat.py` (new) -- **What**: Create 
a ChatMessage model: +- **File**: `backend/app/services/step_processor.py` +- **What**: Expand `extract_step_metadata()` to compute additional properties after the existing bbox/edge extraction. Add a new function `extract_rich_metadata(doc, shape_tool)` that returns: ```python - class ChatMessage(Base): - __tablename__ = "chat_messages" - id: UUID PK - tenant_id: UUID FK → tenants.id (nullable, indexed) - user_id: UUID FK → users.id (nullable) - session_id: UUID (groups messages in a conversation, indexed) - role: String(20) — "user", "assistant", "system" - content: Text - context_type: String(50) nullable — "order", "product", "general" - context_id: UUID nullable — order_id or product_id - token_count: Integer nullable — track usage - created_at: DateTime + { + "part_count": 42, # Number of leaf parts + "assembly_depth": 3, # Max nesting depth + "total_volume_cm3": 1250.4, # Sum of all part volumes (cm³) + "total_surface_area_cm2": 3400.2, # Sum of all surface areas (cm²) + "total_triangle_count": 45000, # After tessellation + "total_vertex_count": 23000, # After tessellation + "largest_part": { # Part with largest volume + "name": "OuterRing", + "volume_cm3": 450.2, + }, + "smallest_dimension_mm": 0.5, # Smallest bbox dimension across all parts + "instance_count": 36, # Total instances (parts may repeat) + "unique_part_count": 12, # Distinct shapes + "complexity_score": "high", # low/medium/high based on triangle count + } ``` -- **Also**: Import in `backend/app/models/__init__.py` -- **Migration**: `alembic revision --autogenerate -m "add chat_messages table"` -- **Acceptance gate**: Table exists in DB; model importable + Use OCC: + - `GProp_GProps` + `BRepGProp.VolumeProperties()` for volume + - `BRepGProp.SurfaceProperties()` for surface area + - `Poly_Triangulation` for triangle/vertex counts (after tessellation) + - Assembly tree walk (already done in `_collect_part_key_map`) for hierarchy depth + instance count +- **Acceptance gate**: 
`extract_rich_metadata()` returns all fields for a test STEP file - **Dependencies**: None -### [ ] Task 2: Chat service — Azure OpenAI with function calling +### [ ] Task 2: Add cad_metadata column to Product model -- **File**: `backend/app/services/chat_service.py` (new) -- **What**: Service with Azure OpenAI **tool use / function calling**: - 1. Takes a user message + session_id + tenant_id + user_id - 2. Loads tenant Azure credentials from `tenant_config` - 3. Defines **tools** the LLM can call (JSON schema for each): - - `list_orders(status, limit)` — list tenant's orders - - `search_products(query, category, limit)` — search products - - `create_order(product_ids, output_type_name, render_overrides, material_override)` — create & submit - - `dispatch_renders(order_id)` — dispatch renders for an order - - `get_order_status(order_id)` — check render progress - - `set_material_override(order_id, material_name)` — batch material override - - `set_render_overrides(order_id, overrides)` — batch render overrides - - `get_render_stats()` — throughput stats - - `check_materials(order_id)` — unmapped materials check - - `query_database(sql)` — read-only SQL (SELECT only, tenant-scoped) - 4. Calls Azure OpenAI with `tools` parameter — the LLM decides which tool to call - 5. Executes the tool call internally (same functions as MCP server but tenant-scoped) - 6. Returns tool result to LLM for a natural language response - 7. Stores conversation in ChatMessage table +- **File**: `backend/app/domains/products/models.py` +- **What**: Add `cad_metadata: Mapped[dict | None] = mapped_column(JSONB, nullable=True, default=None)` to the Product model. This stores the rich metadata at the product level (not cad_file) because products are the user-facing entity. 
+- **Migration**: `alembic revision --autogenerate -m "add cad_metadata to products"` +- **Also**: Add to ProductOut schema in `backend/app/domains/products/schemas.py` +- **Acceptance gate**: Column exists, schema includes it +- **Dependencies**: None - **Tenant isolation**: All DB queries filter by `tenant_id`. The `query_database` tool auto-appends `WHERE tenant_id = '{tenant_id}'` or validates tenant scope. +### [ ] Task 3: Populate cad_metadata during STEP processing - **Tool execution**: Uses the existing backend API functions directly (not HTTP calls) — import from the routers/services. +- **File**: `backend/app/domains/pipeline/tasks/extract_metadata.py` +- **What**: After `process_step_file` extracts objects and queues thumbnail, call `extract_rich_metadata()` and store the result on the Product's `cad_metadata` field. Also store it on `cad_files.mesh_attributes` (merge with existing data). +- **Also**: Add a "reextract metadata" admin action that re-runs this for all existing products +- **Acceptance gate**: After STEP processing, product.cad_metadata is populated with volume, part_count, etc. +- **Dependencies**: Tasks 1, 2 - ```python - tools = [ - { - "type": "function", - "function": { - "name": "search_products", - "description": "Search products by name, PIM-ID, or category", - "parameters": { - "type": "object", - "properties": { - "query": {"type": "string"}, - "category": {"type": "string"}, - } - } - } - }, - # ... 
more tools - ] - response = client.chat.completions.create( - model=deployment, - messages=messages, - tools=tools, - tool_choice="auto", - ) - # Handle tool_calls in response, execute, return result - ``` -- **Acceptance gate**: User can say "show my last 5 orders" and get real data back via function calling -- **Dependencies**: Task 1 +### [ ] Task 4: Expose metadata in AI agent tools -### [ ] Task 3: Chat API endpoints - -- **File**: `backend/app/api/routers/chat.py` (new) -- **What**: FastAPI router with endpoints: - - `POST /api/chat/messages` — send a message, get AI response - - Body: `{ message: str, session_id: str | None, context_type: str | None, context_id: str | None }` - - Creates session_id if not provided - - Returns: `{ session_id: str, message: ChatMessageOut, response: ChatMessageOut }` - - Auth: `get_current_user` — uses user's tenant AI config - - `GET /api/chat/sessions` — list user's chat sessions - - Returns: `[{ session_id, last_message, message_count, created_at }]` - - `GET /api/chat/sessions/{session_id}/messages` — get conversation history - - Returns: `[{ id, role, content, created_at }]` - - `DELETE /api/chat/sessions/{session_id}` — delete a conversation -- **Also**: Register router in `backend/app/main.py` -- **Acceptance gate**: POST /api/chat/messages returns an AI response using tenant credentials -- **Dependencies**: Task 2 - -### [ ] Task 4: Frontend — Chat API types - -- **File**: `frontend/src/api/chat.ts` (new) -- **What**: TypeScript interfaces and API functions: - ```typescript - interface ChatMessage { id: string; role: 'user' | 'assistant' | 'system'; content: string; created_at: string } - interface ChatSession { session_id: string; last_message: string; message_count: number; created_at: string } - interface ChatResponse { session_id: string; message: ChatMessage; response: ChatMessage } - - function sendMessage(message: string, sessionId?: string, contextType?: string, contextId?: string): Promise - function 
getSessions(): Promise - function getSessionMessages(sessionId: string): Promise - function deleteSession(sessionId: string): Promise - ``` -- **Acceptance gate**: Types compile; functions callable +- **File**: `backend/app/services/chat_service.py` +- **What**: + 1. Update `_tool_search_products()` to include `cad_metadata` fields (part_count, total_volume_cm3, complexity_score) in results + 2. Update `query_database` tool description to mention `products.cad_metadata` JSONB field + 3. Update system prompt to mention available metadata +- **Acceptance gate**: AI agent can answer "What is the biggest product?" using volume data - **Dependencies**: Task 3 -### [ ] Task 5: Frontend — ChatPanel component +### [ ] Task 5: Display rich metadata on ProductDetail page -- **File**: `frontend/src/components/chat/ChatPanel.tsx` (new) -- **What**: Slide-out chat panel (right side, similar to notification panels in modern apps): - 1. **Header**: "AI Assistant" title + close button + session selector - 2. **Message list**: Scrollable area with role-based styling: - - User messages: right-aligned, accent background - - Assistant messages: left-aligned, surface background, markdown support - - Timestamps below each message - 3. **Input area**: Text input + send button (Enter to send) - 4. **Loading state**: Typing indicator while waiting for AI response - 5. **Session management**: "New conversation" button, session history dropdown - 6. 
**Context awareness**: When opened from an order/product page, auto-includes context +- **File**: `frontend/src/pages/ProductDetail.tsx` +- **What**: Add a "CAD Metadata" section on the product detail page showing: + - Part count + unique parts + instances + - Total volume (cm³) + surface area (cm²) + - Largest part name + volume + - Complexity score badge (low/medium/high) + - Triangle/vertex count + - Assembly depth +- **Acceptance gate**: Metadata displayed on product page; degrades gracefully to an empty state when not available +- **Dependencies**: Task 2 - **Styling**: - - Fixed right panel (w-96, full height) - - Backdrop overlay on mobile - - Smooth slide-in animation - - Use existing CSS variables (surface, content, accent) - - lucide-react icons (MessageSquare, Send, Loader2, X, Plus) -- **Acceptance gate**: Panel opens/closes, messages send and display, AI responds -- **Dependencies**: Task 4 +### [ ] Task 6: Batch re-extract metadata for existing products -### [ ] Task 6: Frontend — Chat toggle in Layout - -- **File**: `frontend/src/components/layout/Layout.tsx` -- **What**: Add a chat toggle button: - 1. Floating button in bottom-right corner (or in the sidebar) - 2. Icon: `MessageSquare` from lucide-react - 3. Badge with unread count (optional, for future) - 4. Click toggles ChatPanel visibility - 5. Only show when tenant has `ai_enabled = true` -- **Acceptance gate**: Button visible for users with AI-enabled tenant; clicking opens/closes ChatPanel -- **Dependencies**: Task 5 +- **File**: `backend/app/api/routers/admin.py` +- **What**: Add a "Re-extract Rich Metadata" button in System Tools that queues a Celery task to re-process all completed STEP files and populate `cad_metadata` for all products. +- **Acceptance gate**: Button triggers batch job; existing products get metadata populated +- **Dependencies**: Tasks 1, 3 ## Migration Check -**Yes** — one new table `chat_messages` with UUID PK, FK to tenants and users. +**Yes** — one new JSONB column on `products` table. 
## Order Recommendation -1. Backend model + migration (Task 1) -2. Backend service (Task 2) -3. Backend API (Task 3) -4. Frontend types (Task 4) -5. Frontend chat UI (Task 5) -6. Frontend layout integration (Task 6) +1. Task 1 (extraction logic) + Task 2 (model + migration) — parallel +2. Task 3 (wire up in pipeline) +3. Task 4 (AI agent) + Task 5 (frontend) — parallel +4. Task 6 (batch re-extract) ## Risks / Open Questions -1. **Azure OpenAI availability**: If tenant hasn't configured AI credentials, the chat should show a helpful message ("AI not configured — ask your admin to set up Azure OpenAI in Tenant Settings") +1. **Volume calculation accuracy**: OCC `BRepGProp` computes exact B-rep volume, not mesh-based. This is accurate but can be slow for very complex shapes. Cap at 5s per file. -2. **Token costs**: Each message uses Azure OpenAI tokens. Consider adding token counting and a configurable monthly limit per tenant. +2. **Performance**: Rich metadata extraction adds ~100-500ms per STEP file. This is acceptable since STEP processing already takes 1-5s. -3. **Context enrichment**: The system prompt could include live data (order counts, render status). This makes the AI more helpful but costs more tokens. Start simple, enhance later. +3. **Existing products**: ~45 products with STEP files need backfill. Task 6 handles this. -4. **Streaming responses**: Azure OpenAI supports streaming. V1 uses a simple request/response. V2 could stream via WebSocket for real-time typing effect. - -5. **openai package**: The `openai` Python package must be installed in the backend container. Check if it's already a dependency (it may be via `azure_ai.py`). +4. **Triangle count varies**: Depends on tessellation settings (deflection angles). Store the count at the current tessellation quality for reference, with a note that it's approximate.