feat: rich product metadata extraction from STEP files
Extract volume, surface area, part count, assembly hierarchy, and complexity from STEP files via OCC B-rep analysis.

Backend:
- extract_rich_metadata() in step_processor.py: computes per-part volume (BRepGProp), surface area, triangle/vertex count, assembly depth, instance count, complexity score, largest part identification
- cad_metadata JSONB column on Product model (DB migration)
- Auto-populated during STEP processing (non-fatal, 10s timeout)
- Also stored in cad_files.mesh_attributes["rich_metadata"]
- Batch re-extract endpoint: POST /admin/settings/reextract-rich-metadata

AI Agent:
- search_products returns part_count, volume_cm3, complexity, largest_part
- query_database tool description documents cad_metadata schema

Frontend:
- ProductDetail page: CAD Metadata section with stat cards (parts, volume, surface area, complexity, triangles, assembly depth)
- Admin System Tools: "Re-extract Rich Metadata" button for backfill

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -219,7 +219,7 @@ TOOLS = [
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "query_database",
|
||||
"description": "Execute a read-only SQL SELECT query against the database. Key tables/columns: products(id, name, pim_id, category_key, cad_file_id, is_active, tenant_id), orders(id, order_number, status, tenant_id), order_lines(id, order_id, product_id, render_status, material_override, render_overrides), cad_files(id, mesh_attributes->'dimensions_mm' with {x,y,z} in mm, parsed_objects, processing_status). To get product dimensions: JOIN cad_files cf ON cf.id = p.cad_file_id and use cf.mesh_attributes->'dimensions_mm'. Use :tenant_id parameter for tenant filtering. Category is 'category_key' not 'category'.",
|
||||
"description": "Execute a read-only SQL SELECT query against the database. Key tables/columns: products(id, name, pim_id, category_key, cad_file_id, is_active, tenant_id, cad_metadata JSONB), orders(id, order_number, status, tenant_id), order_lines(id, order_id, product_id, render_status, material_override, render_overrides), cad_files(id, mesh_attributes->'dimensions_mm' with {x,y,z} in mm, parsed_objects, processing_status). products.cad_metadata JSONB contains: part_count, unique_part_count, instance_count, assembly_depth, total_volume_cm3, total_surface_area_cm2, total_triangle_count, complexity_score, largest_part (name + volume_cm3). To get product dimensions: JOIN cad_files cf ON cf.id = p.cad_file_id and use cf.mesh_attributes->'dimensions_mm'. Use :tenant_id parameter for tenant filtering. Category is 'category_key' not 'category'.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -330,7 +330,12 @@ async def _tool_search_products(db: AsyncSession, tenant_id: str, query: str = "
|
||||
cf.processing_status,
|
||||
cf.mesh_attributes->'dimensions_mm'->>'x' AS dim_x_mm,
|
||||
cf.mesh_attributes->'dimensions_mm'->>'y' AS dim_y_mm,
|
||||
cf.mesh_attributes->'dimensions_mm'->>'z' AS dim_z_mm
|
||||
cf.mesh_attributes->'dimensions_mm'->>'z' AS dim_z_mm,
|
||||
p.cad_metadata->>'part_count' AS part_count,
|
||||
p.cad_metadata->>'total_volume_cm3' AS volume_cm3,
|
||||
p.cad_metadata->>'complexity_score' AS complexity,
|
||||
p.cad_metadata->'largest_part'->>'name' AS largest_part_name,
|
||||
p.cad_metadata->'largest_part'->>'volume_cm3' AS largest_part_volume
|
||||
FROM products p
|
||||
LEFT JOIN cad_files cf ON cf.id = p.cad_file_id
|
||||
WHERE p.tenant_id = :tenant_id
|
||||
|
||||
@@ -632,6 +632,332 @@ def extract_step_metadata(step_path: str) -> StepMetadata:
|
||||
return StepMetadata()
|
||||
|
||||
|
||||
def extract_rich_metadata(step_path: str) -> dict:
    """Extract rich geometric metadata from a STEP file.

    Opens the STEP file via XCAF, walks the assembly tree, and computes
    volume, surface area, hierarchy depth, triangle/vertex counts, and
    complexity metrics for each unique part.

    Runs in the worker container (step_processing queue) which has OCP installed.
    ``OCC.Core`` (pythonocc) is tried first and OCP is used as the fallback.

    Limits:
      * Volume/surface-area computation is skipped entirely for files with
        more than 200 parts.
      * The per-part volume/area loop and the bounding-box loop stop once
        10 seconds have elapsed since the function started. The assembly
        walk and the tessellation pass are NOT time-limited.

    The function never raises: if the OCC bindings are missing, the file
    cannot be read, or any other error occurs, a warning is logged and the
    result dict is returned with whatever values were filled in so far
    (all defaults if nothing was computed).

    Args:
        step_path: Path to the STEP file (converted with ``str()`` before use).

    Returns:
        A dict with keys: part_count, unique_part_count, instance_count,
        assembly_depth, total_volume_cm3, total_surface_area_cm2,
        total_triangle_count, total_vertex_count, largest_part
        (``{"name", "volume_cm3"}``), smallest_dimension_mm, complexity_score
        (``"low"`` / ``"medium"`` / ``"high"``).
    """
    import time

    t_start = time.monotonic()
    TIME_LIMIT = 10.0  # seconds
    MAX_PARTS_FOR_VOLUME = 200

    result: dict = {
        "part_count": 0,
        "unique_part_count": 0,
        "instance_count": 0,
        "assembly_depth": 0,
        "total_volume_cm3": 0.0,
        "total_surface_area_cm2": 0.0,
        "total_triangle_count": 0,
        "total_vertex_count": 0,
        "largest_part": {"name": "", "volume_cm3": 0.0},
        "smallest_dimension_mm": 0.0,
        "complexity_score": "low",
    }

    def _timed_out() -> bool:
        return time.monotonic() - t_start > TIME_LIMIT

    try:
        # Import OCC — try OCC.Core first, fall back to OCP.
        # Each branch defines the same set of small adapters so the rest of
        # the function does not need to know which binding is in use
        # (OCP exposes static methods with an ``_s`` suffix).
        try:
            from OCC.Core.STEPCAFControl import STEPCAFControl_Reader
            from OCC.Core.IFSelect import IFSelect_RetDone
            from OCC.Core.XCAFDoc import XCAFDoc_DocumentTool
            from OCC.Core.TDocStd import TDocStd_Document
            from OCC.Core.TDataStd import TDataStd_Name
            from OCC.Core.TCollection import TCollection_ExtendedString
            from OCC.Core.TDF import TDF_LabelSequence, TDF_Label
            from OCC.Core.XCAFDoc import XCAFDoc_ShapeTool
            from OCC.Core.BRepGProp import brepgprop
            from OCC.Core.GProp import GProp_GProps
            from OCC.Core.BRepMesh import BRepMesh_IncrementalMesh
            from OCC.Core.TopExp import TopExp_Explorer
            from OCC.Core.TopAbs import TopAbs_FACE
            from OCC.Core.TopoDS import TopoDS as _TopoDS
            from OCC.Core.BRep import BRep_Tool
            from OCC.Core.TopLoc import TopLoc_Location
            from OCC.Core.Bnd import Bnd_Box
            from OCC.Core.BRepBndLib import brepbndlib as _brepbndlib_mod

            def _get_components(label, seq):
                XCAFDoc_ShapeTool.GetComponents(label, seq)

            def _is_reference(label):
                return XCAFDoc_ShapeTool.IsReference(label)

            def _get_referred(label, ref):
                return XCAFDoc_ShapeTool.GetReferredShape(label, ref)

            def _get_shape(st, label):
                return st.GetShape(label)

            def _get_name_id():
                return TDataStd_Name.GetID()

            def _brepbndlib_add(shape, bbox):
                _brepbndlib_mod.Add(shape, bbox)

            def _shape_tool_for(document):
                return XCAFDoc_DocumentTool.ShapeTool(document.Main())

            def _volume_props(shape, props):
                brepgprop.VolumeProperties(shape, props)

            def _surface_props(shape, props):
                brepgprop.SurfaceProperties(shape, props)

            def _as_face(shape):
                return _TopoDS.Face(shape)

            def _triangulation(face, loc):
                return BRep_Tool.Triangulation(face, loc)

        except ImportError:
            from OCP.STEPCAFControl import STEPCAFControl_Reader  # type: ignore[no-redef]
            from OCP.IFSelect import IFSelect_RetDone  # type: ignore[no-redef]
            from OCP.XCAFDoc import XCAFDoc_DocumentTool  # type: ignore[no-redef]
            from OCP.TDocStd import TDocStd_Document  # type: ignore[no-redef]
            from OCP.TDataStd import TDataStd_Name  # type: ignore[no-redef]
            from OCP.TCollection import TCollection_ExtendedString  # type: ignore[no-redef]
            from OCP.TDF import TDF_LabelSequence, TDF_Label  # type: ignore[no-redef]
            from OCP.XCAFDoc import XCAFDoc_ShapeTool  # type: ignore[no-redef]
            from OCP.BRepGProp import brepgprop  # type: ignore[no-redef]
            from OCP.GProp import GProp_GProps  # type: ignore[no-redef]
            from OCP.BRepMesh import BRepMesh_IncrementalMesh  # type: ignore[no-redef]
            from OCP.TopExp import TopExp_Explorer  # type: ignore[no-redef]
            from OCP.TopAbs import TopAbs_FACE  # type: ignore[no-redef]
            from OCP.TopoDS import TopoDS as _TopoDS  # type: ignore[no-redef]
            from OCP.BRep import BRep_Tool  # type: ignore[no-redef]
            from OCP.TopLoc import TopLoc_Location  # type: ignore[no-redef]
            from OCP.Bnd import Bnd_Box  # type: ignore[no-redef]
            from OCP.BRepBndLib import BRepBndLib as _brepbndlib_mod  # type: ignore[no-redef]

            def _get_components(label, seq):
                XCAFDoc_ShapeTool.GetComponents_s(label, seq)

            def _is_reference(label):
                return XCAFDoc_ShapeTool.IsReference_s(label)

            def _get_referred(label, ref):
                return XCAFDoc_ShapeTool.GetReferredShape_s(label, ref)

            def _get_shape(st, label):
                return st.GetShape_s(label)

            def _get_name_id():
                return TDataStd_Name.GetID_s()

            def _brepbndlib_add(shape, bbox):
                _brepbndlib_mod.Add_s(shape, bbox)

            def _shape_tool_for(document):
                return XCAFDoc_DocumentTool.ShapeTool_s(document.Main())

            def _volume_props(shape, props):
                brepgprop.VolumeProperties_s(shape, props)

            def _surface_props(shape, props):
                brepgprop.SurfaceProperties_s(shape, props)

            def _as_face(shape):
                return _TopoDS.Face_s(shape)

            def _triangulation(face, loc):
                return BRep_Tool.Triangulation_s(face, loc)

        # ── Read STEP file ────────────────────────────────────────────────
        doc = TDocStd_Document(TCollection_ExtendedString("MDTV-CAF"))
        reader = STEPCAFControl_Reader()
        reader.SetColorMode(True)
        reader.SetNameMode(True)
        # Check the read status explicitly so an unreadable file is reported
        # as such instead of surfacing later as a transfer failure.
        if reader.ReadFile(str(step_path)) != IFSelect_RetDone:
            logger.warning("extract_rich_metadata: could not read STEP file %s", step_path)
            return result
        if not reader.Transfer(doc):
            logger.warning("extract_rich_metadata: XCAF transfer failed for %s", step_path)
            return result

        shape_tool = _shape_tool_for(doc)

        # Both bindings fill a TDF_LabelSequence (1-based Value()/Length()).
        free_labels = TDF_LabelSequence()
        shape_tool.GetFreeShapes(free_labels)
        n_free = free_labels.Length()

        # ── Walk the XCAF assembly tree ───────────────────────────────────
        # Every leaf counts as one part instance. Distinct geometry is tracked
        # via IsSame(); each entry is [name, shape, instance_count] so the
        # instance count is accumulated during the walk itself.
        leaf_count = 0
        unique_parts: list[list] = []
        max_depth = 0

        def _label_name(label) -> str:
            name_attr = TDataStd_Name()
            if label.FindAttribute(_get_name_id(), name_attr):
                return name_attr.Get().ToExtString()
            return ""

        def _walk(label, depth: int) -> None:
            nonlocal max_depth, leaf_count
            if depth > max_depth:
                max_depth = depth

            # Dereference component references
            actual_label = label
            if _is_reference(label):
                ref_label = TDF_Label()
                if _get_referred(label, ref_label):
                    actual_label = ref_label

            components = TDF_LabelSequence()
            _get_components(actual_label, components)
            n_components = components.Length()

            if n_components:
                for i in range(1, n_components + 1):
                    _walk(components.Value(i), depth + 1)
                return

            # Leaf node: prefer the instance name, fall back to the prototype name.
            name = _label_name(label) or _label_name(actual_label)
            shape = _get_shape(shape_tool, actual_label)
            if shape is None or shape.IsNull():
                return

            leaf_count += 1
            for part in unique_parts:
                if shape.IsSame(part[1]):
                    part[2] += 1
                    return
            unique_parts.append([name, shape, 1])

        for i in range(1, n_free + 1):
            _walk(free_labels.Value(i), 0)

        result["part_count"] = leaf_count
        result["unique_part_count"] = len(unique_parts)
        result["instance_count"] = leaf_count
        result["assembly_depth"] = max_depth

        # ── Volume and surface area per unique shape ──────────────────────
        skip_volume = leaf_count > MAX_PARTS_FOR_VOLUME
        if skip_volume:
            logger.info(
                "extract_rich_metadata: %d parts > %d, skipping volume computation",
                leaf_count,
                MAX_PARTS_FOR_VOLUME,
            )

        total_volume = 0.0  # mm³
        total_area = 0.0  # mm²
        largest_name = ""
        largest_volume = 0.0  # mm³

        if not skip_volume:
            for name, shape, count in unique_parts:
                if _timed_out():
                    logger.warning("extract_rich_metadata: time limit reached, stopping volume computation")
                    break

                # Volume and area are computed independently so a failure in
                # one does not discard the other (best-effort per part).
                try:
                    props = GProp_GProps()
                    _volume_props(shape, props)
                    vol = abs(props.Mass())  # mm³, abs() for reversed shapes
                except Exception as exc:
                    logger.debug("extract_rich_metadata: volume failed for part %r: %s", name, exc)
                else:
                    total_volume += vol * count
                    if vol > largest_volume:
                        largest_volume = vol
                        largest_name = name

                try:
                    props = GProp_GProps()
                    _surface_props(shape, props)
                    area = abs(props.Mass())  # mm²
                except Exception as exc:
                    logger.debug("extract_rich_metadata: surface area failed for part %r: %s", name, exc)
                else:
                    total_area += area * count

        result["total_volume_cm3"] = round(total_volume / 1000.0, 2)  # mm³ → cm³
        result["total_surface_area_cm2"] = round(total_area / 100.0, 2)  # mm² → cm²
        result["largest_part"] = {
            "name": largest_name,
            "volume_cm3": round(largest_volume / 1000.0, 2),
        }

        # ── Smallest dimension across all parts ──────────────────────────
        smallest_dim = float("inf")
        for name, shape, _count in unique_parts:
            if _timed_out():
                break
            try:
                bbox = Bnd_Box()
                _brepbndlib_add(shape, bbox)
                if bbox.IsVoid():
                    continue
                xmin, ymin, zmin, xmax, ymax, zmax = bbox.Get()
            except Exception as exc:
                logger.debug("extract_rich_metadata: bounding box failed for part %r: %s", name, exc)
                continue
            # Skip degenerate (zero-thickness) extents.
            extents = [
                d
                for d in (abs(xmax - xmin), abs(ymax - ymin), abs(zmax - zmin))
                if d > 1e-6
            ]
            if extents:
                smallest_dim = min(smallest_dim, min(extents))
        result["smallest_dimension_mm"] = round(smallest_dim, 2) if smallest_dim < float("inf") else 0.0

        # ── Triangle and vertex counts from tessellation ──────────────────
        # Tessellate each root shape (coarse, for counting only), then walk
        # its faces and sum the Poly_Triangulation data.
        total_triangles = 0
        total_vertices = 0
        for i in range(1, n_free + 1):
            shape = _get_shape(shape_tool, free_labels.Value(i))
            if shape is None or shape.IsNull():
                continue
            BRepMesh_IncrementalMesh(shape, 0.5, False, 0.5)

            explorer = TopExp_Explorer(shape, TopAbs_FACE)
            while explorer.More():
                face = _as_face(explorer.Current())
                try:
                    loc = TopLoc_Location()
                    tri = _triangulation(face, loc)
                    if tri is not None:
                        total_triangles += tri.NbTriangles()
                        total_vertices += tri.NbNodes()
                except Exception as exc:
                    logger.debug("extract_rich_metadata: triangulation lookup failed: %s", exc)
                explorer.Next()

        result["total_triangle_count"] = total_triangles
        result["total_vertex_count"] = total_vertices

        # ── Complexity score ──────────────────────────────────────────────
        if total_triangles < 5000:
            result["complexity_score"] = "low"
        elif total_triangles <= 50000:
            result["complexity_score"] = "medium"
        else:
            result["complexity_score"] = "high"

        elapsed = time.monotonic() - t_start
        logger.info(
            "extract_rich_metadata: %d parts (%d unique), %.1f cm³, %d tris, "
            "complexity=%s, %.2fs",
            result["part_count"],
            result["unique_part_count"],
            result["total_volume_cm3"],
            result["total_triangle_count"],
            result["complexity_score"],
            elapsed,
        )
        return result

    except ImportError:
        logger.warning("OCC not available for extract_rich_metadata")
        return result
    except Exception as exc:
        logger.warning("extract_rich_metadata failed: %s", exc)
        return result
|
||||
|
||||
|
||||
def _extract_step_objects(step_path: Path) -> list[str]:
|
||||
"""Extract part names from STEP file using pythonocc."""
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user