feat: rich product metadata extraction from STEP files
Extract volume, surface area, part count, assembly hierarchy, and complexity from STEP files via OCC B-rep analysis.

Backend:
- extract_rich_metadata() in step_processor.py: computes per-part volume (BRepGProp), surface area, triangle/vertex count, assembly depth, instance count, complexity score, largest part identification
- cad_metadata JSONB column on Product model (DB migration)
- Auto-populated during STEP processing (non-fatal, 10s timeout)
- Also stored in cad_files.mesh_attributes["rich_metadata"]
- Batch re-extract endpoint: POST /admin/settings/reextract-rich-metadata

AI Agent:
- search_products returns part_count, volume_cm3, complexity, largest_part
- query_database tool description documents cad_metadata schema

Frontend:
- ProductDetail page: CAD Metadata section with stat cards (parts, volume, surface area, complexity, triangles, assembly depth)
- Admin System Tools: "Re-extract Rich Metadata" button for backfill

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -109,6 +109,57 @@ def process_step_file(self, cad_file_id: str):
|
||||
pl.step_error("process_step_file", f"STEP metadata extraction failed: {exc}", exc)
|
||||
r.delete(lock_key) # release lock so a retry can proceed
|
||||
raise self.retry(exc=exc, countdown=60, max_retries=3)
|
||||
|
||||
# Extract rich metadata (volume, surface area, complexity, etc.) — non-fatal
|
||||
try:
|
||||
from sqlalchemy import create_engine, update as sql_update
|
||||
from sqlalchemy.orm import Session as SyncSession
|
||||
from app.config import settings as cfg
|
||||
from app.services.step_processor import extract_rich_metadata
|
||||
from app.models.cad_file import CadFile
|
||||
from app.models.product import Product
|
||||
from app.core.tenant_context import set_tenant_context_sync
|
||||
|
||||
eng = create_engine(cfg.database_url_sync)
|
||||
try:
|
||||
# Load stored_path for the cad file
|
||||
with SyncSession(eng) as session:
|
||||
set_tenant_context_sync(session, _tenant_id)
|
||||
cad_file = session.get(CadFile, cad_file_id)
|
||||
step_path = cad_file.stored_path if cad_file else None
|
||||
|
||||
if step_path:
|
||||
rich_meta = extract_rich_metadata(str(step_path))
|
||||
if rich_meta and rich_meta.get("part_count", 0) > 0:
|
||||
with SyncSession(eng) as session:
|
||||
set_tenant_context_sync(session, _tenant_id)
|
||||
# Merge into cad_files.mesh_attributes
|
||||
cad_file = session.get(CadFile, cad_file_id)
|
||||
if cad_file:
|
||||
existing_attrs = cad_file.mesh_attributes or {}
|
||||
existing_attrs["rich_metadata"] = rich_meta
|
||||
session.execute(
|
||||
sql_update(CadFile)
|
||||
.where(CadFile.id == cad_file_id)
|
||||
.values(mesh_attributes=existing_attrs)
|
||||
)
|
||||
# Update all active products linked to this CAD file
|
||||
session.execute(
|
||||
sql_update(Product)
|
||||
.where(Product.cad_file_id == cad_file_id, Product.is_active.is_(True))
|
||||
.values(cad_metadata=rich_meta)
|
||||
)
|
||||
session.commit()
|
||||
logger.info(
|
||||
f"Rich metadata extracted for cad_file {cad_file_id}: "
|
||||
f"{rich_meta.get('part_count')} parts, "
|
||||
f"{rich_meta.get('total_volume_cm3', 0):.1f} cm³"
|
||||
)
|
||||
finally:
|
||||
eng.dispose()
|
||||
except Exception:
|
||||
logger.exception(f"Rich metadata extraction failed for cad_file {cad_file_id} (non-fatal)")
|
||||
|
||||
finally:
|
||||
r.delete(lock_key) # always release on completion or unhandled error
|
||||
|
||||
@@ -203,6 +254,70 @@ def _auto_populate_materials_for_cad(cad_file_id: str, tenant_id: str | None = N
|
||||
eng.dispose()
|
||||
|
||||
|
||||
@celery_app.task(name="app.tasks.step_tasks.reextract_rich_metadata_task", queue="step_processing")
def reextract_rich_metadata_task():
    """Batch re-extract rich metadata (volume, surface area, complexity) for all completed CAD files.

    Selects every ``CadFile`` whose ``processing_status`` is ``completed`` and
    whose ``stored_path`` is set, runs ``extract_rich_metadata`` on each stored
    path and, when the result reports at least one part, writes it to:

    * ``cad_files.mesh_attributes["rich_metadata"]`` (merged into the existing
      attributes), and
    * ``cad_metadata`` of every active ``Product`` linked to that CAD file.

    A failure for one file is logged and counted; it never aborts the batch.
    Files whose extraction yields no parts are skipped and counted as neither
    updated nor failed. The task returns nothing; totals are logged at the end.
    """
    from sqlalchemy import create_engine, select as sql_select, update as sql_update
    from sqlalchemy.orm import Session as SyncSession
    from app.config import settings as cfg
    from app.models.cad_file import CadFile, ProcessingStatus
    from app.models.product import Product
    from app.core.tenant_context import set_tenant_context_sync
    # Imported once here instead of on every loop iteration.
    from app.services.step_processor import extract_rich_metadata

    # Same sync-URL setting that process_step_file uses in this module.
    eng = create_engine(cfg.database_url_sync)
    updated = 0
    failed = 0

    try:
        # NOTE(review): this query runs without a tenant context. If row
        # visibility depends on set_tenant_context_sync, confirm it still
        # returns the CAD files of every tenant.
        with SyncSession(eng) as session:
            cad_files = session.execute(
                sql_select(CadFile).where(
                    CadFile.processing_status == ProcessingStatus.completed,
                    CadFile.stored_path.isnot(None),
                )
            ).scalars().all()
            # Copy plain values out so nothing depends on the closed session.
            cad_entries = [(str(cf.id), cf.stored_path, cf.tenant_id) for cf in cad_files]

        for cad_file_id, step_path, tenant_id in cad_entries:
            try:
                rich_meta = extract_rich_metadata(str(step_path))
                if not (rich_meta and rich_meta.get("part_count", 0) > 0):
                    continue

                with SyncSession(eng) as session:
                    set_tenant_context_sync(session, tenant_id)

                    # Update mesh_attributes on cad_file
                    cad_file = session.get(CadFile, cad_file_id)
                    if cad_file:
                        # Build a new dict rather than mutating the value
                        # attached to the loaded ORM object.
                        merged_attrs = {
                            **(cad_file.mesh_attributes or {}),
                            "rich_metadata": rich_meta,
                        }
                        session.execute(
                            sql_update(CadFile)
                            .where(CadFile.id == cad_file_id)
                            .values(mesh_attributes=merged_attrs)
                        )

                    # Update all active products linked to this CAD file
                    session.execute(
                        sql_update(Product)
                        .where(Product.cad_file_id == cad_file_id, Product.is_active.is_(True))
                        .values(cad_metadata=rich_meta)
                    )
                    session.commit()

                updated += 1
                # `or 0` keeps a None volume from raising in the format spec,
                # which would otherwise count this committed file as failed too.
                volume_cm3 = rich_meta.get("total_volume_cm3") or 0
                logger.info(
                    f"reextract_rich_metadata: {cad_file_id} -> "
                    f"{rich_meta.get('part_count')} parts, "
                    f"{volume_cm3:.1f} cm3"
                )
            except Exception:
                failed += 1
                logger.exception(f"reextract_rich_metadata failed for cad_file {cad_file_id}")
    finally:
        eng.dispose()

    logger.info(f"reextract_rich_metadata_task complete: {updated} updated, {failed} failed")
|
||||
|
||||
|
||||
@celery_app.task(name="app.tasks.step_tasks.reextract_cad_metadata", queue="asset_pipeline")
|
||||
def reextract_cad_metadata(cad_file_id: str):
|
||||
"""Re-extract bounding-box dimensions for an already-completed CAD file.
|
||||
|
||||
Reference in New Issue
Block a user