# HartOMat/backend/app/domains/pipeline/tasks/export_glb.py
"""GLB/GLTF and USD export tasks.
Covers:
- generate_gltf_geometry_task — OCC STEP → geometry GLB (fast preview)
- generate_usd_master_task — OCC STEP → USD canonical scene (pxr authoring)
"""
import logging

from app.tasks.celery_app import celery_app
from app.core.task_logs import log_task_event
from app.core.pipeline_logger import PipelineLogger

logger = logging.getLogger(__name__)


def _usd_cache_hit_refresh_reason(cad_file, usd_asset, usd_render_path) -> str | None:
    """Reuse the runtime freshness checks before accepting a USD cache hit."""
    from app.domains.rendering.workflow_runtime_services import _usd_master_refresh_reason

    return _usd_master_refresh_reason(
        cad_file,
        usd_asset=usd_asset,
        usd_render_path=usd_render_path,
    )


@celery_app.task(bind=True, name="app.tasks.step_tasks.generate_gltf_geometry_task", queue="asset_pipeline", max_retries=1)
def generate_gltf_geometry_task(
    self,
    cad_file_id: str,
    workflow_run_id: str | None = None,
    workflow_node_id: str | None = None,
    **_: object,
):
    """Export a geometry GLB directly from STEP via OCC (no STL intermediary).

    Pipeline:
    1. Reads STEP file directly (no STL needed)
    2. Builds color_map from product.cad_part_materials (hex colors)
    3. Runs export_step_to_gltf.py (Python/OCP): STEP → GLB with per-part colors
    4. Stores result as gltf_geometry MediaAsset (replaces any existing one)

    Output is in meters, Y-up (glTF convention).
    A Redis dedup lock (TTL=30min) prevents concurrent duplicate tasks for the same file.
    """
    import json as _json
    import os as _os
    import subprocess as _subprocess
    import sys as _sys
    from pathlib import Path as _Path

    import redis as _redis_lib
    from sqlalchemy import create_engine, select as _select
    from sqlalchemy.orm import Session

    from app.config import settings as app_settings
    from app.models.cad_file import CadFile
    from app.models.system_setting import SystemSetting as _SysSetting

    pl = PipelineLogger(task_id=self.request.id)
    pl.step_start("export_glb_geometry", {"cad_file_id": cad_file_id})

    # Redis dedup lock: prevent concurrent duplicate tasks for the same cad_file_id
    _lock_key = f"glb_geometry_lock:{cad_file_id}"
    _r = _redis_lib.from_url(app_settings.redis_url)
    _acquired = _r.set(_lock_key, "1", nx=True, ex=1800)  # 30-min TTL
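    # SET with NX+EX is atomic, so only one worker can hold the lock; the TTL is
    # a crash guard, while the finally block at the end of this task releases
    # the lock on every normal exit.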
    if not _acquired:
        logger.warning("generate_gltf_geometry_task: %s already in-flight — skipping duplicate", cad_file_id)
        pl.step_done("export_glb_geometry", result={"skipped": True, "reason": "duplicate"})
        return {"skipped": True}
    try:
        # Resolve tenant context at task start (required for RLS)
        from app.core.tenant_context import resolve_tenant_id_for_cad, set_tenant_context_sync

        _tenant_id = resolve_tenant_id_for_cad(cad_file_id)
        sync_url = app_settings.database_url.replace("+asyncpg", "")
        eng = create_engine(sync_url)
        with Session(eng) as session:
            set_tenant_context_sync(session, _tenant_id)
            cad_file = session.get(CadFile, cad_file_id)
            if not cad_file or not cad_file.stored_path:
                logger.error("generate_gltf_geometry_task: no stored_path for %s", cad_file_id)
                return
            step_path_str = cad_file.stored_path

            # Build hex color_map from product.cad_part_materials
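            # Example cad_part_materials entry (shape inferred from the lookups
            # below; the values are hypothetical):
            #   {"part_name": "housing", "hex_color": "#aabbcc", "material": "PA12"}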
            from app.domains.products.models import Product

            product = session.execute(
                _select(Product).where(Product.cad_file_id == cad_file.id)
            ).scalar_one_or_none()
            color_map: dict[str, str] = {}
            product_id = str(product.id) if product else None
            if product and product.cad_part_materials:
                for entry in product.cad_part_materials:
                    part_name = entry.get("part_name") or entry.get("name", "")
                    hex_color = entry.get("hex_color") or entry.get("color", "")
                    if part_name and hex_color:
                        color_map[part_name] = hex_color
            settings_rows = session.execute(_select(_SysSetting)).scalars().all()
            sys_settings = {s.key: s.value for s in settings_rows}
            linear_deflection = float(sys_settings.get("scene_linear_deflection", "0.1"))
            angular_deflection = float(sys_settings.get("scene_angular_deflection", "0.1"))
            tessellation_engine = sys_settings.get("tessellation_engine", "occ")

            # Hash-based cache check: skip tessellation if file and settings haven't changed
            from app.domains.products.cache_service import compute_step_hash as _compute_step_hash
            from app.domains.media.models import MediaAsset, MediaAssetType
            import uuid as _uuid_check

            _current_hash = _compute_step_hash(str(step_path_str))
            _cache_hit_asset_id = None
            # Composite cache key includes deflection settings so changing them invalidates cache.
            # v5: bumped when occurrence-aware part-key stamping for repeated leaf meshes changed.
            effective_cache_key = (
                f"v5:{_current_hash}:{linear_deflection}:{angular_deflection}:{tessellation_engine}"
                if _current_hash else None
            )
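            # Example key (hash shortened, values hypothetical):
            #   "v5:9f2c41d0…:0.1:0.1:occ"
            # Any change to the STEP hash, either deflection, or the engine
            # produces a new key and forces a re-tessellation.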
            if effective_cache_key:
                existing_geo = session.execute(
                    _select(MediaAsset).where(
                        MediaAsset.cad_file_id == _uuid_check.UUID(cad_file_id),
                        MediaAsset.asset_type == MediaAssetType.gltf_geometry,
                    )
                ).scalars().first()
                stored_key = (existing_geo.render_config or {}).get("cache_key", "") if existing_geo else ""
                if stored_key == effective_cache_key:
                    _asset_disk_path = _Path(app_settings.upload_dir) / existing_geo.storage_key
                    if _asset_disk_path.exists():
                        if cad_file.gltf_path != str(_asset_disk_path):
                            cad_file.gltf_path = str(_asset_disk_path)
                            session.commit()
                        logger.info("[CACHE] cache key match — skipping geometry GLB tessellation for %s", cad_file_id)
                        pl.step_done("export_glb_geometry", result={"cached": True, "asset_id": str(existing_geo.id)})
                        _cache_hit_asset_id = str(existing_geo.id)
                    else:
                        logger.info("[CACHE] cache key match but asset missing on disk — re-running tessellation for %s", cad_file_id)
                else:
                    # Cache miss: update stored hash so next run can use it
                    cad_file.step_file_hash = _current_hash
                    session.commit()
            else:
                # No hash available: update stored hash and proceed
                cad_file.step_file_hash = _current_hash
                session.commit()
        eng.dispose()
        if _cache_hit_asset_id is not None:
            # Still chain USD master — it has its own hash-check
            try:
                generate_usd_master_task.delay(cad_file_id)
            except Exception:
                logger.debug("Could not queue generate_usd_master_task from cache-hit path (non-fatal)")
            try:
                from app.domains.rendering.tasks import _update_workflow_run_status

                _update_workflow_run_status(
                    cad_file_id,
                    "completed",
                    workflow_run_id=workflow_run_id,
                    workflow_node_id=workflow_node_id,
                )
            except Exception:
                logger.exception(
                    "Failed to update workflow state for cached GLB export %s",
                    cad_file_id,
                )
            return {"cached": True, "asset_id": _cache_hit_asset_id}
        step = _Path(step_path_str)
        if not step.exists():
            log_task_event(self.request.id, f"Failed: STEP file not found: {step}", "error")
            raise RuntimeError(f"STEP file not found: {step}")
        output_path = step.parent / f"{step.stem}_geometry.glb"
        log_task_event(
            self.request.id,
            f"Starting OCC GLB export: {len(color_map)} part colors",
            "info",
        )
        # Run export_step_to_gltf.py as a subprocess so OCP imports don't pollute worker state
        scripts_dir = _Path(_os.environ.get("RENDER_SCRIPTS_DIR", "/render-scripts"))
        script_path = scripts_dir / "export_step_to_gltf.py"
        python_bin = _sys.executable
        cmd = [
            python_bin, str(script_path),
            "--step_path", str(step),
            "--output_path", str(output_path),
            "--color_map", _json.dumps(color_map),
            "--linear_deflection", str(linear_deflection),
            "--angular_deflection", str(angular_deflection),
            "--tessellation_engine", tessellation_engine,
        ]
        log_task_event(
            self.request.id,
            f"Tessellation ({tessellation_engine}): linear={linear_deflection}mm, angular={angular_deflection}rad",
            "info",
        )
        try:
            result = _subprocess.run(cmd, capture_output=True, text=True, timeout=600)
            for line in result.stdout.splitlines():
                logger.info("[occ-gltf] %s", line)
            for line in result.stderr.splitlines():
                logger.warning("[occ-gltf stderr] %s", line)
            if result.returncode != 0 or not output_path.exists() or output_path.stat().st_size == 0:
                raise RuntimeError(
                    f"export_step_to_gltf.py failed (exit {result.returncode}).\n"
                    f"STDERR: {result.stderr[-1000:]}"
                )
        except Exception as exc:
            log_task_event(self.request.id, f"Failed: {exc}", "error")
            pl.step_error("export_glb_geometry", str(exc), exc)
            logger.error("generate_gltf_geometry_task OCC export failed: %s", exc)
            raise self.retry(exc=exc, countdown=15)
log_task_event(self.request.id, f"OCC GLB export completed: {output_path.name}", "done")
# --- Store MediaAsset (upsert: update existing to keep stable ID/URL) ---
import uuid as _uuid
from sqlalchemy import create_engine as _ce, select as _sel2
from sqlalchemy.orm import Session as _Session
from app.domains.media.models import MediaAsset, MediaAssetType
_sync_url = app_settings.database_url.replace("+asyncpg", "")
_eng2 = _ce(_sync_url)
with _Session(_eng2) as _sess:
set_tenant_context_sync(_sess, _tenant_id)
_key = str(output_path)
_prefix = str(app_settings.upload_dir).rstrip("/") + "/"
if _key.startswith(_prefix):
_key = _key[len(_prefix):]
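            # storage_key is kept relative to upload_dir (presumably so assets
            # stay addressable if the upload mount point changes between environments)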
            _file_size = output_path.stat().st_size if output_path.exists() else None
            existing = _sess.execute(
                _sel2(MediaAsset).where(
                    MediaAsset.cad_file_id == _uuid.UUID(cad_file_id),
                    MediaAsset.asset_type == MediaAssetType.gltf_geometry,
                )
            ).scalars().first()
            if existing:
                existing.storage_key = _key
                existing.mime_type = "model/gltf-binary"
                existing.file_size_bytes = _file_size
                existing.render_config = {"cache_key": effective_cache_key}
                if product_id:
                    existing.product_id = _uuid.UUID(product_id)
                cad_file = _sess.get(CadFile, _uuid.UUID(cad_file_id))
                if cad_file is not None:
                    cad_file.gltf_path = str(output_path)
                _sess.commit()
                asset_id = str(existing.id)
            else:
                asset = MediaAsset(
                    cad_file_id=_uuid.UUID(cad_file_id),
                    product_id=_uuid.UUID(product_id) if product_id else None,
                    asset_type=MediaAssetType.gltf_geometry,
                    storage_key=_key,
                    mime_type="model/gltf-binary",
                    file_size_bytes=_file_size,
                    render_config={"cache_key": effective_cache_key},
                )
                _sess.add(asset)
                cad_file = _sess.get(CadFile, _uuid.UUID(cad_file_id))
                if cad_file is not None:
                    cad_file.gltf_path = str(output_path)
                _sess.commit()
                asset_id = str(asset.id)
        _eng2.dispose()
        pl.step_done("export_glb_geometry", result={"glb_path": str(output_path), "asset_id": asset_id})
        logger.info("generate_gltf_geometry_task: MediaAsset %s created for cad %s", asset_id, cad_file_id)
        try:
            from app.domains.rendering.tasks import _update_workflow_run_status

            _update_workflow_run_status(
                cad_file_id,
                "completed",
                workflow_run_id=workflow_run_id,
                workflow_node_id=workflow_node_id,
            )
        except Exception:
            logger.exception("Failed to update workflow state for GLB export %s", cad_file_id)
        # Auto-chain USD master export so the canonical scene is always up to date
        try:
            generate_usd_master_task.delay(cad_file_id)
            logger.info("generate_gltf_geometry_task: queued generate_usd_master_task for %s", cad_file_id)
        except Exception:
            logger.debug("Could not queue generate_usd_master_task (non-fatal)")
        return {"glb_path": str(output_path), "asset_id": asset_id}
    finally:
        _r.delete(_lock_key)


@celery_app.task(
    bind=True,
    name="app.tasks.step_tasks.generate_usd_master_task",
    queue="asset_pipeline",  # needs pxr (usd-core) + OCC — both only in render-worker
    max_retries=1,
)
def generate_usd_master_task(self, cad_file_id: str) -> dict:
    """Export a USD master file from STEP via OCC + pxr authoring.

    Pipeline:
    1. Reads STEP file via export_step_to_usd.py (OCC XCAF + pxr)
    2. Writes .usd file alongside the STEP file
    3. Stores result as usd_master MediaAsset
    4. Parses MANIFEST_JSON from stdout → writes resolved_material_assignments to CadFile
    """
    import json as _json
    import os as _os
    import subprocess as _subprocess
    import sys as _sys
    import uuid as _uuid
    from pathlib import Path as _Path

    import redis as _redis_lib
    from sqlalchemy import create_engine as _ce, select as _sel
    from sqlalchemy.orm import Session as _Session

    from app.config import settings as app_settings
    from app.domains.media.models import MediaAsset, MediaAssetType
    from app.models.cad_file import CadFile
    from app.models.system_setting import SystemSetting
    from app.domains.products.models import Product
    from app.services.material_service import resolve_material_map

    pl = PipelineLogger(task_id=self.request.id)
    pl.step_start("usd_master", {"cad_file_id": cad_file_id})

    # Redis dedup lock: prevent concurrent duplicate tasks for the same cad_file_id
    _lock_key = f"usd_master_lock:{cad_file_id}"
    _r = _redis_lib.from_url(app_settings.redis_url)
    _acquired = _r.set(_lock_key, "1", nx=True, ex=1800)  # 30-min TTL
    if not _acquired:
        logger.warning("generate_usd_master_task: %s already in-flight — skipping duplicate", cad_file_id)
        pl.step_done("usd_master", result={"skipped": True, "reason": "duplicate"})
        return {"skipped": True}
    try:
        from app.core.tenant_context import resolve_tenant_id_for_cad, set_tenant_context_sync

        _tenant_id = resolve_tenant_id_for_cad(cad_file_id)
        sync_url = app_settings.database_url.replace("+asyncpg", "")
        eng = _ce(sync_url)
        _cache_hit_asset_id: str | None = None
        with _Session(eng) as sess:
            set_tenant_context_sync(sess, _tenant_id)
            cad_file = sess.get(CadFile, cad_file_id)
            if not cad_file or not cad_file.stored_path:
                logger.error("generate_usd_master_task: no stored_path for %s", cad_file_id)
                return {"error": "no stored_path"}
            step_path = _Path(cad_file.stored_path)
            product = sess.execute(
                _sel(Product).where(Product.cad_file_id == cad_file.id)
            ).scalar_one_or_none()
            color_map: dict[str, str] = {}
            raw_mat_map: dict[str, str] = {}
            if product and product.cad_part_materials:
                for entry in product.cad_part_materials:
                    part_name = entry.get("part_name") or entry.get("name", "")
                    hex_color = entry.get("hex_color") or entry.get("color", "")
                    if part_name and hex_color:
                        color_map[part_name] = hex_color
                    # Build raw material map for resolve_material_map
                    raw_material = entry.get("material", "")
                    if part_name and raw_material:
                        raw_mat_map[part_name] = raw_material
            # Resolve raw material names to HARTOMAT library names via aliases
            material_map: dict[str, str] = {}
            if raw_mat_map:
                material_map = resolve_material_map(raw_mat_map)
                logger.info(
                    "generate_usd_master_task: resolved %d material(s) for material_map",
                    len(material_map),
                )
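            # Hypothetical example: resolve_material_map({"Frame": "Alu 6060"})
            # might return {"Frame": "Aluminum_Anodized"}, depending on the alias table.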
            settings_rows = sess.execute(_sel(SystemSetting)).scalars().all()
            sys_settings = {s.key: s.value for s in settings_rows}
            linear_deflection = float(sys_settings.get("render_linear_deflection", "0.03"))
            angular_deflection = float(sys_settings.get("render_angular_deflection", "0.05"))
            sharp_threshold = float(sys_settings.get("sharp_edge_threshold", "20.0"))
            scripts_dir = _Path(_os.environ.get("RENDER_SCRIPTS_DIR", "/render-scripts"))
            script_path = scripts_dir / "export_step_to_usd.py"
            materials_helper_path = scripts_dir / "_blender_materials.py"
            if not script_path.exists():
                err = f"export_step_to_usd.py not found at {script_path}"
                pl.step_error("usd_master", err, None)
                raise RuntimeError(err)
            # Cache must include the active render-script revision. Otherwise
            # material resolution fixes never invalidate previously generated USD masters.
            script_fingerprint = "unknown"
            try:
                import hashlib as _hashlib_script

                _script_hash = _hashlib_script.sha256()
                for candidate in (script_path, materials_helper_path):
                    if not candidate.exists():
                        continue
                    _script_hash.update(candidate.read_bytes())
                script_fingerprint = _script_hash.hexdigest()[:12]
            except Exception as exc:
                logger.warning(
                    "[USD_MASTER] failed to fingerprint render scripts, falling back to legacy cache key: %s",
                    exc,
                )
            # Hash-based cache check: skip tessellation if file and settings haven't changed
            from app.domains.products.cache_service import compute_step_hash as _compute_step_hash_usd

            _current_hash_usd = _compute_step_hash_usd(str(step_path))
            # Composite cache key includes deflection settings and material_map
            # so changing either invalidates cache (material primvars are baked into USD)
            import hashlib as _hashlib_cache

            _mat_hash = (
                _hashlib_cache.md5(_json.dumps(material_map, sort_keys=True).encode()).hexdigest()[:12]
                if material_map
                else "none"
            )
            effective_cache_key = (
                f"{_current_hash_usd}:{linear_deflection}:{angular_deflection}:{sharp_threshold}:{_mat_hash}:{script_fingerprint}"
                if _current_hash_usd else None
            )
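            # Example key (hashes shortened, values hypothetical):
            #   "3ab1f2…:0.03:0.05:20.0:7f0dc1…:a91be2…"
            # i.e. STEP hash : linear : angular : sharp threshold : material-map hash : script fingerprint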
            if effective_cache_key:
                existing_usd = sess.execute(
                    _sel(MediaAsset).where(
                        MediaAsset.cad_file_id == cad_file.id,
                        MediaAsset.asset_type == MediaAssetType.usd_master,
                    )
                ).scalars().first()
                stored_key = (existing_usd.render_config or {}).get("cache_key", "") if existing_usd else ""
                if stored_key == effective_cache_key:
                    _usd_disk_path = _Path(app_settings.upload_dir) / existing_usd.storage_key
                    if _usd_disk_path.exists():
                        refresh_reason = _usd_cache_hit_refresh_reason(
                            cad_file,
                            existing_usd,
                            _usd_disk_path,
                        )
                        if refresh_reason is None:
                            logger.info("[CACHE] cache key match — skipping USD master tessellation for %s", cad_file_id)
                            pl.step_done("usd_master", result={"cached": True, "asset_id": str(existing_usd.id)})
                            _cache_hit_asset_id = str(existing_usd.id)
                        else:
                            logger.info(
                                "[CACHE] USD cache key matched for %s but asset is stale (%s) — rebuilding",
                                cad_file_id,
                                refresh_reason,
                            )
                    else:
                        logger.info("[CACHE] cache key match but USD asset missing on disk — re-running tessellation for %s", cad_file_id)
                else:
                    # Cache miss: update stored hash so next run can use it
                    cad_file.step_file_hash = _current_hash_usd
                    sess.commit()
            else:
                # No hash available: update stored hash and proceed
                cad_file.step_file_hash = _current_hash_usd
                sess.commit()
        eng.dispose()
        if _cache_hit_asset_id is not None:
            return {"cached": True, "asset_id": _cache_hit_asset_id}

        if not step_path.exists():
            err = f"STEP file not found: {step_path}"
            pl.step_error("usd_master", err, None)
            raise RuntimeError(err)
        output_path = step_path.parent / f"{step_path.stem}_master.usd"
        cmd = [
            _sys.executable, str(script_path),
            "--step_path", str(step_path),
            "--output_path", str(output_path),
            "--color_map", _json.dumps(color_map),
            "--linear_deflection", str(linear_deflection),
            "--angular_deflection", str(angular_deflection),
            "--sharp_threshold", str(sharp_threshold),
            "--cad_file_id", cad_file_id,
        ]
        if material_map:
            cmd += ["--material_map", _json.dumps(material_map)]
        log_task_event(
            self.request.id,
            f"[USD_MASTER] exporting STEP → USD: {step_path.name}",
            "info",
        )
        try:
            result = _subprocess.run(cmd, capture_output=True, text=True, timeout=600)
            for line in result.stdout.splitlines():
                logger.info("[usd-master] %s", line)
            for line in result.stderr.splitlines():
                logger.warning("[usd-master stderr] %s", line)
            if result.returncode != 0 or not output_path.exists() or output_path.stat().st_size == 0:
                raise RuntimeError(
                    f"export_step_to_usd.py failed (exit {result.returncode}).\n"
                    f"STDERR: {result.stderr[-1000:]}"
                )
        except Exception as exc:
            log_task_event(self.request.id, f"[USD_MASTER] failed: {exc}", "error")
            pl.step_error("usd_master", str(exc), exc)
            raise self.retry(exc=exc, countdown=15)
        # --- Store MediaAsset (upsert) ---
        eng2 = _ce(sync_url)
        asset_id: str = ""
        with _Session(eng2) as sess2:
            set_tenant_context_sync(sess2, _tenant_id)
            _key = str(output_path)
            _prefix = str(app_settings.upload_dir).rstrip("/") + "/"
            if _key.startswith(_prefix):
                _key = _key[len(_prefix):]
            _file_size = output_path.stat().st_size if output_path.exists() else None
            existing = sess2.execute(
                _sel(MediaAsset).where(
                    MediaAsset.cad_file_id == _uuid.UUID(cad_file_id),
                    MediaAsset.asset_type == MediaAssetType.usd_master,
                )
            ).scalars().first()
            if existing:
                existing.storage_key = _key
                existing.mime_type = "model/vnd.usd"
                existing.file_size_bytes = _file_size
                existing.render_config = {"cache_key": effective_cache_key}
                sess2.commit()
                asset_id = str(existing.id)
            else:
                asset = MediaAsset(
                    cad_file_id=_uuid.UUID(cad_file_id),
                    asset_type=MediaAssetType.usd_master,
                    storage_key=_key,
                    mime_type="model/vnd.usd",
                    file_size_bytes=_file_size,
                    render_config={"cache_key": effective_cache_key},
                )
                sess2.add(asset)
                sess2.commit()
                asset_id = str(asset.id)
        eng2.dispose()

        # --- Parse MANIFEST_JSON and write resolved_material_assignments ---
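        # Expected stdout line (shape inferred from the parser below; the field
        # values are hypothetical):
        #   MANIFEST_JSON: {"parts": [{"part_key": "p0", "source_name": "Housing",
        #                              "prim_path": "/Root/Housing", "canonical_material": "steel"}]}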
        manifest_parts: list = []
        for line in result.stdout.splitlines():
            if line.startswith("MANIFEST_JSON: "):
                try:
                    manifest_parts = _json.loads(line[len("MANIFEST_JSON: "):]).get("parts", [])
                except Exception as parse_exc:
                    logger.warning("[USD_MASTER] MANIFEST_JSON parse failed: %s", parse_exc)
                break
        if manifest_parts:
            try:
                resolved = {
                    p["part_key"]: {
                        "source_name": p["source_name"],
                        "prim_path": p["prim_path"],
                        "canonical_material": p.get("canonical_material"),
                    }
                    for p in manifest_parts
                }
                eng3 = _ce(sync_url)
                with _Session(eng3) as sess3:
                    set_tenant_context_sync(sess3, _tenant_id)
                    row = sess3.get(CadFile, cad_file_id)
                    if row:
                        row.resolved_material_assignments = resolved
                        sess3.commit()
                eng3.dispose()
                logger.info("[USD_MASTER] wrote resolved_material_assignments (%d parts)", len(resolved))
            except Exception as write_exc:
                logger.warning("[USD_MASTER] failed to write resolved_material_assignments: %s", write_exc)
        log_task_event(self.request.id, f"[USD_MASTER] done: {output_path.name}", "done")
        pl.step_done("usd_master", result={"usd_path": str(output_path), "asset_id": asset_id})
        return {"usd_path": str(output_path), "asset_id": asset_id, "n_parts": len(manifest_parts)}
    finally:
        _r.delete(_lock_key)