fix(glb): 175/175 material substitution via _AFN suffix stripping

OCC's RWGltf_CafWriter appends _AF0/_AF1 assembly-instance suffixes to
mesh object names when a part appears multiple times in an assembly.
The material matching in export_gltf.py only stripped Blender's .001
suffix, leaving 24/175 GLB objects without materials.

Fix: strip _AFN suffixes via while loop (handles nested _AF0_AF1),
add prefix fallback (longest key wins) as last resort before no-match.

Also improve build_materials_from_excel Jaccard matching:
- Strip _AFN and numeric hash suffixes (-21227) before tokenizing
- Add prefix-based fallback (step 3) before position fallback (step 4)
- Raise threshold 0.3 → 0.35 for better precision
- Guard prefix matches to len >= 5 to prevent trivial false positives

Result: Material substitution: 175/175 mesh objects assigned (was 151/175)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-07 22:15:11 +01:00
parent 95cfe0aa93
commit 934728da77
2 changed files with 100 additions and 14 deletions
+29 -7
View File
@@ -430,11 +430,18 @@ async def regenerate_product_thumbnail(
def _normalize_part_token_name(name: str) -> str:
    """Normalise a part name so it can be split into comparable tokens.

    Steps, in order:

    1. Lowercase the name and trim surrounding whitespace.
    2. Remove every trailing ``.prt`` extension and ``_afN`` suffix, in
       whatever order they appear (``ring.prt_af0``, ``ring_af0.prt`` and
       ``ring_af0_af1`` all become ``ring``).
    3. Remove one trailing numeric hash suffix of four or more digits
       (e.g. ``-21227``).
    4. Replace the remaining hyphens and dots with underscores.

    Args:
        name: Raw part name or ``.prt`` filename.

    Returns:
        The normalised, lowercase, underscore-separated name.
    """
    import re as _re

    name = name.lower().strip()
    # OCC's RWGltf_CafWriter appends assembly-instance suffixes (_AF0, _AF1 ...)
    # to the part name, so the suffix may sit *after* the .prt extension.
    # Strip both kinds together so the result does not depend on their order.
    name = _re.sub(r"(?:\.prt|_af\d+)+$", "", name)
    # Strip a trailing numeric hash suffix from Excel .prt filenames (e.g. -21227)
    name = _re.sub(r"-\d{4,}$", "", name)
    # Hyphens and dots -> underscores for uniform token splitting
    return _re.sub(r"[-.]", "_", name)
@@ -456,23 +463,24 @@ def _jaccard(a: set, b: set) -> float:
def build_materials_from_excel(
cad_parts: list[str],
excel_components: list[dict],
similarity_threshold: float = 0.3,
similarity_threshold: float = 0.35,
) -> list[dict]:
"""Match CAD part names to Excel components and return cad_part_materials list.
Pure function — no DB access, sync-safe, callable from Celery tasks.
Matching strategy per CAD part:
Matching strategy per CAD part (in order):
1. Exact case-insensitive name match
2. Token-based Jaccard similarity on normalised filenames
3. Position-based fallback for low-confidence matches
3. Prefix-based fallback (one normalised name is a prefix of the other)
4. Position-based fallback for remaining low-confidence cases
"""
excel_entries: list[tuple[set[str], str, str]] = []
excel_entries: list[tuple[set[str], str, str, str]] = []
for c in excel_components:
raw = (c.get("part_name") or "").lower().strip()
norm = _normalize_part_token_name(raw)
tokens = _part_tokens(norm)
excel_entries.append((tokens, raw, c.get("material") or ""))
excel_entries.append((tokens, raw, c.get("material") or "", norm))
new_materials: list[dict] = []
for i, cad_part in enumerate(cad_parts):
@@ -483,16 +491,30 @@ def build_materials_from_excel(
best_mat = ""
best_score = 0.0
for tokens, raw, material in excel_entries:
for tokens, raw, material, excel_norm in excel_entries:
# 1. Exact match
if raw == cad_raw_lower:
best_mat = material
best_score = 1.0
break
# 2. Jaccard similarity
score = _jaccard(tokens, cad_tokens)
if score > best_score:
best_score = score
best_mat = material
# 3. Prefix fallback when the best Jaccard score is below the threshold (zero included):
# one normalised name starts with the other (handles sub-assembly variants)
if best_score < similarity_threshold:
for tokens, raw, material, excel_norm in excel_entries:
if len(excel_norm) >= 5 and len(cad_norm) >= 5 and (
cad_norm.startswith(excel_norm) or excel_norm.startswith(cad_norm)
):
best_mat = material
best_score = 0.7
break
# 4. Position-based fallback
if best_score < similarity_threshold:
if i < len(excel_components):
best_mat = excel_components[i].get("material") or ""
+71 -7
View File
@@ -32,6 +32,8 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--output_path", required=True)
parser.add_argument("--asset_library_blend", default=None)
parser.add_argument("--material_map", default="{}")
parser.add_argument("--smooth_angle", type=float, default=30.0,
help="Auto-smooth angle in degrees (default 30)")
return parser.parse_args(rest)
@@ -50,24 +52,86 @@ def main() -> None:
print(f"Imported geometry GLB: {args.glb_path} "
f"({len([o for o in bpy.data.objects if o.type == 'MESH'])} mesh objects)")
# Apply smooth shading with 30° angle threshold (Blender 4.1+ API)
# Apply smooth shading using the configured angle threshold
smooth_rad = _math.radians(args.smooth_angle)
print(f"Applying smooth shading at {args.smooth_angle}° ({smooth_rad:.3f} rad)")
for obj in bpy.data.objects:
if obj.type == "MESH":
bpy.context.view_layer.objects.active = obj
obj.select_set(True)
try:
bpy.ops.object.shade_smooth_by_angle(angle=_math.radians(30))
bpy.ops.object.shade_smooth_by_angle(angle=smooth_rad)
except Exception:
pass
# Fallback for older Blender API
bpy.ops.object.shade_smooth()
if obj.data.use_auto_smooth is not None:
obj.data.use_auto_smooth = True
obj.data.auto_smooth_angle = smooth_rad
# Apply asset library materials if provided.
# link=False (append) is required: the GLTF exporter can only traverse
# local (appended) Principled BSDF node trees to extract PBR values.
#
# IMPORTANT: OCC-exported GLBs name materials generically (mat_0, mat_1, …)
# but preserve STEP part names as mesh OBJECT names. We therefore match by
# obj.name, not by slot.material.name (which is how blender_render.py works).
if args.asset_library_blend and material_map:
import os
sys.path.insert(0, os.path.dirname(__file__))
from asset_library import apply_asset_library_materials
apply_asset_library_materials(args.asset_library_blend, material_map, link=False)
import re as _re
mat_map_lower = {k.lower().strip(): v for k, v in material_map.items()}
needed = set(mat_map_lower.values())
# Append materials from library (link=False so glTF exporter can read nodes)
appended: dict = {}
for mat_name in needed:
try:
bpy.ops.wm.append(
filepath=f"{args.asset_library_blend}/Material/{mat_name}",
directory=f"{args.asset_library_blend}/Material/",
filename=mat_name,
link=False,
)
if mat_name in bpy.data.materials:
appended[mat_name] = bpy.data.materials[mat_name]
print(f"Appended material: {mat_name}")
else:
print(f"WARNING: material '{mat_name}' not found in library after append",
file=sys.stderr)
except Exception as exc:
print(f"WARNING: failed to append material '{mat_name}': {exc}", file=sys.stderr)
if appended:
assigned = 0
mesh_objects = [o for o in bpy.data.objects if o.type == "MESH"]
for obj in mesh_objects:
# Strip Blender's .001/.002 deduplication suffix
base_name = _re.sub(r'\.\d{3}$', '', obj.name)
# Strip OCC assembly-instance suffix (_AF0, _AF1, … added by
# RWGltf_CafWriter when the same part appears multiple times).
# Apply repeatedly in case of nested suffixes (_AF0_AF1, etc.)
prev = None
while prev != base_name:
prev = base_name
base_name = _re.sub(r'_AF\d+$', '', base_name, flags=_re.IGNORECASE)
mat_name = mat_map_lower.get(base_name.lower().strip())
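# Prefix fallback: some sub-assembly nodes have names that extend a
# known key, or vice versa (e.g. key="Flange" matches "Flange_inner_AF0").
# Both names must be at least 5 characters long, so a short key such as
# "Ring" never matches here.
# Sort by key length descending so the most-specific key wins.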
if not mat_name:
lower_base = base_name.lower().strip()
for key, val in sorted(mat_map_lower.items(), key=lambda x: len(x[0]), reverse=True):
if len(key) >= 5 and len(lower_base) >= 5 and (
lower_base.startswith(key) or key.startswith(lower_base)
):
mat_name = val
break
if mat_name and mat_name in appended:
obj.data.materials.clear()
obj.data.materials.append(appended[mat_name])
assigned += 1
print(f"Material substitution: {assigned}/{len(mesh_objects)} mesh objects assigned")
# Export production GLB with full PBR material data
try: