fix(glb): 175/175 material substitution via _AFN suffix stripping
OCC's RWGltf_CafWriter appends _AF0/_AF1 assembly-instance suffixes to mesh object names when a part appears multiple times in an assembly. The material matching in export_gltf.py only stripped Blender's .001 suffix, leaving 24/175 GLB objects without materials.

Fix: strip _AFN suffixes via a while loop (handles nested _AF0_AF1) and add a prefix fallback (longest key wins) as a last resort before reporting no match.

Also improve build_materials_from_excel Jaccard matching:
- Strip _AFN and numeric hash suffixes (e.g. -21227) before tokenizing
- Add prefix-based fallback (step 3) before position fallback (step 4)
- Raise threshold 0.3 → 0.35 for better precision
- Guard prefix matches to len >= 5 to prevent trivial false positives

Result: Material substitution: 175/175 mesh objects assigned (was 151/175)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -430,11 +430,18 @@ async def regenerate_product_thumbnail(
|
||||
|
||||
|
||||
def _normalize_part_token_name(name: str) -> str:
|
||||
"""Lowercase, strip .prt extension, normalise separators to underscore."""
|
||||
"""Lowercase, strip .prt extension and noise suffixes, normalise separators to underscore."""
|
||||
import re as _re
|
||||
name = name.lower().strip()
|
||||
if name.endswith(".prt"):
|
||||
name = name[:-4]
|
||||
# Strip OCC assembly-instance suffixes (_AF0, _AF1 …) that RWGltf_CafWriter adds
|
||||
prev = None
|
||||
while prev != name:
|
||||
prev = name
|
||||
name = _re.sub(r"_af\d+$", "", name)
|
||||
# Strip trailing numeric hash suffixes from Excel .prt filenames (e.g. -21227)
|
||||
name = _re.sub(r"-\d{4,}$", "", name)
|
||||
# Hyphens and dots → underscores for uniform token splitting
|
||||
return _re.sub(r"[-.]", "_", name)
|
||||
|
||||
@@ -456,23 +463,24 @@ def _jaccard(a: set, b: set) -> float:
|
||||
def build_materials_from_excel(
|
||||
cad_parts: list[str],
|
||||
excel_components: list[dict],
|
||||
similarity_threshold: float = 0.3,
|
||||
similarity_threshold: float = 0.35,
|
||||
) -> list[dict]:
|
||||
"""Match CAD part names to Excel components and return cad_part_materials list.
|
||||
|
||||
Pure function — no DB access, sync-safe, callable from Celery tasks.
|
||||
|
||||
Matching strategy per CAD part:
|
||||
Matching strategy per CAD part (in order):
|
||||
1. Exact case-insensitive name match
|
||||
2. Token-based Jaccard similarity on normalised filenames
|
||||
3. Position-based fallback for low-confidence matches
|
||||
3. Prefix-based fallback (one normalised name is a prefix of the other)
|
||||
4. Position-based fallback for remaining low-confidence cases
|
||||
"""
|
||||
excel_entries: list[tuple[set[str], str, str]] = []
|
||||
excel_entries: list[tuple[set[str], str, str, str]] = []
|
||||
for c in excel_components:
|
||||
raw = (c.get("part_name") or "").lower().strip()
|
||||
norm = _normalize_part_token_name(raw)
|
||||
tokens = _part_tokens(norm)
|
||||
excel_entries.append((tokens, raw, c.get("material") or ""))
|
||||
excel_entries.append((tokens, raw, c.get("material") or "", norm))
|
||||
|
||||
new_materials: list[dict] = []
|
||||
for i, cad_part in enumerate(cad_parts):
|
||||
@@ -483,16 +491,30 @@ def build_materials_from_excel(
|
||||
best_mat = ""
|
||||
best_score = 0.0
|
||||
|
||||
for tokens, raw, material in excel_entries:
|
||||
for tokens, raw, material, excel_norm in excel_entries:
|
||||
# 1. Exact match
|
||||
if raw == cad_raw_lower:
|
||||
best_mat = material
|
||||
best_score = 1.0
|
||||
break
|
||||
# 2. Jaccard similarity
|
||||
score = _jaccard(tokens, cad_tokens)
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_mat = material
|
||||
|
||||
# 3. Prefix fallback when Jaccard is below threshold but not zero:
|
||||
# one normalised name starts with the other (handles sub-assembly variants)
|
||||
if best_score < similarity_threshold:
|
||||
for tokens, raw, material, excel_norm in excel_entries:
|
||||
if len(excel_norm) >= 5 and len(cad_norm) >= 5 and (
|
||||
cad_norm.startswith(excel_norm) or excel_norm.startswith(cad_norm)
|
||||
):
|
||||
best_mat = material
|
||||
best_score = 0.7
|
||||
break
|
||||
|
||||
# 4. Position-based fallback
|
||||
if best_score < similarity_threshold:
|
||||
if i < len(excel_components):
|
||||
best_mat = excel_components[i].get("material") or ""
|
||||
|
||||
Reference in New Issue
Block a user