feat: duplicate product detection — STEP conflict warnings on Excel import and CAD upload

- Excel preview detects when a product already has a different STEP file linked
- Excel preview detects intra-Excel conflicts (same product, different CAD model names)
- Product STEP upload warns when replacing an existing file and shows render count
- All warnings are non-blocking (amber badges, toast warnings)
- LEARNINGS.md: all open items resolved

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-14 13:05:40 +01:00
parent f0dd952f63
commit b6bac080bb
10 changed files with 207 additions and 173 deletions
+25
View File
@@ -470,6 +470,11 @@ async def upload_product_cad(
if not product:
raise HTTPException(404, detail="Product not found")
# Check for STEP replacement warnings before proceeding
warnings: list[str] = []
existing_render_count = 0
old_cad_file_id = product.cad_file_id
content = await file.read()
file_hash = hashlib.sha256(content).hexdigest()
@@ -477,6 +482,24 @@ async def upload_product_cad(
existing_cad = await db.execute(select(CadFile).where(CadFile.file_hash == file_hash))
cad_file = existing_cad.scalar_one_or_none()
# Detect replacement: product already has a different CAD file
if old_cad_file_id and (cad_file is None or cad_file.id != old_cad_file_id):
old_name = product.cad_file.original_name if product.cad_file else "unknown"
warnings.append(
f"Replacing existing STEP file '{old_name}' with '{file.filename}'."
)
# Count existing renders (MediaAssets) for this product
from app.domains.media.models import MediaAsset
render_count_result = await db.execute(
select(func.count(MediaAsset.id)).where(MediaAsset.product_id == product_id)
)
existing_render_count = render_count_result.scalar() or 0
if existing_render_count > 0:
warnings.append(
f"This product has {existing_render_count} existing render(s) that were "
"generated from the previous STEP file. They may no longer match."
)
if cad_file is None:
step_dir = Path(settings.upload_dir) / "step_files"
step_dir.mkdir(parents=True, exist_ok=True)
@@ -511,6 +534,8 @@ async def upload_product_cad(
"file_hash": file_hash,
"status": "uploaded" if cad_file.processing_status == ProcessingStatus.pending else "already_exists",
"product_id": str(product_id),
"warnings": warnings,
"existing_render_count": existing_render_count,
}
+18
View File
@@ -39,6 +39,14 @@ class ExcelPreviewRow(BaseModel):
has_step: bool = False
is_duplicate: bool = False
duplicate_of_row: int | None = None
# STEP conflict: existing product has a different STEP file than Excel row's name_cad_modell
step_conflict: bool = False
step_conflict_existing_name: str | None = None
step_conflict_excel_name: str | None = None
# Intra-Excel conflict: same product key appears with different name_cad_modell
cad_name_conflict: bool = False
cad_name_conflict_other_name: str | None = None
cad_name_conflict_row: int | None = None
class ExcelPreviewResponse(BaseModel):
@@ -52,6 +60,8 @@ class ExcelPreviewResponse(BaseModel):
has_step_count: int = 0
no_step_count: int = 0
duplicate_count: int = 0
step_conflict_count: int = 0
cad_name_conflict_count: int = 0
warnings: list[str]
rows: list[ExcelPreviewRow]
column_headers: list[str] = []
@@ -145,6 +155,12 @@ async def upload_excel(
has_step=r.get("has_step", False),
is_duplicate=r.get("is_duplicate", False),
duplicate_of_row=r.get("duplicate_of_row"),
step_conflict=r.get("step_conflict", False),
step_conflict_existing_name=r.get("step_conflict_existing_name"),
step_conflict_excel_name=r.get("step_conflict_excel_name"),
cad_name_conflict=r.get("cad_name_conflict", False),
cad_name_conflict_other_name=r.get("cad_name_conflict_other_name"),
cad_name_conflict_row=r.get("cad_name_conflict_row"),
)
for r in preview.rows
]
@@ -195,6 +211,8 @@ async def upload_excel(
has_step_count=preview.has_step_count,
no_step_count=preview.no_step_count,
duplicate_count=preview.duplicate_count,
step_conflict_count=preview.step_conflict_count,
cad_name_conflict_count=preview.cad_name_conflict_count,
warnings=all_warnings,
rows=annotated_rows,
column_headers=parsed_dict.get("column_headers", []),
+7 -2
View File
@@ -2,6 +2,7 @@
import uuid
from sqlalchemy import select, func, update as sql_update
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import selectinload
from app.domains.products.models import Product
@@ -48,7 +49,9 @@ async def lookup_product(
"""
if produkt_baureihe:
result = await db.execute(
select(Product).where(
select(Product)
.options(selectinload(Product.cad_file))
.where(
func.lower(Product.produkt_baureihe) == produkt_baureihe.lower(),
Product.is_active.is_(True),
)
@@ -61,7 +64,9 @@ async def lookup_product(
if pim_id:
result = await db.execute(
select(Product).where(Product.pim_id == pim_id, Product.is_active.is_(True))
select(Product)
.options(selectinload(Product.cad_file))
.where(Product.pim_id == pim_id, Product.is_active.is_(True))
)
return result.scalar_one_or_none()
+52 -5
View File
@@ -1,5 +1,6 @@
"""Excel import service — maps parsed rows to Product library."""
from dataclasses import dataclass, field
from pathlib import PurePosixPath
from sqlalchemy.ext.asyncio import AsyncSession
from app.services.product_service import (
@@ -8,6 +9,13 @@ from app.services.product_service import (
)
def _stem_lower(name: str | None) -> str:
"""Return the lowercased stem (no extension) of a filename."""
if not name:
return ""
return PurePosixPath(name).stem.lower()
@dataclass
class PreviewResult:
"""Read-only preview: annotates rows without creating anything."""
@@ -18,6 +26,8 @@ class PreviewResult:
has_step_count: int = 0
no_step_count: int = 0
duplicate_count: int = 0
step_conflict_count: int = 0
cad_name_conflict_count: int = 0
warnings: list[str] = field(default_factory=list)
@@ -118,8 +128,8 @@ async def preview_excel_rows(
"""
result = PreviewResult()
# Track unique identifiers we've already resolved in this batch
# key = lower(baureihe) or pim_id → (product_exists, product_id_str | None, has_step, first_row_index)
seen: dict[str, tuple[bool, str | None, bool, int]] = {}
# key = lower(baureihe) or pim_id → (product_exists, product_id_str | None, has_step, first_row_index, name_cad_modell_stem)
seen: dict[str, tuple[bool, str | None, bool, int, str]] = {}
for row in parsed_rows:
pim_id = row.get("pim_id")
@@ -127,6 +137,14 @@ async def preview_excel_rows(
row_index = row.get("row_index", 0)
row["category_key"] = row.get("category_key") or category_key
# Default conflict fields
row["step_conflict"] = False
row["step_conflict_existing_name"] = None
row["step_conflict_excel_name"] = None
row["cad_name_conflict"] = False
row["cad_name_conflict_other_name"] = None
row["cad_name_conflict_row"] = None
# Must have at least one identifier
if not pim_id and not produkt_baureihe:
row["product_exists"] = False
@@ -139,13 +157,24 @@ async def preview_excel_rows(
# Build a cache key
cache_key = (produkt_baureihe or "").lower() or pim_id or ""
excel_cad_name = row.get("name_cad_modell")
excel_cad_stem = _stem_lower(excel_cad_name)
if cache_key in seen:
exists, pid, has_step, first_row = seen[cache_key]
exists, pid, has_step, first_row, first_cad_stem = seen[cache_key]
row["product_exists"] = exists
row["product_id"] = pid
row["has_step"] = has_step
row["is_duplicate"] = True
row["duplicate_of_row"] = first_row
# Intra-Excel conflict: same product key, different name_cad_modell
if excel_cad_stem and first_cad_stem and excel_cad_stem != first_cad_stem:
row["cad_name_conflict"] = True
row["cad_name_conflict_other_name"] = first_cad_stem
row["cad_name_conflict_row"] = first_row
result.cad_name_conflict_count += 1
result.duplicate_count += 1
continue
@@ -156,17 +185,26 @@ async def preview_excel_rows(
row["product_exists"] = True
row["product_id"] = str(product.id)
row["has_step"] = has_step
seen[cache_key] = (True, str(product.id), has_step, row_index)
seen[cache_key] = (True, str(product.id), has_step, row_index, excel_cad_stem)
result.existing_product_count += 1
if has_step:
result.has_step_count += 1
else:
result.no_step_count += 1
# STEP conflict: product already has a different STEP file
if has_step and excel_cad_stem and product.cad_file:
existing_stem = _stem_lower(product.cad_file.original_name)
if existing_stem and existing_stem != excel_cad_stem:
row["step_conflict"] = True
row["step_conflict_existing_name"] = existing_stem
row["step_conflict_excel_name"] = excel_cad_stem
result.step_conflict_count += 1
else:
row["product_exists"] = False
row["product_id"] = None
row["has_step"] = False
seen[cache_key] = (False, None, False, row_index)
seen[cache_key] = (False, None, False, row_index, excel_cad_stem)
result.new_product_count += 1
result.no_step_count += 1
@@ -176,4 +214,13 @@ async def preview_excel_rows(
f"{result.duplicate_count} duplicate Produkt-Baureihe row(s) detected — "
"these are pre-unchecked. Only one row per product will be imported."
)
if result.step_conflict_count > 0:
result.warnings.append(
f"{result.step_conflict_count} product(s) already have a different STEP file linked — "
"importing will not replace the existing STEP file automatically."
)
if result.cad_name_conflict_count > 0:
result.warnings.append(
f"{result.cad_name_conflict_count} row(s) reference the same product with a different CAD model name."
)
return result