feat: duplicate product detection — STEP conflict warnings on Excel import and CAD upload

- Excel preview detects when a product already has a different STEP file linked
- Excel preview detects intra-Excel conflicts (same product, different CAD model names)
- Product STEP upload warns when replacing an existing file and shows render count
- All warnings are non-blocking (amber badges, toast warnings)
- LEARNINGS.md: all open items resolved

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-14 13:05:40 +01:00
parent f0dd952f63
commit b6bac080bb
10 changed files with 207 additions and 173 deletions
@@ -470,6 +470,11 @@ async def upload_product_cad(
    if not product:
        raise HTTPException(404, detail="Product not found")

+    # Check for STEP replacement warnings before proceeding
+    warnings: list[str] = []
+    existing_render_count = 0
+    old_cad_file_id = product.cad_file_id
+
    content = await file.read()
    file_hash = hashlib.sha256(content).hexdigest()

@@ -477,6 +482,24 @@ async def upload_product_cad(
    existing_cad = await db.execute(select(CadFile).where(CadFile.file_hash == file_hash))
    cad_file = existing_cad.scalar_one_or_none()

+    # Detect replacement: product already has a different CAD file
+    if old_cad_file_id and (cad_file is None or cad_file.id != old_cad_file_id):
+        old_name = product.cad_file.original_name if product.cad_file else "unknown"
+        warnings.append(
+            f"Replacing existing STEP file '{old_name}' with '{file.filename}'."
+        )
+        # Count existing renders (MediaAssets) for this product
+        from app.domains.media.models import MediaAsset
+        render_count_result = await db.execute(
+            select(func.count(MediaAsset.id)).where(MediaAsset.product_id == product_id)
+        )
+        existing_render_count = render_count_result.scalar() or 0
+        if existing_render_count > 0:
+            warnings.append(
+                f"This product has {existing_render_count} existing render(s) that were "
+                "generated from the previous STEP file. They may no longer match."
+            )
+
    if cad_file is None:
        step_dir = Path(settings.upload_dir) / "step_files"
        step_dir.mkdir(parents=True, exist_ok=True)
@@ -511,6 +534,8 @@ async def upload_product_cad(
        "file_hash": file_hash,
        "status": "uploaded" if cad_file.processing_status == ProcessingStatus.pending else "already_exists",
        "product_id": str(product_id),
+        "warnings": warnings,
+        "existing_render_count": existing_render_count,
    }


@@ -39,6 +39,14 @@ class ExcelPreviewRow(BaseModel):
    has_step: bool = False
    is_duplicate: bool = False
    duplicate_of_row: int | None = None
+    # STEP conflict: existing product has a different STEP file than Excel row's name_cad_modell
+    step_conflict: bool = False
+    step_conflict_existing_name: str | None = None
+    step_conflict_excel_name: str | None = None
+    # Intra-Excel conflict: same product key appears with different name_cad_modell
+    cad_name_conflict: bool = False
+    cad_name_conflict_other_name: str | None = None
+    cad_name_conflict_row: int | None = None


 class ExcelPreviewResponse(BaseModel):
@@ -52,6 +60,8 @@ class ExcelPreviewResponse(BaseModel):
    has_step_count: int = 0
    no_step_count: int = 0
    duplicate_count: int = 0
+    step_conflict_count: int = 0
+    cad_name_conflict_count: int = 0
    warnings: list[str]
    rows: list[ExcelPreviewRow]
    column_headers: list[str] = []
@@ -145,6 +155,12 @@ async def upload_excel(
            has_step=r.get("has_step", False),
            is_duplicate=r.get("is_duplicate", False),
            duplicate_of_row=r.get("duplicate_of_row"),
+            step_conflict=r.get("step_conflict", False),
+            step_conflict_existing_name=r.get("step_conflict_existing_name"),
+            step_conflict_excel_name=r.get("step_conflict_excel_name"),
+            cad_name_conflict=r.get("cad_name_conflict", False),
+            cad_name_conflict_other_name=r.get("cad_name_conflict_other_name"),
+            cad_name_conflict_row=r.get("cad_name_conflict_row"),
        )
        for r in preview.rows
    ]
@@ -195,6 +211,8 @@ async def upload_excel(
        has_step_count=preview.has_step_count,
        no_step_count=preview.no_step_count,
        duplicate_count=preview.duplicate_count,
+        step_conflict_count=preview.step_conflict_count,
+        cad_name_conflict_count=preview.cad_name_conflict_count,
        warnings=all_warnings,
        rows=annotated_rows,
        column_headers=parsed_dict.get("column_headers", []),
@@ -2,6 +2,7 @@
 import uuid
 from sqlalchemy import select, func, update as sql_update
 from sqlalchemy.ext.asyncio import AsyncSession
+from sqlalchemy.orm import selectinload

 from app.domains.products.models import Product

@@ -48,7 +49,9 @@ async def lookup_product(
    """
    if produkt_baureihe:
        result = await db.execute(
-            select(Product).where(
+            select(Product)
+            .options(selectinload(Product.cad_file))
+            .where(
                func.lower(Product.produkt_baureihe) == produkt_baureihe.lower(),
                Product.is_active.is_(True),
            )
@@ -61,7 +64,9 @@ async def lookup_product(

    if pim_id:
        result = await db.execute(
-            select(Product).where(Product.pim_id == pim_id, Product.is_active.is_(True))
+            select(Product)
+            .options(selectinload(Product.cad_file))
+            .where(Product.pim_id == pim_id, Product.is_active.is_(True))
        )
        return result.scalar_one_or_none()

@@ -1,5 +1,6 @@
 """Excel import service — maps parsed rows to Product library."""
 from dataclasses import dataclass, field
+from pathlib import PurePosixPath
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.services.product_service import (
@@ -8,6 +9,13 @@ from app.services.product_service import (
 )


+def _stem_lower(name: str | None) -> str:
+    """Return the lowercased stem (no extension) of a filename."""
+    if not name:
+        return ""
+    return PurePosixPath(name).stem.lower()
+
+
@dataclass
 class PreviewResult:
    """Read-only preview: annotates rows without creating anything."""
@@ -18,6 +26,8 @@ class PreviewResult:
    has_step_count: int = 0
    no_step_count: int = 0
    duplicate_count: int = 0
+    step_conflict_count: int = 0
+    cad_name_conflict_count: int = 0
    warnings: list[str] = field(default_factory=list)


@@ -118,8 +128,8 @@ async def preview_excel_rows(
    """
    result = PreviewResult()
    # Track unique identifiers we've already resolved in this batch
-    # key = lower(baureihe) or pim_id  →  (product_exists, product_id_str | None, has_step, first_row_index)
-    seen: dict[str, tuple[bool, str | None, bool, int]] = {}
+    # key = lower(baureihe) or pim_id  →  (product_exists, product_id_str | None, has_step, first_row_index, name_cad_modell_stem)
+    seen: dict[str, tuple[bool, str | None, bool, int, str]] = {}

    for row in parsed_rows:
        pim_id = row.get("pim_id")
@@ -127,6 +137,14 @@ async def preview_excel_rows(
        row_index = row.get("row_index", 0)
        row["category_key"] = row.get("category_key") or category_key

+        # Default conflict fields
+        row["step_conflict"] = False
+        row["step_conflict_existing_name"] = None
+        row["step_conflict_excel_name"] = None
+        row["cad_name_conflict"] = False
+        row["cad_name_conflict_other_name"] = None
+        row["cad_name_conflict_row"] = None
+
        # Must have at least one identifier
        if not pim_id and not produkt_baureihe:
            row["product_exists"] = False
@@ -139,13 +157,24 @@ async def preview_excel_rows(
        # Build a cache key
        cache_key = (produkt_baureihe or "").lower() or pim_id or ""

+        excel_cad_name = row.get("name_cad_modell")
+        excel_cad_stem = _stem_lower(excel_cad_name)
+
        if cache_key in seen:
-            exists, pid, has_step, first_row = seen[cache_key]
+            exists, pid, has_step, first_row, first_cad_stem = seen[cache_key]
            row["product_exists"] = exists
            row["product_id"] = pid
            row["has_step"] = has_step
            row["is_duplicate"] = True
            row["duplicate_of_row"] = first_row
+
+            # Intra-Excel conflict: same product key, different name_cad_modell
+            if excel_cad_stem and first_cad_stem and excel_cad_stem != first_cad_stem:
+                row["cad_name_conflict"] = True
+                row["cad_name_conflict_other_name"] = first_cad_stem
+                row["cad_name_conflict_row"] = first_row
+                result.cad_name_conflict_count += 1
+
            result.duplicate_count += 1
            continue

@@ -156,17 +185,26 @@ async def preview_excel_rows(
            row["product_exists"] = True
            row["product_id"] = str(product.id)
            row["has_step"] = has_step
-            seen[cache_key] = (True, str(product.id), has_step, row_index)
+            seen[cache_key] = (True, str(product.id), has_step, row_index, excel_cad_stem)
            result.existing_product_count += 1
            if has_step:
                result.has_step_count += 1
            else:
                result.no_step_count += 1
+
+            # STEP conflict: product already has a different STEP file
+            if has_step and excel_cad_stem and product.cad_file:
+                existing_stem = _stem_lower(product.cad_file.original_name)
+                if existing_stem and existing_stem != excel_cad_stem:
+                    row["step_conflict"] = True
+                    row["step_conflict_existing_name"] = existing_stem
+                    row["step_conflict_excel_name"] = excel_cad_stem
+                    result.step_conflict_count += 1
        else:
            row["product_exists"] = False
            row["product_id"] = None
            row["has_step"] = False
-            seen[cache_key] = (False, None, False, row_index)
+            seen[cache_key] = (False, None, False, row_index, excel_cad_stem)
            result.new_product_count += 1
            result.no_step_count += 1

@@ -176,4 +214,13 @@ async def preview_excel_rows(
            f"{result.duplicate_count} duplicate Produkt-Baureihe row(s) detected — "
            "these are pre-unchecked. Only one row per product will be imported."
        )
+    if result.step_conflict_count > 0:
+        result.warnings.append(
+            f"{result.step_conflict_count} product(s) already have a different STEP file linked — "
+            "importing will not replace the existing STEP file automatically."
+        )
+    if result.cad_name_conflict_count > 0:
+        result.warnings.append(
+            f"{result.cad_name_conflict_count} row(s) reference the same product with a different CAD model name."
+        )
    return result