Files
HartOMat/backend/tests/test_excel_parser.py
2026-03-05 22:12:38 +01:00

654 lines
27 KiB
Python

"""
Unit tests for app.services.excel_parser.parse_excel
Covers all 7 sample Excel order files:
TRB, Kugellager, CRB, Gleitlager, SRB_TORB, Linear_schiene, Anschlagplatten
Each category class verifies:
- Correct category_key detected
- Correct template_name resolved
- Expected number of data rows (non-empty rows)
- Row indices (first data row is Excel row 4)
- medias_rendering values parsed correctly
- First row standard fields match expected values
- Component count (both per-row and total)
- Component fields (part_name lowercased, material, component_type)
- No unexpected warnings
- parsed_excel_to_dict / parsed_row_to_dict serialisation is correct
The cross-file suite (TestAllFilesStructural) re-runs key invariants
against every file to catch regressions quickly.
The TestParseExcelErrors suite tests ValueError / warning paths without
touching the real Excel files.
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
# Ensure backend package is importable when running from any directory.
BACKEND_DIR = Path(__file__).resolve().parent.parent
if str(BACKEND_DIR) not in sys.path:
sys.path.insert(0, str(BACKEND_DIR))
from app.services.excel_parser import (
ParsedExcel,
ParsedRow,
ParsedComponent,
parse_excel,
parsed_excel_to_dict,
parsed_row_to_dict,
_normalize_filename,
_to_bool,
)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _all_components(parsed: ParsedExcel) -> list[ParsedComponent]:
    """Collect the components of every parsed row into a single flat list.

    The result follows the order of ``parsed.rows`` and, within each row,
    the order of that row's ``components``.
    """
    collected: list[ParsedComponent] = []
    for parsed_row in parsed.rows:
        collected.extend(parsed_row.components)
    return collected
# ---------------------------------------------------------------------------
# TRB — Tapered Roller Bearings
# ---------------------------------------------------------------------------
class TestTRBParser:
    """Expectations for the TRB sample workbook (TRB_Testscope_20260128.xlsx)."""

    # -- workbook-level results ---------------------------------------------

    def test_category_detected(self, parsed_trb: ParsedExcel):
        """The detected category key is ``TRB``."""
        expected_key = "TRB"
        assert parsed_trb.category_key == expected_key

    def test_template_name(self, parsed_trb: ParsedExcel):
        """The resolved template carries the TRB display name."""
        expected_template = "Tapered Roller Bearings (TRB)"
        assert parsed_trb.template_name == expected_template

    def test_row_count(self, parsed_trb: ParsedExcel):
        """Four data rows are parsed."""
        data_rows = parsed_trb.rows
        assert len(data_rows) == 4

    def test_row_indices(self, parsed_trb: ParsedExcel):
        """Rows are numbered 4 through 7."""
        seen_indices = []
        for row in parsed_trb.rows:
            seen_indices.append(row.row_index)
        assert seen_indices == [4, 5, 6, 7]

    def test_no_warnings(self, parsed_trb: ParsedExcel):
        """Parsing the sample file yields an empty warnings list."""
        emitted = parsed_trb.warnings
        assert emitted == []

    # -- first data row -----------------------------------------------------

    def test_first_row_ebene1(self, parsed_trb: ParsedExcel):
        first_row = parsed_trb.rows[0]
        assert first_row.ebene1 == "Wälz- und Gleitlager"

    def test_first_row_baureihe(self, parsed_trb: ParsedExcel):
        first_row = parsed_trb.rows[0]
        assert first_row.baureihe == "Kegelrollenlager"

    def test_first_row_pim_id(self, parsed_trb: ParsedExcel):
        first_row = parsed_trb.rows[0]
        assert first_row.pim_id == "2305091021"

    def test_first_row_gewaehltes_produkt(self, parsed_trb: ParsedExcel):
        first_row = parsed_trb.rows[0]
        assert first_row.gewaehltes_produkt == "F-802070.TR4-AM"

    def test_all_medias_rendering_true(self, parsed_trb: ParsedExcel):
        """Every row has ``medias_rendering`` set to exactly ``True``."""
        for row in parsed_trb.rows:
            assert row.medias_rendering is True

    # -- components ---------------------------------------------------------

    def test_first_row_component_count(self, parsed_trb: ParsedExcel):
        first_row_components = parsed_trb.rows[0].components
        assert len(first_row_components) == 20

    def test_total_component_count(self, parsed_trb: ParsedExcel):
        """Across all rows the file contributes 31 components."""
        total = 0
        for row in parsed_trb.rows:
            total += len(row.components)
        assert total == 31

    def test_first_component_material(self, parsed_trb: ParsedExcel):
        leading_component = parsed_trb.rows[0].components[0]
        assert leading_component.material == "Stahl v2"

    def test_part_names_lowercase(self, parsed_trb: ParsedExcel):
        """Every non-empty part name equals its lower-cased form."""
        names = [c.part_name for c in _all_components(parsed_trb) if c.part_name]
        for name in names:
            assert name == name.lower()

    def test_component_column_indices_gte_11(self, parsed_trb: ParsedExcel):
        """No component reports a column index below 11."""
        assert all(
            component.column_index >= 11
            for component in _all_components(parsed_trb)
        )

    # -- serialisation ------------------------------------------------------

    def test_serialisation_keys(self, parsed_trb: ParsedExcel):
        """The serialised dict repeats the category key and the row count."""
        payload = parsed_excel_to_dict(parsed_trb)
        observed = (payload["category_key"], payload["row_count"], len(payload["rows"]))
        assert observed == ("TRB", 4, 4)

    def test_serialised_row_has_components_list(self, parsed_trb: ParsedExcel):
        """The first serialised row exposes its components as a list."""
        first_serialised_row = parsed_excel_to_dict(parsed_trb)["rows"][0]
        assert isinstance(first_serialised_row["components"], list)
# ---------------------------------------------------------------------------
# Kugellager — Ball Bearings
# ---------------------------------------------------------------------------
class TestKugellagerParser:
    """All assertions derived from Kugellager_Testscope_20260128.xlsx."""

    def test_category_detected(self, parsed_kugellager: ParsedExcel):
        """The detected category key is ``Kugellager``."""
        assert parsed_kugellager.category_key == "Kugellager"

    def test_template_name(self, parsed_kugellager: ParsedExcel):
        """The resolved template carries the ball-bearing display name."""
        assert parsed_kugellager.template_name == "Kugellager (Ball Bearings)"

    def test_row_count(self, parsed_kugellager: ParsedExcel):
        """Nine data rows are parsed."""
        assert len(parsed_kugellager.rows) == 9

    def test_row_indices(self, parsed_kugellager: ParsedExcel):
        """Rows are numbered 4 through 12 with no gaps or repeats.

        Checking the whole sequence (as the sibling category classes do)
        rather than only the first and last entry also catches a duplicated,
        skipped or mis-ordered row in the middle of the sheet.
        """
        indices = [r.row_index for r in parsed_kugellager.rows]
        assert indices == list(range(4, 13))

    def test_no_warnings(self, parsed_kugellager: ParsedExcel):
        """Parsing the sample file yields an empty warnings list."""
        assert parsed_kugellager.warnings == []

    def test_first_row_ebene1(self, parsed_kugellager: ParsedExcel):
        assert parsed_kugellager.rows[0].ebene1 == "Wälz- und Gleitlager"

    def test_first_row_baureihe(self, parsed_kugellager: ParsedExcel):
        assert parsed_kugellager.rows[0].baureihe == "Axial-Rillenkugellager"

    def test_first_row_pim_id(self, parsed_kugellager: ParsedExcel):
        assert parsed_kugellager.rows[0].pim_id == "2305100101"

    def test_first_row_gewaehltes_produkt(self, parsed_kugellager: ParsedExcel):
        assert parsed_kugellager.rows[0].gewaehltes_produkt == "51413-MP"

    def test_all_medias_rendering_true(self, parsed_kugellager: ParsedExcel):
        """Every row has ``medias_rendering`` set to exactly ``True``."""
        assert all(r.medias_rendering is True for r in parsed_kugellager.rows)

    def test_total_component_count(self, parsed_kugellager: ParsedExcel):
        """Across all rows the file contributes 55 components."""
        assert sum(len(r.components) for r in parsed_kugellager.rows) == 55

    def test_first_component_material(self, parsed_kugellager: ParsedExcel):
        assert parsed_kugellager.rows[0].components[0].material == "Stahl v2"

    def test_part_names_lowercase(self, parsed_kugellager: ParsedExcel):
        """Every non-empty part name equals its lower-cased form."""
        for comp in _all_components(parsed_kugellager):
            if comp.part_name:
                assert comp.part_name == comp.part_name.lower()

    def test_serialisation_row_count(self, parsed_kugellager: ParsedExcel):
        """The serialised dict reports nine rows."""
        assert parsed_excel_to_dict(parsed_kugellager)["row_count"] == 9
# ---------------------------------------------------------------------------
# CRB — Cylindrical Roller Bearings
# ---------------------------------------------------------------------------
class TestCRBParser:
    """Expectations for the CRB sample workbook (CRB_Testscope_20260128.xlsx)."""

    def test_category_detected(self, parsed_crb: ParsedExcel):
        """The detected category key is ``CRB``."""
        detected = parsed_crb.category_key
        assert detected == "CRB"

    def test_template_name(self, parsed_crb: ParsedExcel):
        """The resolved template carries the CRB display name."""
        resolved = parsed_crb.template_name
        assert resolved == "Cylindrical Roller Bearings (CRB)"

    def test_row_count(self, parsed_crb: ParsedExcel):
        """Four data rows are parsed."""
        row_total = len(parsed_crb.rows)
        assert row_total == 4

    def test_row_indices(self, parsed_crb: ParsedExcel):
        """Rows are numbered 4 through 7."""
        observed = [entry.row_index for entry in parsed_crb.rows]
        assert observed == [4, 5, 6, 7]

    def test_no_warnings(self, parsed_crb: ParsedExcel):
        """Parsing the sample file yields an empty warnings list."""
        assert parsed_crb.warnings == []

    def test_first_row_baureihe(self, parsed_crb: ParsedExcel):
        head = parsed_crb.rows[0]
        assert head.baureihe == "Axial-Zylinderrollenlager"

    def test_first_row_pim_id(self, parsed_crb: ParsedExcel):
        head = parsed_crb.rows[0]
        assert head.pim_id == "2305110102"

    def test_first_row_gewaehltes_produkt(self, parsed_crb: ParsedExcel):
        head = parsed_crb.rows[0]
        assert head.gewaehltes_produkt == "893..-M"

    def test_all_medias_rendering_true(self, parsed_crb: ParsedExcel):
        """Every row has ``medias_rendering`` set to exactly ``True``."""
        for entry in parsed_crb.rows:
            assert entry.medias_rendering is True

    def test_first_row_component_count(self, parsed_crb: ParsedExcel):
        head_components = parsed_crb.rows[0].components
        assert len(head_components) == 4

    def test_total_component_count(self, parsed_crb: ParsedExcel):
        """Across all rows the file contributes 13 components."""
        per_row_counts = [len(entry.components) for entry in parsed_crb.rows]
        assert sum(per_row_counts) == 13

    def test_first_component_material(self, parsed_crb: ParsedExcel):
        leading_component = parsed_crb.rows[0].components[0]
        assert leading_component.material == "Stahl v2"

    def test_cad_model_names_lowercase(self, parsed_crb: ParsedExcel):
        """Every non-empty CAD model name equals its lower-cased form."""
        model_names = [
            entry.name_cad_modell
            for entry in parsed_crb.rows
            if entry.name_cad_modell
        ]
        assert all(name == name.lower() for name in model_names)

    def test_serialisation(self, parsed_crb: ParsedExcel):
        """The serialised dict repeats the category key and the row count."""
        payload = parsed_excel_to_dict(parsed_crb)
        assert (payload["category_key"], payload["row_count"]) == ("CRB", 4)
# ---------------------------------------------------------------------------
# Gleitlager — Plain Bearings
# ---------------------------------------------------------------------------
class TestGleitlagerParser:
    """Expectations for the Gleitlager sample workbook (Gleitlager_Testscope_20260128.xlsx)."""

    def test_category_detected(self, parsed_gleitlager: ParsedExcel):
        """The detected category key is ``Gleitlager``."""
        expected_key = "Gleitlager"
        assert parsed_gleitlager.category_key == expected_key

    def test_template_name(self, parsed_gleitlager: ParsedExcel):
        """The resolved template carries the plain-bearing display name."""
        expected_template = "Gleitlager (Plain Bearings)"
        assert parsed_gleitlager.template_name == expected_template

    def test_row_count(self, parsed_gleitlager: ParsedExcel):
        """Three data rows are parsed."""
        parsed_rows = parsed_gleitlager.rows
        assert len(parsed_rows) == 3

    def test_row_indices(self, parsed_gleitlager: ParsedExcel):
        """Rows are numbered 4 through 6."""
        numbering = [item.row_index for item in parsed_gleitlager.rows]
        assert numbering == [4, 5, 6]

    def test_no_warnings(self, parsed_gleitlager: ParsedExcel):
        """Parsing the sample file yields an empty warnings list."""
        reported = parsed_gleitlager.warnings
        assert reported == []

    def test_first_row_baureihe(self, parsed_gleitlager: ParsedExcel):
        opening_row = parsed_gleitlager.rows[0]
        assert opening_row.baureihe == "Gelenklager"

    def test_first_row_pim_id_is_none(self, parsed_gleitlager: ParsedExcel):
        """The first Gleitlager row carries no PIM-ID, so the field is ``None``."""
        opening_row = parsed_gleitlager.rows[0]
        assert opening_row.pim_id is None

    def test_first_row_gewaehltes_produkt(self, parsed_gleitlager: ParsedExcel):
        opening_row = parsed_gleitlager.rows[0]
        assert opening_row.gewaehltes_produkt == "GE..-HF"

    def test_all_medias_rendering_true(self, parsed_gleitlager: ParsedExcel):
        """Every row has ``medias_rendering`` set to exactly ``True``."""
        for item in parsed_gleitlager.rows:
            assert item.medias_rendering is True

    def test_total_component_count(self, parsed_gleitlager: ParsedExcel):
        """Across all rows the file contributes 6 components."""
        running_total = 0
        for item in parsed_gleitlager.rows:
            running_total += len(item.components)
        assert running_total == 6

    def test_first_component_material(self, parsed_gleitlager: ParsedExcel):
        leading_component = parsed_gleitlager.rows[0].components[0]
        assert leading_component.material == "Durotect CMT"

    def test_serialisation(self, parsed_gleitlager: ParsedExcel):
        """The serialised dict repeats the category key and the row count."""
        payload = parsed_excel_to_dict(parsed_gleitlager)
        assert (payload["category_key"], payload["row_count"]) == ("Gleitlager", 3)
# ---------------------------------------------------------------------------
# SRB_TORB — Spherical / Toroidal Roller Bearings
# ---------------------------------------------------------------------------
class TestSRBTORBParser:
    """Expectations for the SRB/TORB sample workbook (SRB_TORB_Testscope_20260128.xlsx)."""

    def test_category_detected(self, parsed_srb_torb: ParsedExcel):
        """The detected category key is ``SRB_TORB``."""
        key = parsed_srb_torb.category_key
        assert key == "SRB_TORB"

    def test_template_name(self, parsed_srb_torb: ParsedExcel):
        """The resolved template carries the SRB/TORB display name."""
        wanted = "Spherical / Toroidal Roller Bearings (SRB/TORB)"
        assert parsed_srb_torb.template_name == wanted

    def test_row_count(self, parsed_srb_torb: ParsedExcel):
        """Two data rows are parsed."""
        how_many = len(parsed_srb_torb.rows)
        assert how_many == 2

    def test_row_indices(self, parsed_srb_torb: ParsedExcel):
        """Rows are numbered 4 and 5."""
        positions = []
        for record in parsed_srb_torb.rows:
            positions.append(record.row_index)
        assert positions == [4, 5]

    def test_no_warnings(self, parsed_srb_torb: ParsedExcel):
        """Parsing the sample file yields an empty warnings list."""
        assert parsed_srb_torb.warnings == []

    def test_first_row_baureihe(self, parsed_srb_torb: ParsedExcel):
        top = parsed_srb_torb.rows[0]
        assert top.baureihe == "Radial SRB"

    def test_first_row_pim_id(self, parsed_srb_torb: ParsedExcel):
        top = parsed_srb_torb.rows[0]
        assert top.pim_id == "2305091102"

    def test_first_row_gewaehltes_produkt(self, parsed_srb_torb: ParsedExcel):
        top = parsed_srb_torb.rows[0]
        assert top.gewaehltes_produkt == "241..-BE-XL-K30-H40"

    def test_all_medias_rendering_true(self, parsed_srb_torb: ParsedExcel):
        """Every row has ``medias_rendering`` set to exactly ``True``."""
        for record in parsed_srb_torb.rows:
            assert record.medias_rendering is True

    def test_first_row_component_count(self, parsed_srb_torb: ParsedExcel):
        top_components = parsed_srb_torb.rows[0].components
        assert len(top_components) == 4

    def test_total_component_count(self, parsed_srb_torb: ParsedExcel):
        """Across all rows the file contributes 8 components."""
        sizes = [len(record.components) for record in parsed_srb_torb.rows]
        assert sum(sizes) == 8

    def test_first_component_material(self, parsed_srb_torb: ParsedExcel):
        leading_component = parsed_srb_torb.rows[0].components[0]
        assert leading_component.material == "Stahl v2"

    def test_serialisation(self, parsed_srb_torb: ParsedExcel):
        """The serialised dict repeats the category key and the row count."""
        payload = parsed_excel_to_dict(parsed_srb_torb)
        assert (payload["category_key"], payload["row_count"]) == ("SRB_TORB", 2)
# ---------------------------------------------------------------------------
# Linear_schiene — Linear Guide Rails
# ---------------------------------------------------------------------------
class TestLinearSchieneParser:
    """Expectations for the linear-rail sample workbook (Linear_schiene_Testscope_20260128.xlsx)."""

    def test_category_detected(self, parsed_linear_schiene: ParsedExcel):
        """The detected category key is ``Linear_schiene``."""
        expected_key = "Linear_schiene"
        assert parsed_linear_schiene.category_key == expected_key

    def test_template_name(self, parsed_linear_schiene: ParsedExcel):
        """The resolved template carries the linear-guide display name."""
        expected_template = "Linear Guide Rails"
        assert parsed_linear_schiene.template_name == expected_template

    def test_row_count(self, parsed_linear_schiene: ParsedExcel):
        """Exactly one data row is parsed."""
        only_rows = parsed_linear_schiene.rows
        assert len(only_rows) == 1

    def test_row_index_starts_at_4(self, parsed_linear_schiene: ParsedExcel):
        """The single data row is numbered 4."""
        sole_row = parsed_linear_schiene.rows[0]
        assert sole_row.row_index == 4

    def test_no_warnings(self, parsed_linear_schiene: ParsedExcel):
        """Parsing the sample file yields an empty warnings list."""
        assert parsed_linear_schiene.warnings == []

    def test_first_row_ebene1(self, parsed_linear_schiene: ParsedExcel):
        sole_row = parsed_linear_schiene.rows[0]
        assert sole_row.ebene1 == "Linearsysteme"

    def test_first_row_baureihe(self, parsed_linear_schiene: ParsedExcel):
        sole_row = parsed_linear_schiene.rows[0]
        assert sole_row.baureihe == "Rollenumlaufeinheiten"

    def test_first_row_pim_id(self, parsed_linear_schiene: ParsedExcel):
        sole_row = parsed_linear_schiene.rows[0]
        assert sole_row.pim_id == "233092AB21"

    def test_first_row_gewaehltes_produkt(self, parsed_linear_schiene: ParsedExcel):
        sole_row = parsed_linear_schiene.rows[0]
        assert sole_row.gewaehltes_produkt == "TSX..-D"

    def test_medias_rendering(self, parsed_linear_schiene: ParsedExcel):
        """The row's ``medias_rendering`` flag is exactly ``True``."""
        flag = parsed_linear_schiene.rows[0].medias_rendering
        assert flag is True

    def test_component_count(self, parsed_linear_schiene: ParsedExcel):
        """The row holds a single component."""
        parts = parsed_linear_schiene.rows[0].components
        assert len(parts) == 1

    def test_first_component_part_name(self, parsed_linear_schiene: ParsedExcel):
        part_name = parsed_linear_schiene.rows[0].components[0].part_name
        assert part_name == "tsx25d-g1-hj-gen.prt"

    def test_first_component_material(self, parsed_linear_schiene: ParsedExcel):
        material = parsed_linear_schiene.rows[0].components[0].material
        assert material == "Stahl v2"

    def test_serialisation(self, parsed_linear_schiene: ParsedExcel):
        """The serialised dict repeats the category key and the row count."""
        payload = parsed_excel_to_dict(parsed_linear_schiene)
        assert (payload["category_key"], payload["row_count"]) == ("Linear_schiene", 1)
# ---------------------------------------------------------------------------
# Anschlagplatten — End Plates
# ---------------------------------------------------------------------------
class TestAnschlagplattenParser:
    """Expectations for the end-plate sample workbook (Anschlagplatten_Testscope_20260128.xlsx)."""

    def test_category_detected(self, parsed_anschlagplatten: ParsedExcel):
        """The detected category key is ``Anschlagplatten``."""
        found_key = parsed_anschlagplatten.category_key
        assert found_key == "Anschlagplatten"

    def test_template_name(self, parsed_anschlagplatten: ParsedExcel):
        """The resolved template carries the end-plate display name."""
        found_template = parsed_anschlagplatten.template_name
        assert found_template == "End Plates (Anschlagplatten)"

    def test_row_count(self, parsed_anschlagplatten: ParsedExcel):
        """Two data rows are parsed."""
        row_total = len(parsed_anschlagplatten.rows)
        assert row_total == 2

    def test_row_indices(self, parsed_anschlagplatten: ParsedExcel):
        """Rows are numbered 4 and 5."""
        numbering = [line.row_index for line in parsed_anschlagplatten.rows]
        assert numbering == [4, 5]

    def test_no_warnings(self, parsed_anschlagplatten: ParsedExcel):
        """Parsing the sample file yields an empty warnings list."""
        assert parsed_anschlagplatten.warnings == []

    def test_first_row_ebene1(self, parsed_anschlagplatten: ParsedExcel):
        first_line = parsed_anschlagplatten.rows[0]
        assert first_line.ebene1 == "Linearsysteme"

    def test_first_row_baureihe(self, parsed_anschlagplatten: ParsedExcel):
        first_line = parsed_anschlagplatten.rows[0]
        assert first_line.baureihe == "Endplatten für Führungsschiene LFS"

    def test_first_row_pim_id(self, parsed_anschlagplatten: ParsedExcel):
        first_line = parsed_anschlagplatten.rows[0]
        assert first_line.pim_id == "233092AM41"

    def test_first_row_gewaehltes_produkt(self, parsed_anschlagplatten: ParsedExcel):
        first_line = parsed_anschlagplatten.rows[0]
        assert first_line.gewaehltes_produkt == "ANS.LFS52-FH"

    def test_all_medias_rendering_true(self, parsed_anschlagplatten: ParsedExcel):
        """Every row has ``medias_rendering`` set to exactly ``True``."""
        for line in parsed_anschlagplatten.rows:
            assert line.medias_rendering is True

    def test_total_component_count(self, parsed_anschlagplatten: ParsedExcel):
        """Across all rows the file contributes 3 components."""
        tally = 0
        for line in parsed_anschlagplatten.rows:
            tally += len(line.components)
        assert tally == 3

    def test_first_component_part_name(self, parsed_anschlagplatten: ParsedExcel):
        part_name = parsed_anschlagplatten.rows[0].components[0].part_name
        assert part_name == "ans_lfs52-fh-0011_p.prt"

    def test_first_component_material(self, parsed_anschlagplatten: ParsedExcel):
        material = parsed_anschlagplatten.rows[0].components[0].material
        assert material == "Stahl brüniert"

    def test_serialisation(self, parsed_anschlagplatten: ParsedExcel):
        """The serialised dict repeats the category key and the row count."""
        payload = parsed_excel_to_dict(parsed_anschlagplatten)
        assert (payload["category_key"], payload["row_count"]) == ("Anschlagplatten", 2)
# ---------------------------------------------------------------------------
# Cross-file structural invariants
# ---------------------------------------------------------------------------
class TestAllFilesStructural:
    """Invariants that must hold for every one of the 7 sample files.

    Each test receives ``parsed_excel_all``, a mapping from category key to the
    parsed result for that category's sample file, and checks one property on
    every entry. Failure messages name the offending category (and row where
    applicable).
    """

    ALL_CATEGORIES = [
        "TRB", "Kugellager", "CRB", "Gleitlager",
        "SRB_TORB", "Linear_schiene", "Anschlagplatten",
    ]

    def test_all_categories_detected(self, parsed_excel_all: dict):
        """Every expected category is present and reports its own key."""
        for cat in self.ALL_CATEGORIES:
            assert parsed_excel_all[cat].category_key == cat

    def test_all_have_template_names(self, parsed_excel_all: dict):
        """A template name was resolved for every file."""
        for cat, parsed in parsed_excel_all.items():
            assert parsed.template_name is not None, f"{cat}: template_name is None"

    def test_all_have_at_least_one_row(self, parsed_excel_all: dict):
        """No file parses to an empty row list."""
        for cat, parsed in parsed_excel_all.items():
            assert len(parsed.rows) > 0, f"{cat}: no data rows parsed"

    def test_all_rows_start_at_index_4(self, parsed_excel_all: dict):
        """The first parsed row of every file has row_index 4."""
        for cat, parsed in parsed_excel_all.items():
            # Guard first so an empty file fails with a readable assertion
            # message instead of a bare IndexError.
            assert parsed.rows, f"{cat}: no data rows parsed"
            first_index = parsed.rows[0].row_index
            assert first_index == 4, (
                f"{cat}: first row_index is {first_index}, expected 4"
            )

    def test_row_indices_monotonically_increasing(self, parsed_excel_all: dict):
        """Row indices increase strictly from one row to the next.

        A comparison against ``sorted(indices)`` would accept repeated
        indices; pairwise ``<`` rejects duplicates as well as mis-ordering.
        """
        for cat, parsed in parsed_excel_all.items():
            indices = [r.row_index for r in parsed.rows]
            strictly_increasing = all(
                earlier < later for earlier, later in zip(indices, indices[1:])
            )
            assert strictly_increasing, (
                f"{cat}: row indices not strictly ascending: {indices}"
            )

    def test_all_medias_rendering_true(self, parsed_excel_all: dict):
        """Every row of every file has medias_rendering set to exactly True."""
        for cat, parsed in parsed_excel_all.items():
            for row in parsed.rows:
                assert row.medias_rendering is True, (
                    f"{cat} row {row.row_index}: medias_rendering={row.medias_rendering}"
                )

    def test_all_files_have_no_warnings(self, parsed_excel_all: dict):
        """No sample file produces parser warnings."""
        for cat, parsed in parsed_excel_all.items():
            assert parsed.warnings == [], f"{cat} produced warnings: {parsed.warnings}"

    def test_all_component_column_indices_gte_11(self, parsed_excel_all: dict):
        """No component reports a column index below 11."""
        for cat, parsed in parsed_excel_all.items():
            for row in parsed.rows:
                for comp in row.components:
                    assert comp.column_index >= 11, (
                        f"{cat} row {row.row_index}: component column_index={comp.column_index}"
                    )

    def test_all_part_names_lowercase(self, parsed_excel_all: dict):
        """The parser normalises filenames to lowercase."""
        for cat, parsed in parsed_excel_all.items():
            for row in parsed.rows:
                for comp in row.components:
                    if comp.part_name:
                        assert comp.part_name == comp.part_name.lower(), (
                            f"{cat} row {row.row_index}: part_name not lowercase: {comp.part_name!r}"
                        )

    def test_all_cad_model_names_lowercase(self, parsed_excel_all: dict):
        """Every non-empty CAD model name equals its lower-cased form."""
        for cat, parsed in parsed_excel_all.items():
            for row in parsed.rows:
                if row.name_cad_modell:
                    assert row.name_cad_modell == row.name_cad_modell.lower(), (
                        f"{cat} row {row.row_index}: name_cad_modell not lowercase: {row.name_cad_modell!r}"
                    )

    def test_all_have_at_least_11_column_headers(self, parsed_excel_all: dict):
        """Every file exposes at least 11 column headers."""
        for cat, parsed in parsed_excel_all.items():
            assert len(parsed.column_headers) >= 11, (
                f"{cat}: only {len(parsed.column_headers)} column headers (expected >= 11)"
            )

    def test_serialised_dict_required_keys(self, parsed_excel_all: dict):
        """The file-level serialised dict contains all required keys."""
        required = {
            "filename", "category_key", "template_name",
            "row_count", "column_headers", "rows", "warnings",
        }
        for cat, parsed in parsed_excel_all.items():
            d = parsed_excel_to_dict(parsed)
            missing = required - d.keys()
            assert not missing, f"{cat}: serialised dict missing keys: {missing}"

    def test_serialised_row_required_keys(self, parsed_excel_all: dict):
        """Every serialised row contains all required keys."""
        required = {
            "row_index", "ebene1", "ebene2", "baureihe", "pim_id",
            "produkt_baureihe", "gewaehltes_produkt", "name_cad_modell",
            "gewuenschte_bildnummer", "lagertyp", "medias_rendering", "components",
        }
        for cat, parsed in parsed_excel_all.items():
            for row in parsed.rows:
                d = parsed_row_to_dict(row)
                missing = required - d.keys()
                assert not missing, (
                    f"{cat} row {row.row_index}: serialised row missing keys: {missing}"
                )

    def test_serialised_component_required_keys(self, parsed_excel_all: dict):
        """Every serialised component contains all required keys."""
        required = {"part_name", "material", "component_type", "column_index"}
        for cat, parsed in parsed_excel_all.items():
            for row in parsed.rows:
                for comp_d in parsed_row_to_dict(row)["components"]:
                    missing = required - comp_d.keys()
                    assert not missing, (
                        f"{cat} row {row.row_index}: component dict missing keys: {missing}"
                    )

    def test_serialised_row_count_matches(self, parsed_excel_all: dict):
        """row_count, the serialised rows and the parsed rows all agree in length."""
        for cat, parsed in parsed_excel_all.items():
            d = parsed_excel_to_dict(parsed)
            assert d["row_count"] == len(d["rows"]) == len(parsed.rows), (
                f"{cat}: row_count={d['row_count']}, "
                f"serialised rows={len(d['rows'])}, parsed rows={len(parsed.rows)}"
            )
# ---------------------------------------------------------------------------
# Internal helper unit tests
# ---------------------------------------------------------------------------
class TestNormalizeFilename:
    """Direct checks of the private ``_normalize_filename`` helper."""

    def test_lowercases_extension(self):
        """Upper-case input comes back entirely in lower case."""
        normalised = _normalize_filename("TEST.PRT")
        assert normalised == "test.prt"

    def test_strips_leading_trailing_spaces(self):
        """Surrounding spaces are removed in addition to lower-casing."""
        padded = " 81113-L_cut.stp "
        assert _normalize_filename(padded) == "81113-l_cut.stp"

    def test_none_returns_none(self):
        """``None`` is passed through unchanged."""
        outcome = _normalize_filename(None)
        assert outcome is None

    def test_empty_string_returns_none(self):
        # Despite the test name, the accepted contract is deliberately loose:
        # an empty input may come back either as None or as an empty string,
        # so the assertion tolerates both.
        outcome = _normalize_filename("")
        assert outcome is None or outcome == ""
class TestToBool:
    """Checks for ``_to_bool`` and for the type of the parsed ``medias_rendering`` flag."""

    @pytest.mark.parametrize("val,expected", [
        (1, True),
        (0, False),
        (True, True),
        (False, False),
        ("1", True),
        ("0", False),
        ("ja", True),
        ("Ja", True),
        ("nein", False),
        ("Nein", False),
        ("yes", True),
        ("no", False),
        ("x", True),
        ("", False),
        (None, None),
    ])
    def test_to_bool_parametrize(self, val, expected):
        """``_to_bool`` maps each input to exactly ``True``, ``False`` or ``None``.

        Identity is used instead of ``==`` because ``1 == True`` and
        ``0 == False`` in Python: an equality check would let an un-coerced
        integer slip through, and ``None`` should be compared with ``is``.
        """
        assert _to_bool(val) is expected

    def test_medias_rendering_is_bool_or_none(self, parsed_excel_all: dict):
        """Every parsed ``medias_rendering`` value is a real ``bool`` or ``None``.

        A membership test against ``(True, False, None)`` compares by equality
        and would also accept the integers 0 and 1, so the type is checked
        explicitly.
        """
        for cat, parsed in parsed_excel_all.items():
            for row in parsed.rows:
                value = row.medias_rendering
                assert value is None or isinstance(value, bool), (
                    f"{cat} row {row.row_index}: unexpected medias_rendering={value!r}"
                )
# ---------------------------------------------------------------------------
# Error handling
# ---------------------------------------------------------------------------
class TestParseExcelErrors:
    """Error and warning paths of ``parse_excel``, using throwaway workbooks."""

    @staticmethod
    def _write_workbook(target: Path, sheet_rows) -> Path:
        """Save a new workbook whose active sheet holds ``sheet_rows``; return ``target``."""
        import openpyxl

        workbook = openpyxl.Workbook()
        sheet = workbook.active
        for cells in sheet_rows:
            sheet.append(cells)
        workbook.save(target)
        return target

    def test_nonexistent_file_raises(self, tmp_path: Path):
        """A path that does not exist is reported as a ValueError."""
        missing_file = tmp_path / "does_not_exist.xlsx"
        with pytest.raises(ValueError, match="Cannot open Excel file"):
            parse_excel(missing_file)

    def test_too_few_rows_raises(self, tmp_path: Path):
        """A sheet with only two rows is rejected with a ValueError."""
        short_file = self._write_workbook(
            tmp_path / "short.xlsx",
            [
                ["only one row"],
                ["only two rows"],
            ],
        )
        with pytest.raises(ValueError, match="fewer than 3 rows"):
            parse_excel(short_file)

    def test_empty_data_rows_produces_warning(self, tmp_path: Path):
        """A file with valid headers but zero data rows should warn, not raise."""
        header_row = [
            "Ebene1", "Ebene2", "Baureihe", "PIM", "Produkt", "SEP",
            "Produkt", "Name", "Bildnr", "Lagertyp", "Medias",
        ]
        # Two instruction rows plus the header row; deliberately nothing after.
        header_only_file = self._write_workbook(
            tmp_path / "no_data.xlsx",
            [
                ["Instructions row 1"],
                ["Instructions row 2"],
                header_row,
            ],
        )
        outcome = parse_excel(header_only_file)
        assert outcome.rows == []
        assert outcome.warnings

    def test_parse_accepts_pathlib_path(self, excel_paths: dict):
        """parse_excel should accept a Path object, not just a string."""
        trb_path = excel_paths["TRB"]
        assert isinstance(trb_path, Path)
        outcome = parse_excel(trb_path)
        assert outcome.category_key == "TRB"

    def test_parse_accepts_string_path(self, excel_paths: dict):
        """parse_excel should also accept a plain string path."""
        crb_path_text = str(excel_paths["CRB"])
        outcome = parse_excel(crb_path_text)
        assert outcome.category_key == "CRB"