feat(import): harden workbook parser boundaries

This commit is contained in:
2026-03-31 22:48:30 +02:00
parent 3e8b1702bc
commit db50e2e555
20 changed files with 936 additions and 174 deletions
@@ -27,6 +27,7 @@ export interface DispoImportReadinessIssue {
| "FALLBACK_EMAIL_REQUIRED"
| "FALLBACK_LCR_REQUIRED"
| "FALLBACK_UCR_REQUIRED"
| "PUBLIC_HOLIDAY_IMPORT_REQUIRES_CALENDAR_SYNC"
| "PLANNING_RESOURCE_MISSING_FROM_ROSTER"
| "REFERENCE_RESOURCE_MASTER_MISSING"
| "UNRESOLVED_RECORDS_PRESENT";
@@ -172,6 +173,10 @@ export async function assessDispoImportReadiness(
filterUnresolvedCount(chargeabilityWorkbook.unresolved, excludedIds) +
filterUnresolvedCount(planningWorkbook.unresolved, excludedIds) +
filterUnresolvedCount(rosterWorkbook?.unresolved ?? [], excludedIds);
const publicHolidayImportCount = planningWorkbook.vacations.filter(
(vacation) =>
!excludedIds.has(vacation.resourceExternalId) && vacation.vacationType === "PUBLIC_HOLIDAY",
).length;
const missingEmailCount = Array.from(mergedResources.values()).filter(
(resource) => !resource.email,
).length;
@@ -254,6 +259,20 @@ export async function assessDispoImportReadiness(
);
}
if (publicHolidayImportCount > 0) {
issues.push(
buildReadinessIssue({
code: "PUBLIC_HOLIDAY_IMPORT_REQUIRES_CALENDAR_SYNC",
count: publicHolidayImportCount,
message:
"Planning import contains PUBLIC_HOLIDAY rows. Public holidays must be managed through holiday calendars so country/state/city-specific rules stay canonical.",
resolution:
"Import or update the relevant holiday calendars first, then remove PUBLIC_HOLIDAY rows from the generic planning/vacation import before commit.",
severity: "blocker",
}),
);
}
if (unresolvedCount > 0) {
issues.push(
buildReadinessIssue({
@@ -166,6 +166,25 @@ function getSlotHalfDayPart(slotLabel: string | null): "AFTERNOON" | "MORNING" |
return null;
}
/**
 * Detects a planning "summary" row that should not be parsed as a resource row.
 *
 * A row counts as a summary row when all of the following hold:
 * - its first two cells are empty (null or missing),
 * - the cells from the EID column up to (but excluding) index 9 contain at
 *   least one non-null normalized value,
 * - all of those values are the same label when compared case-insensitively,
 * - the first label is wrapped in parentheses, e.g. "(Total)".
 *
 * @param row One worksheet row of raw cell values.
 * @returns `true` when the row matches the summary-row pattern described above.
 */
function isPlanningSummaryRow(row: ReadonlyArray<WorksheetCellValue>): boolean {
  const leadingCellsEmpty = (row[0] ?? null) === null && (row[1] ?? null) === null;
  if (!leadingCellsEmpty) {
    return false;
  }

  // Collect every non-empty label in the inspected column window.
  const labels: string[] = [];
  for (const cell of row.slice(DISPO_EID_COLUMN - 1, 9)) {
    const text = normalizeNullableWorkbookValue(cell);
    if (text !== null) {
      labels.push(text);
    }
  }

  const firstLabel = labels[0];
  if (firstLabel === undefined) {
    return false;
  }

  // The same label must be repeated across the window (ignoring case).
  const distinctLabels = new Set<string>();
  for (const text of labels) {
    distinctLabels.add(text.toLowerCase());
  }
  if (distinctLabels.size !== 1) {
    return false;
  }

  return firstLabel.startsWith("(") && firstLabel.endsWith(")");
}
function buildPlanningColumns(rows: ReadonlyArray<ReadonlyArray<WorksheetCellValue>>) {
const columns: PlanningColumn[] = [];
const headerWidth = Math.max(rows[DISPO_DATE_ROW - 1]?.length ?? 0, rows[DISPO_SLOT_ROW - 1]?.length ?? 0);
@@ -483,6 +502,9 @@ export async function parseDispoPlanningWorkbook(
for (let rowNumber = DISPO_DATA_START_ROW; rowNumber <= rows.length; rowNumber += 1) {
const row = rows[rowNumber - 1] ?? [];
if (isPlanningSummaryRow(row)) {
continue;
}
const eid = normalizeNullableWorkbookValue(row[DISPO_EID_COLUMN - 1]);
if (!eid) {
@@ -1,32 +1,41 @@
import { stat } from "node:fs/promises";
import { createRequire } from "node:module";
import path from "node:path";
/** A single normalized worksheet cell: a scalar value, a `Date`, or `null` for an empty cell. */
export type WorksheetCellValue = boolean | Date | number | string | null;
/** A worksheet as a list of rows, each row being a list of cell values. */
export type WorksheetMatrix = WorksheetCellValue[][];
type XlsxWorkbook = {
Sheets: Record<string, unknown>;
/** Type of the `exceljs` module namespace (resolved lazily via dynamic import). */
type ExcelJsModule = typeof import("exceljs");
/** Instance type of the `Workbook` class exported by `exceljs`. */
type ExcelJsWorkbook = InstanceType<ExcelJsModule["Workbook"]>;
/**
 * The workbook's `xlsx` reader, widened with the optional `_processTableEntry`
 * hook so it can be wrapped by `patchExcelJsTableCompatibility`.
 * NOTE(review): the hook is added through an intersection, so it is presumably
 * an ExcelJS internal that the published typings do not declare — confirm when
 * upgrading ExcelJS.
 */
type ExcelJsXlsxReader = ExcelJsWorkbook["xlsx"] & {
_processTableEntry?: (
stream: unknown,
model: Record<string, unknown>,
name: string,
) => Promise<unknown>;
};
type SheetToJsonOptions = {
header: 1;
raw: true;
defval: null;
};
type XlsxRuntime = {
readFile(filePath: string, options: { cellDates: true; dense: true }): XlsxWorkbook;
utils: {
sheet_to_json<T>(worksheet: unknown, options: SheetToJsonOptions): T[];
};
};
const require = createRequire(import.meta.url);
const XLSX = require("xlsx") as XlsxRuntime;
// Workbook file extension. NOTE(review): presumably enforced by validateWorkbookPath — its body is not visible here.
const DISPO_WORKBOOK_EXTENSION = ".xlsx";
/** Workbook size ceiling in bytes (15 MiB). NOTE(review): enforcement site is not visible in this chunk — confirm. */
export const MAX_DISPO_WORKBOOK_BYTES = 15 * 1024 * 1024;
/** Maximum number of rows a worksheet may contain after trailing empty rows are trimmed (checked by assertWorksheetShape). */
export const MAX_DISPO_WORKBOOK_ROWS = 10000;
/** Maximum width of any single worksheet row (checked by assertWorksheetShape). */
export const MAX_DISPO_WORKBOOK_COLUMNS = 1024;
// Passed as `ignoreNodes` to ExcelJS `xlsx.readFile` in readWorksheetMatrix.
const EXCELJS_IGNORE_WORKSHEET_NODES = ["tableParts"];
// Substring looked for in an error message to recognize the table-filter failure that is tolerated while reading.
const EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER = '"name":"dateGroupItem"';
// Lazily populated ExcelJS module; filled on the first getExcelJS() call.
let _excelJs: ExcelJsModule | null = null;
// In-flight and completed worksheet reads, keyed by `${resolvedPath}::${sheetName}`.
const worksheetMatrixCache = new Map<string, Promise<WorksheetMatrix>>();
/**
 * Unwraps the result of `import("exceljs")` regardless of module interop shape.
 *
 * Depending on how the package is loaded, the `Workbook` export is either on
 * the namespace object itself or on its `default` property.
 *
 * @param module The imported namespace object.
 * @returns The object that carries the ExcelJS exports.
 * @throws Error when the namespace has neither a `Workbook` export nor a
 *   `default` export, instead of handing `undefined` to the caller.
 */
function normalizeExcelJsModule(module: ExcelJsModule | { default?: ExcelJsModule }): ExcelJsModule {
  if ("Workbook" in module) {
    return module;
  }

  const fallback = module.default;
  if (fallback == null) {
    // Fail at the load site with a clear message rather than later with an
    // opaque "cannot read properties of undefined" on `new ExcelJS.Workbook()`.
    throw new Error(
      'Unable to load "exceljs": the module exposes neither a Workbook export nor a default export.',
    );
  }
  return fallback;
}
/**
 * Returns the ExcelJS module, importing it on first use and reusing the
 * module-level `_excelJs` reference on subsequent calls.
 */
async function getExcelJS() {
  if (_excelJs !== null) {
    return _excelJs;
  }

  const loadedModule = normalizeExcelJsModule(await import("exceljs"));
  _excelJs = loadedModule;
  return loadedModule;
}
function trimTrailingNulls(row: WorksheetCellValue[]): WorksheetCellValue[] {
let end = row.length;
@@ -44,6 +53,10 @@ function trimTrailingEmptyRows(rows: WorksheetMatrix): WorksheetMatrix {
return rows.slice(0, end);
}
/**
 * Produces an independent copy of a worksheet matrix.
 *
 * Both the row arrays and any `Date` cells are copied, so a caller that
 * mutates the returned matrix (including calling setters on a `Date` cell)
 * cannot alter the matrix it was cloned from. All other cell values are
 * primitives or `null` and are copied by value.
 *
 * @param rows The matrix to copy; it is not modified.
 * @returns A new matrix with the same cell values.
 */
function cloneWorksheetMatrix(rows: WorksheetMatrix): WorksheetMatrix {
  return rows.map((row) =>
    // Date is the only mutable cell type; give each clone its own instance.
    row.map((cell) => (cell instanceof Date ? new Date(cell.getTime()) : cell)),
  );
}
async function validateWorkbookPath(workbookPath: string): Promise<string> {
const resolvedPath = path.resolve(workbookPath);
@@ -119,31 +132,99 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
return String(value);
}
/**
 * Rejects worksheets that exceed the import size limits.
 *
 * The row count is checked first, then the width of the widest row.
 *
 * @param rows The worksheet contents to measure.
 * @param sheetName Worksheet name, used in the error message.
 * @param workbookPath Workbook path, used in the error message.
 * @throws Error when the worksheet has more than `MAX_DISPO_WORKBOOK_ROWS`
 *   rows or any row is wider than `MAX_DISPO_WORKBOOK_COLUMNS` cells.
 */
function assertWorksheetShape(rows: WorksheetMatrix, sheetName: string, workbookPath: string): void {
  const rowCount = rows.length;
  if (rowCount > MAX_DISPO_WORKBOOK_ROWS) {
    throw new Error(
      `Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit.`,
    );
  }

  // Track the longest row seen so far.
  let columnCount = 0;
  rows.forEach((cells) => {
    if (cells.length > columnCount) {
      columnCount = cells.length;
    }
  });
  if (columnCount <= MAX_DISPO_WORKBOOK_COLUMNS) {
    return;
  }

  throw new Error(
    `Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit.`,
  );
}
/**
 * Tells whether a thrown value is the specific table-filter failure that the
 * reader tolerates: an `Error` whose message contains
 * `EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER`.
 *
 * @param error Any caught value.
 * @returns `true` only for `Error` instances carrying the marker text.
 */
function isUnsupportedExcelJsTableFilterError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }
  const { message } = error;
  return message.includes(EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER);
}
/**
 * Wraps the workbook reader's `_processTableEntry` hook so that the known
 * unsupported table-filter failure is swallowed instead of aborting the read.
 *
 * When the hook is not a function the workbook is left untouched. Otherwise
 * the replacement forwards `this` and all arguments to the original hook,
 * resolves with `undefined` for errors recognized by
 * `isUnsupportedExcelJsTableFilterError`, and rethrows every other error.
 *
 * @param workbook The ExcelJS workbook whose `xlsx` reader is patched in place.
 */
function patchExcelJsTableCompatibility(workbook: ExcelJsWorkbook): void {
  const xlsxReader = workbook.xlsx as ExcelJsXlsxReader;
  const delegate = xlsxReader._processTableEntry;

  if (typeof delegate === "function") {
    xlsxReader._processTableEntry = async function processTableEntryWithCompatibilityFallback(
      stream,
      model,
      name,
    ) {
      try {
        const outcome = await delegate.call(this, stream, model, name);
        return outcome;
      } catch (failure) {
        if (!isUnsupportedExcelJsTableFilterError(failure)) {
          throw failure;
        }
        // Known-unsupported table filter: skip this table entry.
        return undefined;
      }
    };
  }
}
/**
 * Reads one worksheet of a workbook into a matrix of normalized cell values.
 *
 * NOTE(review): this span comes from a rendered diff and interleaves the
 * removed `XLSX`-based implementation with the ExcelJS-based replacement, so
 * it is not a compilable function body as shown. The comments below describe
 * the ExcelJS path.
 *
 * ExcelJS path, as visible here:
 * - the path is validated and resolved via `validateWorkbookPath`;
 * - results are cached per `${resolvedPath}::${sheetName}` as a promise, and
 *   every caller receives a clone of the cached matrix;
 * - rows are read from 1 to `worksheet.rowCount`, cells from 1 to
 *   `row.cellCount`, each value passed through `normalizeWorksheetCellValue`;
 * - trailing nulls per row and trailing empty rows are trimmed before the
 *   shape limits are asserted;
 * - a failed read removes its cache entry before the error is rethrown.
 *
 * NOTE(review): the cache key does not include file size or mtime, and no
 * eviction other than the failure path is visible here — confirm that a
 * workbook changed on disk at the same path is handled by the callers.
 *
 * @param workbookPath Path to the workbook file.
 * @param sheetName Name of the worksheet to read.
 * @returns The worksheet contents as rows of normalized cell values.
 * @throws Error when the worksheet does not exist or exceeds the shape limits;
 *   errors from path validation and from ExcelJS propagate as well.
 */
export async function readWorksheetMatrix(
workbookPath: string,
sheetName: string,
): Promise<WorksheetMatrix> {
const resolvedPath = await validateWorkbookPath(workbookPath);
const workbook = XLSX.readFile(resolvedPath, {
cellDates: true,
dense: true,
});
const worksheet = workbook.Sheets[sheetName];
if (!worksheet) {
throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
// Serve repeated reads of the same sheet from the cache; the clone keeps callers from sharing row arrays.
const cacheKey = `${resolvedPath}::${sheetName}`;
const cachedMatrix = worksheetMatrixCache.get(cacheKey);
if (cachedMatrix) {
return cloneWorksheetMatrix(await cachedMatrix);
}
const rows = XLSX.utils.sheet_to_json<(WorksheetCellValue | null)[]>(worksheet, {
header: 1,
raw: true,
defval: null,
});
// The read runs inside a promise that is stored in the cache immediately, so concurrent callers await the same read.
const matrixPromise = (async () => {
const ExcelJS = await getExcelJS();
const workbook = new ExcelJS.Workbook();
// Tolerate the known unsupported table-filter error while the file is parsed.
patchExcelJsTableCompatibility(workbook);
await workbook.xlsx.readFile(resolvedPath, { ignoreNodes: EXCELJS_IGNORE_WORKSHEET_NODES });
return trimTrailingEmptyRows(
rows.map((row: (WorksheetCellValue | null)[]) =>
trimTrailingNulls(row.map((value: WorksheetCellValue | null) => normalizeWorksheetCellValue(value))),
),
);
const worksheet = workbook.getWorksheet(sheetName);
if (!worksheet) {
throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
}
const rows: WorksheetMatrix = [];
// ExcelJS rows and columns are addressed starting at 1.
for (let rowNumber = 1; rowNumber <= worksheet.rowCount; rowNumber += 1) {
const row = worksheet.getRow(rowNumber);
const cells: WorksheetCellValue[] = [];
for (let columnNumber = 1; columnNumber <= row.cellCount; columnNumber += 1) {
cells.push(normalizeWorksheetCellValue(row.getCell(columnNumber).value));
}
rows.push(trimTrailingNulls(cells));
}
// Limits are applied to the trimmed matrix, i.e. trailing empty rows and cells do not count.
const normalizedRows = trimTrailingEmptyRows(rows);
assertWorksheetShape(normalizedRows, sheetName, resolvedPath);
return normalizedRows;
})();
worksheetMatrixCache.set(cacheKey, matrixPromise);
try {
return cloneWorksheetMatrix(await matrixPromise);
} catch (error) {
// Do not keep a rejected promise cached; a later call retries the read.
worksheetMatrixCache.delete(cacheKey);
throw error;
}
}
export function getCellString(
@@ -55,6 +55,12 @@ export async function validateDispoBatch(
status: StagedRecordStatus.UNRESOLVED,
},
});
const stagedPublicHolidayCount = await db.stagedVacation.count({
where: {
importBatchId: batch.id,
vacationType: "PUBLIC_HOLIDAY",
},
});
const blockingUnresolved = unresolved.filter(
(record) =>
!(
@@ -70,6 +76,12 @@ export async function validateDispoBatch(
);
}
if (stagedPublicHolidayCount > 0) {
throw new Error(
`Import batch "${batch.id}" still contains ${stagedPublicHolidayCount} staged PUBLIC_HOLIDAY row(s). Public holidays must be synchronized through holiday calendars before commit.`,
);
}
return {
batchId: batch.id,
batchSummary: batch.summary,