feat(import): harden workbook parser boundaries
This commit is contained in:
@@ -29,6 +29,7 @@ function createCommitDb(overrides: Record<string, unknown> = {}) {
|
||||
},
|
||||
stagedVacation: {
|
||||
findMany: vi.fn().mockResolvedValue([]),
|
||||
count: vi.fn().mockResolvedValue(0),
|
||||
updateMany: vi.fn().mockResolvedValue({ count: 0 }),
|
||||
},
|
||||
stagedAvailabilityRule: {
|
||||
@@ -94,6 +95,9 @@ function createCommitDb(overrides: Record<string, unknown> = {}) {
|
||||
findUnique: vi.fn().mockResolvedValue({ id: "batch_1", status: "STAGED", summary: {} }),
|
||||
update: vi.fn().mockResolvedValue({}),
|
||||
},
|
||||
stagedVacation: {
|
||||
count: vi.fn().mockResolvedValue(0),
|
||||
},
|
||||
stagedUnresolvedRecord: {
|
||||
findMany: vi.fn().mockResolvedValue([]),
|
||||
},
|
||||
@@ -233,11 +237,11 @@ describe("commitDispoImportBatch", () => {
|
||||
{
|
||||
id: "sv_1",
|
||||
resourceExternalId: "ada.director",
|
||||
vacationType: "PUBLIC_HOLIDAY",
|
||||
startDate: new Date("2026-01-01T00:00:00.000Z"),
|
||||
endDate: new Date("2026-01-01T00:00:00.000Z"),
|
||||
note: "New Year",
|
||||
holidayName: "New Year",
|
||||
vacationType: "ANNUAL",
|
||||
startDate: new Date("2026-01-08T00:00:00.000Z"),
|
||||
endDate: new Date("2026-01-09T00:00:00.000Z"),
|
||||
note: "Winter vacation",
|
||||
holidayName: null,
|
||||
isHalfDay: false,
|
||||
halfDayPart: null,
|
||||
},
|
||||
@@ -705,4 +709,18 @@ describe("commitDispoImportBatch", () => {
|
||||
}),
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects staged PUBLIC_HOLIDAY rows until holiday calendars are synchronized", async () => {
|
||||
const { db, tx } = createCommitDb();
|
||||
|
||||
db.stagedVacation.count.mockResolvedValue(2);
|
||||
|
||||
await expect(
|
||||
commitDispoImportBatch(db as never, {
|
||||
importBatchId: "batch_1",
|
||||
}),
|
||||
).rejects.toThrow(
|
||||
'Import batch "batch_1" still contains 2 staged PUBLIC_HOLIDAY row(s). Public holidays must be synchronized through holiday calendars before commit.',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -230,8 +230,8 @@ describe("dispo import", () => {
|
||||
});
|
||||
|
||||
expect(report.resourceCount).toBeGreaterThan(500);
|
||||
expect(report.canCommitWithStrictSourceData).toBe(true);
|
||||
expect(report.canCommitWithFallbacks).toBe(true);
|
||||
expect(report.canCommitWithStrictSourceData).toBe(false);
|
||||
expect(report.canCommitWithFallbacks).toBe(false);
|
||||
expect(report.issues.find((issue) => issue.code === "FALLBACK_EMAIL_REQUIRED")).toBeUndefined();
|
||||
expect(report.issues.find((issue) => issue.code === "FALLBACK_LCR_REQUIRED")).toBeUndefined();
|
||||
expect(report.issues.find((issue) => issue.code === "FALLBACK_UCR_REQUIRED")).toBeUndefined();
|
||||
@@ -247,6 +247,10 @@ describe("dispo import", () => {
|
||||
);
|
||||
expect(report.issues).toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
code: "PUBLIC_HOLIDAY_IMPORT_REQUIRES_CALENDAR_SYNC",
|
||||
severity: "blocker",
|
||||
}),
|
||||
expect.objectContaining({
|
||||
code: "UNRESOLVED_RECORDS_PRESENT",
|
||||
severity: "warning",
|
||||
@@ -740,7 +744,7 @@ describe("dispo import", () => {
|
||||
expect(result.counts.stagedResources).toBeGreaterThan(800);
|
||||
expect(result.counts.stagedRosterResources).toBeGreaterThan(500);
|
||||
expect(result.counts.stagedAssignments).toBeGreaterThan(1000);
|
||||
expect(result.readiness.canCommitWithStrictSourceData).toBe(true);
|
||||
expect(result.readiness.canCommitWithStrictSourceData).toBe(false);
|
||||
expect(result.readiness.issues).not.toEqual(
|
||||
expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
@@ -754,7 +758,7 @@ describe("dispo import", () => {
|
||||
data: expect.objectContaining({
|
||||
summary: expect.objectContaining({
|
||||
readiness: expect.objectContaining({
|
||||
canCommitWithStrictSourceData: true,
|
||||
canCommitWithStrictSourceData: false,
|
||||
}),
|
||||
}),
|
||||
}),
|
||||
|
||||
@@ -5,12 +5,23 @@ import { fileURLToPath } from "node:url";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
MAX_DISPO_WORKBOOK_BYTES,
|
||||
MAX_DISPO_WORKBOOK_COLUMNS,
|
||||
MAX_DISPO_WORKBOOK_ROWS,
|
||||
readWorksheetMatrix,
|
||||
} from "../use-cases/dispo-import/read-workbook.js";
|
||||
|
||||
const referenceWorkbookPath = fileURLToPath(
|
||||
new URL("../../../../samples/Dispov2/MandatoryDispoCategories_V3.xlsx", import.meta.url),
|
||||
);
|
||||
const chargeabilityWorkbookPath = fileURLToPath(
|
||||
new URL(
|
||||
"../../../../samples/Dispov2/20260309_Bi-Weekly_Chargeability_Reporting_Content_Production_V0.943_4Hartmut.xlsx",
|
||||
import.meta.url,
|
||||
),
|
||||
);
|
||||
const planningWorkbookPath = fileURLToPath(
|
||||
new URL("../../../../samples/Dispov2/DISPO_2026.xlsx", import.meta.url),
|
||||
);
|
||||
|
||||
const tempDirectories: string[] = [];
|
||||
|
||||
@@ -28,6 +39,18 @@ async function makeTempDirectory(): Promise<string> {
|
||||
return directory;
|
||||
}
|
||||
|
||||
/**
 * Test fixture helper: writes `rows` into a single-worksheet .xlsx file.
 *
 * @param filePath - Destination path of the workbook file.
 * @param rows - Row values; each inner array is appended as one worksheet row.
 * @param sheetName - Name of the only worksheet (defaults to "Sheet1").
 */
async function writeWorkbook(filePath: string, rows: unknown[][], sheetName = "Sheet1"): Promise<void> {
  // exceljs is loaded lazily so only tests that build fixtures pay for it.
  const ExcelJS = await import("exceljs");
  const book = new ExcelJS.Workbook();
  const sheet = book.addWorksheet(sheetName);

  for (let index = 0; index < rows.length; index += 1) {
    sheet.addRow(rows[index]);
  }

  await book.xlsx.writeFile(filePath);
}
|
||||
|
||||
describe("readWorksheetMatrix", () => {
|
||||
it("reads trusted xlsx worksheets through the hardened reader", async () => {
|
||||
const rows = await readWorksheetMatrix(referenceWorkbookPath, "EID-Attr");
|
||||
@@ -36,6 +59,21 @@ describe("readWorksheetMatrix", () => {
|
||||
expect(rows.some((row) => row.length > 0)).toBe(true);
|
||||
});
|
||||
|
||||
it("tolerates workbook tables that contain unsupported exceljs date group filters", async () => {
|
||||
const rows = await readWorksheetMatrix(chargeabilityWorkbookPath, "ChgFC");
|
||||
|
||||
expect(rows.length).toBeGreaterThan(300);
|
||||
expect(rows[0]?.length).toBeGreaterThan(5);
|
||||
});
|
||||
|
||||
it("accepts real dispo planning worksheets within the supported width envelope", async () => {
|
||||
const rows = await readWorksheetMatrix(planningWorkbookPath, "Dispo");
|
||||
|
||||
expect(rows.length).toBeGreaterThan(500);
|
||||
expect(rows.some((row) => row.length > 256)).toBe(true);
|
||||
expect(rows.every((row) => row.length <= MAX_DISPO_WORKBOOK_COLUMNS)).toBe(true);
|
||||
});
|
||||
|
||||
it("rejects legacy .xls workbook paths", async () => {
|
||||
const directory = await makeTempDirectory();
|
||||
const legacyPath = path.join(directory, "legacy-input.xls");
|
||||
@@ -55,4 +93,30 @@ describe("readWorksheetMatrix", () => {
|
||||
"Workbook file exceeds the",
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects worksheets that exceed the row limit", async () => {
|
||||
const directory = await makeTempDirectory();
|
||||
const workbookPath = path.join(directory, "too-many-rows.xlsx");
|
||||
await writeWorkbook(
|
||||
workbookPath,
|
||||
Array.from({ length: MAX_DISPO_WORKBOOK_ROWS + 1 }, (_, index) => [`row-${index + 1}`]),
|
||||
);
|
||||
|
||||
await expect(readWorksheetMatrix(workbookPath, "Sheet1")).rejects.toThrow(
|
||||
`exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit`,
|
||||
);
|
||||
});
|
||||
|
||||
it("rejects worksheets that exceed the column limit", async () => {
|
||||
const directory = await makeTempDirectory();
|
||||
const workbookPath = path.join(directory, "too-many-columns.xlsx");
|
||||
await writeWorkbook(
|
||||
workbookPath,
|
||||
[Array.from({ length: MAX_DISPO_WORKBOOK_COLUMNS + 1 }, (_, index) => `col-${index + 1}`)],
|
||||
);
|
||||
|
||||
await expect(readWorksheetMatrix(workbookPath, "Sheet1")).rejects.toThrow(
|
||||
`exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit`,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -27,6 +27,7 @@ export interface DispoImportReadinessIssue {
|
||||
| "FALLBACK_EMAIL_REQUIRED"
|
||||
| "FALLBACK_LCR_REQUIRED"
|
||||
| "FALLBACK_UCR_REQUIRED"
|
||||
| "PUBLIC_HOLIDAY_IMPORT_REQUIRES_CALENDAR_SYNC"
|
||||
| "PLANNING_RESOURCE_MISSING_FROM_ROSTER"
|
||||
| "REFERENCE_RESOURCE_MASTER_MISSING"
|
||||
| "UNRESOLVED_RECORDS_PRESENT";
|
||||
@@ -172,6 +173,10 @@ export async function assessDispoImportReadiness(
|
||||
filterUnresolvedCount(chargeabilityWorkbook.unresolved, excludedIds) +
|
||||
filterUnresolvedCount(planningWorkbook.unresolved, excludedIds) +
|
||||
filterUnresolvedCount(rosterWorkbook?.unresolved ?? [], excludedIds);
|
||||
const publicHolidayImportCount = planningWorkbook.vacations.filter(
|
||||
(vacation) =>
|
||||
!excludedIds.has(vacation.resourceExternalId) && vacation.vacationType === "PUBLIC_HOLIDAY",
|
||||
).length;
|
||||
const missingEmailCount = Array.from(mergedResources.values()).filter(
|
||||
(resource) => !resource.email,
|
||||
).length;
|
||||
@@ -254,6 +259,20 @@ export async function assessDispoImportReadiness(
|
||||
);
|
||||
}
|
||||
|
||||
if (publicHolidayImportCount > 0) {
|
||||
issues.push(
|
||||
buildReadinessIssue({
|
||||
code: "PUBLIC_HOLIDAY_IMPORT_REQUIRES_CALENDAR_SYNC",
|
||||
count: publicHolidayImportCount,
|
||||
message:
|
||||
"Planning import contains PUBLIC_HOLIDAY rows. Public holidays must be managed through holiday calendars so country/state/city-specific rules stay canonical.",
|
||||
resolution:
|
||||
"Import or update the relevant holiday calendars first, then remove PUBLIC_HOLIDAY rows from the generic planning/vacation import before commit.",
|
||||
severity: "blocker",
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
if (unresolvedCount > 0) {
|
||||
issues.push(
|
||||
buildReadinessIssue({
|
||||
|
||||
@@ -166,6 +166,25 @@ function getSlotHalfDayPart(slotLabel: string | null): "AFTERNOON" | "MORNING" |
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Reports whether a planning worksheet row looks like a summary row.
 *
 * A row qualifies when its first two cells are empty (null or absent) and the
 * cells from the EID column up to (but excluding) index 9 contain at least one
 * normalized label, all labels are equal ignoring case, and the first label is
 * wrapped in parentheses.
 *
 * @param row - One worksheet row as read from the planning sheet.
 * @returns `true` when the row should be treated as a summary row.
 */
function isPlanningSummaryRow(row: ReadonlyArray<WorksheetCellValue>): boolean {
  const leadingCellsEmpty = (row[0] ?? null) === null && (row[1] ?? null) === null;
  if (!leadingCellsEmpty) {
    return false;
  }

  // Collect the non-empty normalized labels from the inspected column window.
  const labels: string[] = [];
  for (const cell of row.slice(DISPO_EID_COLUMN - 1, 9)) {
    const text = normalizeNullableWorkbookValue(cell);
    if (text !== null) {
      labels.push(text);
    }
  }

  const [firstLabel] = labels;
  if (firstLabel === undefined) {
    return false;
  }

  // Every label must match the first one case-insensitively.
  const expected = firstLabel.toLowerCase();
  const allLabelsMatch = labels.every((text) => text.toLowerCase() === expected);

  return allLabelsMatch && firstLabel.startsWith("(") && firstLabel.endsWith(")");
}
|
||||
|
||||
function buildPlanningColumns(rows: ReadonlyArray<ReadonlyArray<WorksheetCellValue>>) {
|
||||
const columns: PlanningColumn[] = [];
|
||||
const headerWidth = Math.max(rows[DISPO_DATE_ROW - 1]?.length ?? 0, rows[DISPO_SLOT_ROW - 1]?.length ?? 0);
|
||||
@@ -483,6 +502,9 @@ export async function parseDispoPlanningWorkbook(
|
||||
|
||||
for (let rowNumber = DISPO_DATA_START_ROW; rowNumber <= rows.length; rowNumber += 1) {
|
||||
const row = rows[rowNumber - 1] ?? [];
|
||||
if (isPlanningSummaryRow(row)) {
|
||||
continue;
|
||||
}
|
||||
const eid = normalizeNullableWorkbookValue(row[DISPO_EID_COLUMN - 1]);
|
||||
|
||||
if (!eid) {
|
||||
|
||||
@@ -1,32 +1,41 @@
|
||||
import { stat } from "node:fs/promises";
|
||||
import { createRequire } from "node:module";
|
||||
import path from "node:path";
|
||||
|
||||
export type WorksheetCellValue = boolean | Date | number | string | null;
|
||||
export type WorksheetMatrix = WorksheetCellValue[][];
|
||||
|
||||
type XlsxWorkbook = {
|
||||
Sheets: Record<string, unknown>;
|
||||
type ExcelJsModule = typeof import("exceljs");
|
||||
type ExcelJsWorkbook = InstanceType<ExcelJsModule["Workbook"]>;
|
||||
type ExcelJsXlsxReader = ExcelJsWorkbook["xlsx"] & {
|
||||
_processTableEntry?: (
|
||||
stream: unknown,
|
||||
model: Record<string, unknown>,
|
||||
name: string,
|
||||
) => Promise<unknown>;
|
||||
};
|
||||
|
||||
type SheetToJsonOptions = {
|
||||
header: 1;
|
||||
raw: true;
|
||||
defval: null;
|
||||
};
|
||||
|
||||
type XlsxRuntime = {
|
||||
readFile(filePath: string, options: { cellDates: true; dense: true }): XlsxWorkbook;
|
||||
utils: {
|
||||
sheet_to_json<T>(worksheet: unknown, options: SheetToJsonOptions): T[];
|
||||
};
|
||||
};
|
||||
|
||||
const require = createRequire(import.meta.url);
|
||||
const XLSX = require("xlsx") as XlsxRuntime;
|
||||
|
||||
const DISPO_WORKBOOK_EXTENSION = ".xlsx";
|
||||
export const MAX_DISPO_WORKBOOK_BYTES = 15 * 1024 * 1024;
|
||||
export const MAX_DISPO_WORKBOOK_ROWS = 10000;
|
||||
export const MAX_DISPO_WORKBOOK_COLUMNS = 1024;
|
||||
|
||||
const EXCELJS_IGNORE_WORKSHEET_NODES = ["tableParts"];
|
||||
const EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER = '"name":"dateGroupItem"';
|
||||
|
||||
let _excelJs: ExcelJsModule | null = null;
|
||||
const worksheetMatrixCache = new Map<string, Promise<WorksheetMatrix>>();
|
||||
|
||||
/**
 * Resolves the usable ExcelJS module object from a dynamic `import()` result.
 *
 * Depending on module interop, `Workbook` is exposed either directly on the
 * imported namespace or on its `default` export; both shapes are accepted.
 *
 * @param module - The namespace returned by `import("exceljs")`.
 * @returns The object that exposes `Workbook`.
 * @throws Error when neither the namespace nor its `default` export exposes
 *   `Workbook`, instead of returning `undefined` and failing later at the
 *   first `new ExcelJS.Workbook()` call.
 */
function normalizeExcelJsModule(module: ExcelJsModule | { default?: ExcelJsModule }): ExcelJsModule {
  if ("Workbook" in module) {
    return module;
  }

  const fallback: unknown = module.default;
  const isObjectLike = (typeof fallback === "object" || typeof fallback === "function") && fallback !== null;
  if (isObjectLike && "Workbook" in (fallback as object)) {
    return fallback as ExcelJsModule;
  }

  throw new Error(
    'Unable to resolve the "exceljs" module: neither the namespace nor its default export exposes Workbook.',
  );
}
|
||||
|
||||
/**
 * Lazily loads the exceljs module and memoizes it in the module-level
 * `_excelJs` slot so later calls reuse the same normalized module object.
 *
 * @returns The normalized ExcelJS module.
 */
async function getExcelJS() {
  if (_excelJs) {
    return _excelJs;
  }

  const loaded = normalizeExcelJsModule(await import("exceljs"));
  _excelJs = loaded;

  return loaded;
}
|
||||
|
||||
function trimTrailingNulls(row: WorksheetCellValue[]): WorksheetCellValue[] {
|
||||
let end = row.length;
|
||||
@@ -44,6 +53,10 @@ function trimTrailingEmptyRows(rows: WorksheetMatrix): WorksheetMatrix {
|
||||
return rows.slice(0, end);
|
||||
}
|
||||
|
||||
/**
 * Produces an independent copy of a worksheet matrix.
 *
 * Each row array is copied, and `Date` cells are re-created so that a caller
 * mutating a returned date (e.g. via `setUTCDate`) cannot alter the matrix the
 * copy was taken from. All other cell values are immutable primitives or null
 * and are copied by value.
 *
 * @param rows - The matrix to copy.
 * @returns A new matrix with the same values and no shared mutable state.
 */
function cloneWorksheetMatrix(rows: WorksheetMatrix): WorksheetMatrix {
  return rows.map((row) =>
    row.map((cell) => (cell instanceof Date ? new Date(cell.getTime()) : cell)),
  );
}
|
||||
|
||||
async function validateWorkbookPath(workbookPath: string): Promise<string> {
|
||||
const resolvedPath = path.resolve(workbookPath);
|
||||
|
||||
@@ -119,31 +132,99 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
|
||||
return String(value);
|
||||
}
|
||||
|
||||
/**
 * Enforces the import size limits on a worksheet matrix.
 *
 * @param rows - The worksheet rows to check.
 * @param sheetName - Worksheet name, used in the error message.
 * @param workbookPath - Workbook path, used in the error message.
 * @throws Error when the row count exceeds MAX_DISPO_WORKBOOK_ROWS, or when
 *   any row is longer than MAX_DISPO_WORKBOOK_COLUMNS. The row limit is
 *   checked first.
 */
function assertWorksheetShape(rows: WorksheetMatrix, sheetName: string, workbookPath: string): void {
  const rowLimitExceeded = rows.length > MAX_DISPO_WORKBOOK_ROWS;
  if (rowLimitExceeded) {
    throw new Error(
      `Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit.`,
    );
  }

  // Find the longest row; an empty matrix yields a width of 0.
  let widestRow = 0;
  for (const row of rows) {
    widestRow = Math.max(widestRow, row.length);
  }

  const columnLimitExceeded = widestRow > MAX_DISPO_WORKBOOK_COLUMNS;
  if (columnLimitExceeded) {
    throw new Error(
      `Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit.`,
    );
  }
}
|
||||
|
||||
/**
 * Identifies errors whose message contains the unsupported table filter
 * marker (EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER).
 *
 * @param error - Any thrown value.
 * @returns `true` only for `Error` instances whose message includes the marker.
 */
function isUnsupportedExcelJsTableFilterError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }

  return error.message.includes(EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER);
}
|
||||
|
||||
/**
 * Wraps the workbook reader's `_processTableEntry` hook so that table entries
 * failing with the unsupported table filter error are skipped instead of
 * aborting the whole workbook read. Any other error is rethrown unchanged.
 *
 * Does nothing when the reader does not expose `_processTableEntry` as a
 * function.
 *
 * @param workbook - The ExcelJS workbook whose xlsx reader is patched in place.
 */
function patchExcelJsTableCompatibility(workbook: ExcelJsWorkbook): void {
  const xlsxReader = workbook.xlsx as ExcelJsXlsxReader;
  const processTableEntry = xlsxReader._processTableEntry;

  if (typeof processTableEntry === "function") {
    xlsxReader._processTableEntry = async function processTableEntryWithCompatibilityFallback(
      stream,
      model,
      name,
    ) {
      try {
        return await processTableEntry.call(this, stream, model, name);
      } catch (error) {
        if (!isUnsupportedExcelJsTableFilterError(error)) {
          throw error;
        }

        // Unsupported table filter: resolve with undefined so reading continues.
        return undefined;
      }
    };
  }
}
|
||||
|
||||
/**
 * Reads one worksheet of a workbook into a matrix of normalized cell values.
 *
 * The path is first passed through `validateWorkbookPath`. The workbook is
 * then read with ExcelJS (worksheet `tableParts` nodes ignored and the table
 * compatibility patch applied), every cell is normalized, trailing nulls and
 * trailing empty rows are trimmed, and the result is checked against the
 * row/column import limits.
 *
 * Results are cached per resolved path and sheet name. The cache stores the
 * in-flight promise, so concurrent calls for the same worksheet share one
 * read; a failed read is evicted so a later call can retry. Every caller
 * receives its own copy of the cached matrix.
 *
 * @param workbookPath - Path to the workbook file.
 * @param sheetName - Name of the worksheet to read.
 * @returns The worksheet contents as rows of normalized cell values.
 * @throws Error when the worksheet does not exist in the workbook, when the
 *   worksheet exceeds the import limits, or when path validation or reading
 *   the workbook fails.
 */
export async function readWorksheetMatrix(
  workbookPath: string,
  sheetName: string,
): Promise<WorksheetMatrix> {
  const resolvedPath = await validateWorkbookPath(workbookPath);
  const cacheKey = `${resolvedPath}::${sheetName}`;
  const cachedMatrix = worksheetMatrixCache.get(cacheKey);
  if (cachedMatrix) {
    return cloneWorksheetMatrix(await cachedMatrix);
  }

  const matrixPromise = (async () => {
    const ExcelJS = await getExcelJS();
    const workbook = new ExcelJS.Workbook();
    patchExcelJsTableCompatibility(workbook);
    await workbook.xlsx.readFile(resolvedPath, { ignoreNodes: EXCELJS_IGNORE_WORKSHEET_NODES });

    const worksheet = workbook.getWorksheet(sheetName);
    if (!worksheet) {
      throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
    }

    const rows: WorksheetMatrix = [];
    for (let rowNumber = 1; rowNumber <= worksheet.rowCount; rowNumber += 1) {
      const row = worksheet.getRow(rowNumber);
      const cells: WorksheetCellValue[] = [];

      for (let columnNumber = 1; columnNumber <= row.cellCount; columnNumber += 1) {
        cells.push(normalizeWorksheetCellValue(row.getCell(columnNumber).value));
      }

      rows.push(trimTrailingNulls(cells));
    }

    const normalizedRows = trimTrailingEmptyRows(rows);

    // Limits are enforced on the trimmed matrix, so trailing empty rows do not count.
    assertWorksheetShape(normalizedRows, sheetName, resolvedPath);

    return normalizedRows;
  })();

  // Register the pending read before awaiting it so concurrent callers reuse it.
  worksheetMatrixCache.set(cacheKey, matrixPromise);

  try {
    return cloneWorksheetMatrix(await matrixPromise);
  } catch (error) {
    // Do not keep a rejected promise cached; the next call retries the read.
    worksheetMatrixCache.delete(cacheKey);
    throw error;
  }
}
|
||||
|
||||
export function getCellString(
|
||||
|
||||
@@ -55,6 +55,12 @@ export async function validateDispoBatch(
|
||||
status: StagedRecordStatus.UNRESOLVED,
|
||||
},
|
||||
});
|
||||
const stagedPublicHolidayCount = await db.stagedVacation.count({
|
||||
where: {
|
||||
importBatchId: batch.id,
|
||||
vacationType: "PUBLIC_HOLIDAY",
|
||||
},
|
||||
});
|
||||
const blockingUnresolved = unresolved.filter(
|
||||
(record) =>
|
||||
!(
|
||||
@@ -70,6 +76,12 @@ export async function validateDispoBatch(
|
||||
);
|
||||
}
|
||||
|
||||
if (stagedPublicHolidayCount > 0) {
|
||||
throw new Error(
|
||||
`Import batch "${batch.id}" still contains ${stagedPublicHolidayCount} staged PUBLIC_HOLIDAY row(s). Public holidays must be synchronized through holiday calendars before commit.`,
|
||||
);
|
||||
}
|
||||
|
||||
return {
|
||||
batchId: batch.id,
|
||||
batchSummary: batch.summary,
|
||||
|
||||
Reference in New Issue
Block a user