import { existsSync } from "node:fs";
import { cp, mkdtemp, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
  MAX_DISPO_WORKBOOK_BYTES,
  MAX_DISPO_WORKBOOK_COLUMNS,
  MAX_DISPO_WORKBOOK_ROWS,
  readWorksheetMatrix,
} from "../use-cases/dispo-import/read-workbook.js";

// Absolute locations of the sample workbooks, resolved relative to this module.
const referenceWorkbookPath = fileURLToPath(
  new URL("../../../../samples/Dispov2/MandatoryDispoCategories_V3.xlsx", import.meta.url),
);
const chargeabilityWorkbookPath = fileURLToPath(
  new URL(
    "../../../../samples/Dispov2/20260309_Bi-Weekly_Chargeability_Reporting_Content_Production_V0.943_4Hartmut.xlsx",
    import.meta.url,
  ),
);
const planningWorkbookPath = fileURLToPath(
  new URL("../../../../samples/Dispov2/DISPO_2026.xlsx", import.meta.url),
);

// The sample xlsx fixtures are gitignored (NDA-protected real data), so they
// are missing on CI. Sample-backed tests only run when all three are present.
const hasSamples = [referenceWorkbookPath, chargeabilityWorkbookPath, planningWorkbookPath].every(
  (workbookPath) => existsSync(workbookPath),
);
const itIfSamples = hasSamples ? it : it.skip;

// Temp directories created by the current test; removed again after each test.
const tempDirectories: string[] = [];

// The dispo reader now enforces DISPO_IMPORT_DIR as an allowlist. Existing
// tests pass absolute paths from sample fixtures or tmpdirs that live outside
// any production import dir, so scope the allowlist to the filesystem root
// for the test suite. New tests below restore a narrow allowlist to exercise
// the containment check explicitly.
const originalImportDir = process.env["DISPO_IMPORT_DIR"];

/**
 * Puts DISPO_IMPORT_DIR back to a previously captured value.
 *
 * Assigning `undefined` to a `process.env` key stores the string "undefined"
 * instead of unsetting it, so a variable that was originally unset has to be
 * deleted rather than reassigned.
 *
 * @param value - The value captured before the variable was overridden.
 */
function restoreImportDir(value: string | undefined): void {
  if (value === undefined) {
    delete process.env["DISPO_IMPORT_DIR"];
  } else {
    process.env["DISPO_IMPORT_DIR"] = value;
  }
}

/**
 * Runs `run` while DISPO_IMPORT_DIR points at `directory`, then restores the
 * value that was in effect beforehand, even when `run` rejects.
 *
 * @param directory - Directory to use as the import dir for the duration of `run`.
 * @param run - Async body executed under the temporary setting.
 * @returns Whatever `run` resolves to.
 */
async function withImportDir<T>(directory: string, run: () => Promise<T>): Promise<T> {
  const previous = process.env["DISPO_IMPORT_DIR"];
  process.env["DISPO_IMPORT_DIR"] = directory;
  try {
    return await run();
  } finally {
    restoreImportDir(previous);
  }
}

beforeAll(() => {
  process.env["DISPO_IMPORT_DIR"] = "/";
});

afterAll(() => {
  restoreImportDir(originalImportDir);
});

afterEach(async () => {
  // splice(0) empties the shared list so the next test starts with none registered.
  await Promise.all(
    tempDirectories.splice(0).map(async (directory) => {
      await rm(directory, { recursive: true, force: true });
    }),
  );
});

/**
 * Creates a fresh temporary directory and registers it for removal in the
 * `afterEach` hook.
 *
 * @returns The absolute path of the new directory.
 */
async function makeTempDirectory(): Promise<string> {
  const directory = await mkdtemp(path.join(os.tmpdir(), "capakraken-read-workbook-"));
  tempDirectories.push(directory);
  return directory;
}

/**
 * Writes a single-sheet workbook to `filePath` using exceljs.
 *
 * @param filePath - Destination path of the workbook file.
 * @param rows - Row values, appended to the worksheet in order.
 * @param sheetName - Name of the worksheet to create.
 */
async function writeWorkbook(
  filePath: string,
  rows: unknown[][],
  sheetName = "Sheet1",
): Promise<void> {
  const ExcelJS = await import("exceljs");
  const workbook = new ExcelJS.Workbook();
  const worksheet = workbook.addWorksheet(sheetName);
  for (const row of rows) {
    worksheet.addRow(row);
  }
  await workbook.xlsx.writeFile(filePath);
}

describe("readWorksheetMatrix", () => {
  itIfSamples("reads trusted xlsx worksheets through the hardened reader", async () => {
    const rows = await readWorksheetMatrix(referenceWorkbookPath, "EID-Attr");

    expect(rows.length).toBeGreaterThan(0);
    expect(rows.some((row) => row.length > 0)).toBe(true);
  });

  itIfSamples(
    "tolerates workbook tables that contain unsupported exceljs date group filters",
    async () => {
      const rows = await readWorksheetMatrix(chargeabilityWorkbookPath, "ChgFC");

      expect(rows.length).toBeGreaterThan(300);
      expect(rows[0]?.length).toBeGreaterThan(5);
    },
  );

  itIfSamples(
    "accepts real dispo planning worksheets within the supported width envelope",
    async () => {
      const rows = await readWorksheetMatrix(planningWorkbookPath, "Dispo");

      expect(rows.length).toBeGreaterThan(500);
      expect(rows.some((row) => row.length > 256)).toBe(true);
      expect(rows.every((row) => row.length <= MAX_DISPO_WORKBOOK_COLUMNS)).toBe(true);
    },
  );

  itIfSamples("rejects legacy .xls workbook paths", async () => {
    const directory = await makeTempDirectory();
    const legacyPath = path.join(directory, "legacy-input.xls");
    // Same bytes as the reference workbook; only the file extension differs.
    await cp(referenceWorkbookPath, legacyPath);

    await expect(readWorksheetMatrix(legacyPath, "EID-Attr")).rejects.toThrow(
      "Only .xlsx workbooks are supported for dispo imports",
    );
  });

  it("rejects oversized workbook files before parsing", async () => {
    const directory = await makeTempDirectory();
    const oversizedPath = path.join(directory, "oversized.xlsx");
    // One byte over the limit, zero-filled.
    await writeFile(oversizedPath, Buffer.alloc(MAX_DISPO_WORKBOOK_BYTES + 1, 0));

    await expect(readWorksheetMatrix(oversizedPath, "Sheet1")).rejects.toThrow(
      "Workbook file exceeds the",
    );
  });

  it("rejects worksheets that exceed the row limit", async () => {
    const directory = await makeTempDirectory();
    const workbookPath = path.join(directory, "too-many-rows.xlsx");
    await writeWorkbook(
      workbookPath,
      Array.from({ length: MAX_DISPO_WORKBOOK_ROWS + 1 }, (_, index) => [`row-${index + 1}`]),
    );

    await expect(readWorksheetMatrix(workbookPath, "Sheet1")).rejects.toThrow(
      `exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit`,
    );
  }, 60000);

  it("rejects worksheets that exceed the column limit", async () => {
    const directory = await makeTempDirectory();
    const workbookPath = path.join(directory, "too-many-columns.xlsx");
    await writeWorkbook(workbookPath, [
      Array.from({ length: MAX_DISPO_WORKBOOK_COLUMNS + 1 }, (_, index) => `col-${index + 1}`),
    ]);

    await expect(readWorksheetMatrix(workbookPath, "Sheet1")).rejects.toThrow(
      `exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit`,
    );
  }, 60000);

  describe("DISPO_IMPORT_DIR allowlist", () => {
    it("rejects absolute paths that escape the configured import dir", async () => {
      const allowedDir = await makeTempDirectory();
      const outsideDir = await makeTempDirectory();
      const outsidePath = path.join(outsideDir, "outside.xlsx");
      await writeWorkbook(outsidePath, [["a"]]);

      await withImportDir(allowedDir, async () => {
        await expect(readWorksheetMatrix(outsidePath, "Sheet1")).rejects.toThrow(
          "Workbook path must be inside the configured import directory",
        );
      });
    });

    it("rejects relative paths that traverse out of the configured import dir", async () => {
      const allowedDir = await makeTempDirectory();
      const siblingDir = await makeTempDirectory();
      const siblingPath = path.join(siblingDir, "sibling.xlsx");
      await writeWorkbook(siblingPath, [["a"]]);

      const relative = path.relative(allowedDir, siblingPath);
      // Sanity check: the relative path really does climb out of allowedDir.
      expect(relative.startsWith("..")).toBe(true);

      await withImportDir(allowedDir, async () => {
        await expect(readWorksheetMatrix(relative, "Sheet1")).rejects.toThrow(
          "Workbook path must be inside the configured import directory",
        );
      });
    });

    it("accepts paths that resolve inside the configured import dir", async () => {
      const allowedDir = await makeTempDirectory();
      const insidePath = path.join(allowedDir, "inside.xlsx");
      await writeWorkbook(insidePath, [["hello"]]);

      await withImportDir(allowedDir, async () => {
        const rows = await readWorksheetMatrix("inside.xlsx", "Sheet1");
        expect(rows[0]?.[0]).toBe("hello");
      });
    });
  });
});