Files
CapaKraken/packages/application/src/__tests__/read-workbook.test.ts
T
Hartmut d9a7ec0338
CI / Architecture Guardrails (push) Successful in 2m39s
CI / Lint (push) Successful in 7m11s
CI / Assistant Split Regression (push) Successful in 8m57s
CI / Typecheck (push) Successful in 12m1s
CI / Unit Tests (push) Successful in 10m18s
CI / Build (push) Successful in 9m29s
CI / E2E Tests (push) Successful in 5m52s
CI / Fresh-Linux Docker Deploy (push) Successful in 6m54s
CI / Release Images (push) Successful in 4m39s
Nightly Security / Dependency Audit (push) Failing after 1m44s
test(application): bump exceljs row/column-limit test timeouts to 60s
Run #115 on main timed out after 30s on the Gitea runner under
concurrent-job load (writing 10001 rows via ExcelJS addRow + writeFile
is CPU-bound and CI contention pushed it past the previous threshold).
Locally these tests complete in ~1s, so doubling the budget removes
the flake without masking real regressions.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-18 14:09:10 +02:00

208 lines
7.4 KiB
TypeScript

import { existsSync } from "node:fs";
import { cp, mkdtemp, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
MAX_DISPO_WORKBOOK_BYTES,
MAX_DISPO_WORKBOOK_COLUMNS,
MAX_DISPO_WORKBOOK_ROWS,
readWorksheetMatrix,
} from "../use-cases/dispo-import/read-workbook.js";
// Absolute filesystem locations of the sample workbooks, resolved relative to
// this module's URL.
const referenceWorkbookUrl = new URL(
  "../../../../samples/Dispov2/MandatoryDispoCategories_V3.xlsx",
  import.meta.url,
);
const referenceWorkbookPath = fileURLToPath(referenceWorkbookUrl);

const chargeabilityWorkbookUrl = new URL(
  "../../../../samples/Dispov2/20260309_Bi-Weekly_Chargeability_Reporting_Content_Production_V0.943_4Hartmut.xlsx",
  import.meta.url,
);
const chargeabilityWorkbookPath = fileURLToPath(chargeabilityWorkbookUrl);

const planningWorkbookUrl = new URL("../../../../samples/Dispov2/DISPO_2026.xlsx", import.meta.url);
const planningWorkbookPath = fileURLToPath(planningWorkbookUrl);

// The sample workbooks hold NDA-protected real data and are gitignored, so they
// are missing on CI. Fixture-backed tests only run when all three are present.
const hasSamples = [referenceWorkbookPath, chargeabilityWorkbookPath, planningWorkbookPath].every(
  (workbookPath) => existsSync(workbookPath),
);

// Registers a test normally when the fixtures exist, otherwise marks it skipped.
const itIfSamples = !hasSamples ? it.skip : it;

// Temp directories created by the current test; emptied by the afterEach hook.
const tempDirectories: string[] = [];
// readWorksheetMatrix treats DISPO_IMPORT_DIR as an allowlist. Most tests in
// this file hand it absolute paths (sample fixtures, OS temp directories) that
// sit outside any production import directory, so for the duration of the
// suite the allowlist is widened to the filesystem root. The allowlist tests
// further down narrow it again to exercise the containment check itself.
const originalImportDir = process.env["DISPO_IMPORT_DIR"];

beforeAll(() => {
  process.env["DISPO_IMPORT_DIR"] = "/";
});

afterAll(() => {
  // Put the variable back exactly as it was found, including "not set at all".
  if (originalImportDir !== undefined) {
    process.env["DISPO_IMPORT_DIR"] = originalImportDir;
    return;
  }
  delete process.env["DISPO_IMPORT_DIR"];
});
// Remove every temp directory registered during the test that just finished.
afterEach(async () => {
  // splice(0) drains the shared list so the next test starts with it empty.
  const pendingRemovals = tempDirectories.splice(0);
  const removals = pendingRemovals.map(async (directory) => {
    // force: true keeps cleanup from failing if the directory is already gone.
    await rm(directory, { recursive: true, force: true });
  });
  await Promise.all(removals);
});
/**
 * Creates a uniquely named directory under the OS temp dir and records it in
 * `tempDirectories` so the afterEach hook removes it.
 *
 * @returns The absolute path of the newly created directory.
 */
async function makeTempDirectory(): Promise<string> {
  const prefix = path.join(os.tmpdir(), "capakraken-read-workbook-");
  const created = await mkdtemp(prefix);
  tempDirectories.push(created);
  return created;
}
/**
 * Writes a single-worksheet .xlsx file using ExcelJS.
 *
 * @param filePath - Destination path of the workbook file.
 * @param rows - Cell values, one inner array per worksheet row, appended in order.
 * @param sheetName - Name of the worksheet to create (defaults to "Sheet1").
 */
async function writeWorkbook(
  filePath: string,
  rows: unknown[][],
  sheetName = "Sheet1",
): Promise<void> {
  // ExcelJS is loaded on demand, only when a test actually needs a generated workbook.
  const { Workbook } = await import("exceljs");
  const book = new Workbook();
  const sheet = book.addWorksheet(sheetName);
  rows.forEach((cells) => {
    sheet.addRow(cells);
  });
  await book.xlsx.writeFile(filePath);
}
// Behavioural tests for the hardened dispo workbook reader: happy paths against
// real sample workbooks (skipped when the fixtures are absent), the size / row /
// column guards, and the DISPO_IMPORT_DIR containment check.
describe("readWorksheetMatrix", () => {
  // Building limit-busting workbooks with ExcelJS (addRow + writeFile) is
  // CPU-bound; under concurrent CI load it exceeded a 30s budget, so the
  // limit tests get 60s.
  const slowWorkbookTimeoutMs = 60000;

  itIfSamples("reads trusted xlsx worksheets through the hardened reader", async () => {
    const rows = await readWorksheetMatrix(referenceWorkbookPath, "EID-Attr");
    expect(rows.length).toBeGreaterThan(0);
    expect(rows.some((row) => row.length > 0)).toBe(true);
  });

  itIfSamples(
    "tolerates workbook tables that contain unsupported exceljs date group filters",
    async () => {
      const rows = await readWorksheetMatrix(chargeabilityWorkbookPath, "ChgFC");
      expect(rows.length).toBeGreaterThan(300);
      expect(rows[0]?.length).toBeGreaterThan(5);
    },
  );

  itIfSamples(
    "accepts real dispo planning worksheets within the supported width envelope",
    async () => {
      const rows = await readWorksheetMatrix(planningWorkbookPath, "Dispo");
      expect(rows.length).toBeGreaterThan(500);
      // The real planning sheet is wider than 256 columns yet must stay within the limit.
      expect(rows.some((row) => row.length > 256)).toBe(true);
      expect(rows.every((row) => row.length <= MAX_DISPO_WORKBOOK_COLUMNS)).toBe(true);
    },
  );

  itIfSamples("rejects legacy .xls workbook paths", async () => {
    const directory = await makeTempDirectory();
    const legacyPath = path.join(directory, "legacy-input.xls");
    // Same bytes as a valid sample workbook; only the file extension differs.
    await cp(referenceWorkbookPath, legacyPath);
    await expect(readWorksheetMatrix(legacyPath, "EID-Attr")).rejects.toThrow(
      "Only .xlsx workbooks are supported for dispo imports",
    );
  });

  it("rejects oversized workbook files before parsing", async () => {
    const directory = await makeTempDirectory();
    const oversizedPath = path.join(directory, "oversized.xlsx");
    // A zero-filled file one byte over the limit; it is not a valid workbook.
    await writeFile(oversizedPath, Buffer.alloc(MAX_DISPO_WORKBOOK_BYTES + 1, 0));
    await expect(readWorksheetMatrix(oversizedPath, "Sheet1")).rejects.toThrow(
      "Workbook file exceeds the",
    );
  });

  it(
    "rejects worksheets that exceed the row limit",
    async () => {
      const directory = await makeTempDirectory();
      const workbookPath = path.join(directory, "too-many-rows.xlsx");
      // One single-cell row more than the reader allows.
      await writeWorkbook(
        workbookPath,
        Array.from({ length: MAX_DISPO_WORKBOOK_ROWS + 1 }, (_, index) => [`row-${index + 1}`]),
      );
      await expect(readWorksheetMatrix(workbookPath, "Sheet1")).rejects.toThrow(
        `exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit`,
      );
    },
    slowWorkbookTimeoutMs,
  );

  it(
    "rejects worksheets that exceed the column limit",
    async () => {
      const directory = await makeTempDirectory();
      const workbookPath = path.join(directory, "too-many-columns.xlsx");
      // A single row that is one cell wider than the reader allows.
      await writeWorkbook(workbookPath, [
        Array.from({ length: MAX_DISPO_WORKBOOK_COLUMNS + 1 }, (_, index) => `col-${index + 1}`),
      ]);
      await expect(readWorksheetMatrix(workbookPath, "Sheet1")).rejects.toThrow(
        `exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit`,
      );
    },
    slowWorkbookTimeoutMs,
  );

  describe("DISPO_IMPORT_DIR allowlist", () => {
    /**
     * Runs `run` with DISPO_IMPORT_DIR pointed at `importDir`, then restores
     * the previous state even if `run` rejects.
     *
     * Restoring by plain assignment is not enough: assigning `undefined` to a
     * `process.env` key stores the string "undefined". A previously unset
     * variable is therefore deleted instead.
     */
    const withImportDir = async (importDir: string, run: () => Promise<void>): Promise<void> => {
      const previous = process.env["DISPO_IMPORT_DIR"];
      process.env["DISPO_IMPORT_DIR"] = importDir;
      try {
        await run();
      } finally {
        if (previous === undefined) delete process.env["DISPO_IMPORT_DIR"];
        else process.env["DISPO_IMPORT_DIR"] = previous;
      }
    };

    it("rejects absolute paths that escape the configured import dir", async () => {
      const allowedDir = await makeTempDirectory();
      const outsideDir = await makeTempDirectory();
      const outsidePath = path.join(outsideDir, "outside.xlsx");
      await writeWorkbook(outsidePath, [["a"]]);

      await withImportDir(allowedDir, async () => {
        await expect(readWorksheetMatrix(outsidePath, "Sheet1")).rejects.toThrow(
          "Workbook path must be inside the configured import directory",
        );
      });
    });

    it("rejects relative paths that traverse out of the configured import dir", async () => {
      const allowedDir = await makeTempDirectory();
      const siblingDir = await makeTempDirectory();
      const siblingPath = path.join(siblingDir, "sibling.xlsx");
      await writeWorkbook(siblingPath, [["a"]]);
      const relative = path.relative(allowedDir, siblingPath);
      // Sanity check: the relative path really does climb out of allowedDir.
      expect(relative.startsWith("..")).toBe(true);

      await withImportDir(allowedDir, async () => {
        await expect(readWorksheetMatrix(relative, "Sheet1")).rejects.toThrow(
          "Workbook path must be inside the configured import directory",
        );
      });
    });

    it("accepts paths that resolve inside the configured import dir", async () => {
      const allowedDir = await makeTempDirectory();
      const insidePath = path.join(allowedDir, "inside.xlsx");
      await writeWorkbook(insidePath, [["hello"]]);

      await withImportDir(allowedDir, async () => {
        // Only the bare file name is passed; the file itself lives in allowedDir.
        const rows = await readWorksheetMatrix("inside.xlsx", "Sheet1");
        expect(rows[0]?.[0]).toBe("hello");
      });
    });
  });
});