feat(import): harden untrusted spreadsheet boundaries

This commit is contained in:
2026-03-30 08:02:52 +02:00
parent fac8c1c3a5
commit f6daf21983
13 changed files with 561 additions and 76 deletions
+17 -10
View File
@@ -23,6 +23,13 @@ const paginationSchema = z.object({
const importBatchStatusSchema = z.nativeEnum(ImportBatchStatus);
const stagedRecordStatusSchema = z.nativeEnum(StagedRecordStatus);
const stagedRecordTypeSchema = z.nativeEnum(DispoStagedRecordType);
/**
 * Zod schema for a workbook path supplied to the import procedures.
 *
 * The value is trimmed, must be non-empty, and must end with ".xlsx"
 * (compared case-insensitively by lower-casing the trimmed value).
 */
const workbookPathSchema = z
  .string()
  .trim()
  .min(1, "Workbook path is required.")
  .refine(
    (candidate) => {
      const lowered = candidate.toLowerCase();
      return lowered.endsWith(".xlsx");
    },
    { message: "Only .xlsx workbook paths are supported." },
  );
// ─── Router ──────────────────────────────────────────────────────────────────
@@ -32,12 +39,12 @@ export const dispoRouter = createTRPCRouter({
stageImportBatch: adminProcedure
.input(
z.object({
chargeabilityWorkbookPath: z.string(),
costWorkbookPath: z.string().optional(),
chargeabilityWorkbookPath: workbookPathSchema,
costWorkbookPath: workbookPathSchema.optional(),
notes: z.string().nullish(),
planningWorkbookPath: z.string(),
referenceWorkbookPath: z.string(),
rosterWorkbookPath: z.string().optional(),
planningWorkbookPath: workbookPathSchema,
referenceWorkbookPath: workbookPathSchema,
rosterWorkbookPath: workbookPathSchema.optional(),
}),
)
.mutation(async ({ ctx, input }) => {
@@ -56,13 +63,13 @@ export const dispoRouter = createTRPCRouter({
validateImportBatch: adminProcedure
.input(
z.object({
chargeabilityWorkbookPath: z.string(),
costWorkbookPath: z.string().optional(),
chargeabilityWorkbookPath: workbookPathSchema,
costWorkbookPath: workbookPathSchema.optional(),
importBatchId: z.string().optional(),
notes: z.string().nullish(),
planningWorkbookPath: z.string(),
referenceWorkbookPath: z.string(),
rosterWorkbookPath: z.string().optional(),
planningWorkbookPath: workbookPathSchema,
referenceWorkbookPath: workbookPathSchema,
rosterWorkbookPath: workbookPathSchema.optional(),
}),
)
.query(async ({ input }) => {
@@ -0,0 +1,58 @@
import { cp, mkdtemp, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { afterEach, describe, expect, it } from "vitest";
import {
MAX_DISPO_WORKBOOK_BYTES,
readWorksheetMatrix,
} from "../use-cases/dispo-import/read-workbook.js";
const referenceWorkbookPath = fileURLToPath(
new URL("../../../../samples/Dispov2/MandatoryDispoCategories_V3.xlsx", import.meta.url),
);
const tempDirectories: string[] = [];
// After every test, drain the list of temp directories created so far and
// delete them all concurrently (recursive + force, so missing paths are ignored).
afterEach(async () => {
  const pendingDirectories = tempDirectories.splice(0);
  const removals = pendingDirectories.map((directory) =>
    rm(directory, { recursive: true, force: true }),
  );
  await Promise.all(removals);
});
/**
 * Creates a fresh temporary directory under the OS temp dir and registers it
 * in `tempDirectories` so the `afterEach` hook removes it.
 *
 * @returns The path of the newly created directory.
 */
async function makeTempDirectory(): Promise<string> {
  const prefix = path.join(os.tmpdir(), "capakraken-read-workbook-");
  const created = await mkdtemp(prefix);
  tempDirectories.push(created);
  return created;
}
// Exercises the guarded workbook reader: the happy path on a sample workbook,
// plus the extension and size checks that run before any parsing.
describe("readWorksheetMatrix", () => {
  it("reads trusted xlsx worksheets through the hardened reader", async () => {
    const matrix = await readWorksheetMatrix(referenceWorkbookPath, "EID-Attr");
    const hasPopulatedRow = matrix.some((cells) => cells.length > 0);

    expect(matrix.length).toBeGreaterThan(0);
    expect(hasPopulatedRow).toBe(true);
  });

  it("rejects legacy .xls workbook paths", async () => {
    // Same bytes as the valid sample, only the file extension differs.
    const workingDirectory = await makeTempDirectory();
    const legacyPath = path.join(workingDirectory, "legacy-input.xls");
    await cp(referenceWorkbookPath, legacyPath);

    const attempt = readWorksheetMatrix(legacyPath, "EID-Attr");

    await expect(attempt).rejects.toThrow(
      "Only .xlsx workbooks are supported for dispo imports",
    );
  });

  it("rejects oversized workbook files before parsing", async () => {
    // A zero-filled file one byte over the limit.
    const workingDirectory = await makeTempDirectory();
    const oversizedPath = path.join(workingDirectory, "oversized.xlsx");
    const payload = Buffer.alloc(MAX_DISPO_WORKBOOK_BYTES + 1, 0);
    await writeFile(oversizedPath, payload);

    const attempt = readWorksheetMatrix(oversizedPath, "Sheet1");

    await expect(attempt).rejects.toThrow("Workbook file exceeds the");
  });
});
@@ -1,8 +1,76 @@
import * as XLSX from "xlsx";
import { stat } from "node:fs/promises";
import { createRequire } from "node:module";
import path from "node:path";
export type WorksheetCellValue = boolean | Date | number | string | null;
export type WorksheetMatrix = WorksheetCellValue[][];
/** Minimal shape of a parsed workbook as used in this file: worksheets looked up by sheet name. */
type XlsxWorkbook = {
Sheets: Record<string, unknown>;
};
/** The exact option literals accepted by the typed `sheet_to_json` signature below. */
type SheetToJsonOptions = {
header: 1;
raw: true;
defval: null;
};
/**
 * The narrow slice of the "xlsx" module this file relies on: `readFile` and
 * `utils.sheet_to_json`, with option types pinned to fixed literal values.
 */
type XlsxRuntime = {
readFile(filePath: string, options: { cellDates: true; dense: true }): XlsxWorkbook;
utils: {
sheet_to_json<T>(worksheet: unknown, options: SheetToJsonOptions): T[];
};
};
// "xlsx" is loaded through a `require` created from this module's URL and then
// asserted to the narrow XlsxRuntime surface.
// NOTE(review): presumably done to load the CommonJS build from an ES module — confirm.
const require = createRequire(import.meta.url);
const XLSX = require("xlsx") as XlsxRuntime;
// The only file extension validateWorkbookPath accepts (compared against the lower-cased extname).
const DISPO_WORKBOOK_EXTENSION = ".xlsx";
/** Upper bound on workbook file size in bytes (15 * 1024 * 1024); larger files are rejected by validateWorkbookPath. */
export const MAX_DISPO_WORKBOOK_BYTES = 15 * 1024 * 1024;
/**
 * Returns a copy of `row` with every trailing `null` cell removed.
 *
 * Only strict `null` values are trimmed; other falsy cells (`0`, `""`, `false`)
 * and nulls that precede a non-null cell are kept. The input is not mutated.
 *
 * @param row - Cells of a single worksheet row.
 * @returns A new array ending at the last non-null cell (empty if all cells are null).
 */
function trimTrailingNulls(row: WorksheetCellValue[]): WorksheetCellValue[] {
  // Scan from the right for the last cell that is not null.
  for (let index = row.length - 1; index >= 0; index -= 1) {
    if (row[index] !== null) {
      return row.slice(0, index + 1);
    }
  }
  return [];
}
/**
 * Returns a copy of `rows` with every trailing zero-length row removed.
 *
 * Empty rows that are followed by a non-empty row are kept. The input is not mutated.
 *
 * @param rows - Worksheet rows.
 * @returns A new array ending at the last row that is not empty.
 */
function trimTrailingEmptyRows(rows: WorksheetMatrix): WorksheetMatrix {
  // Scan from the bottom for the last row whose length is not exactly 0.
  for (let index = rows.length - 1; index >= 0; index -= 1) {
    const candidate = rows[index];
    if (candidate?.length !== 0) {
      return rows.slice(0, index + 1);
    }
  }
  return [];
}
/**
 * Resolves `workbookPath` to an absolute path and checks it before it is parsed.
 *
 * Checks run in this order: extension, regular-file status, non-empty size,
 * size limit. Errors raised by `stat` itself (for example a missing file)
 * propagate to the caller unchanged.
 *
 * @param workbookPath - Path to the workbook, absolute or relative to the current working directory.
 * @returns The resolved absolute path.
 * @throws Error if the extension is not DISPO_WORKBOOK_EXTENSION, the path is not a
 *   regular file, the file is empty, or it is larger than MAX_DISPO_WORKBOOK_BYTES.
 */
async function validateWorkbookPath(workbookPath: string): Promise<string> {
  const absolutePath = path.resolve(workbookPath);

  // Extension is checked first so unsupported formats are rejected without touching the filesystem.
  const extension = path.extname(absolutePath).toLowerCase();
  if (extension !== DISPO_WORKBOOK_EXTENSION) {
    throw new Error(
      `Only ${DISPO_WORKBOOK_EXTENSION} workbooks are supported for dispo imports: "${absolutePath}"`,
    );
  }

  const stats = await stat(absolutePath);
  if (!stats.isFile()) {
    throw new Error(`Workbook path must point to a readable file: "${absolutePath}"`);
  }

  const byteLength = stats.size;
  if (byteLength <= 0) {
    throw new Error(`Workbook file is empty: "${absolutePath}"`);
  }
  if (byteLength > MAX_DISPO_WORKBOOK_BYTES) {
    throw new Error(
      `Workbook file exceeds the ${MAX_DISPO_WORKBOOK_BYTES} byte import limit: "${absolutePath}"`,
    );
  }

  return absolutePath;
}
function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
if (value === undefined || value === null) {
return null;
@@ -16,6 +84,38 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
return value;
}
if (typeof value === "object") {
const record = value as Record<string, unknown>;
if ("result" in record) {
return normalizeWorksheetCellValue(record.result);
}
if ("text" in record && typeof record.text === "string") {
return record.text;
}
if ("hyperlink" in record && typeof record.hyperlink === "string") {
return record.hyperlink;
}
if ("richText" in record && Array.isArray(record.richText)) {
return record.richText
.map((part) => {
if (part && typeof part === "object" && "text" in part) {
const text = (part as { text?: unknown }).text;
return typeof text === "string" ? text : "";
}
return "";
})
.join("");
}
if ("error" in record && typeof record.error === "string") {
return record.error;
}
}
return String(value);
}
@@ -23,13 +123,14 @@ export async function readWorksheetMatrix(
workbookPath: string,
sheetName: string,
): Promise<WorksheetMatrix> {
const workbook = XLSX.readFile(workbookPath, {
const resolvedPath = await validateWorkbookPath(workbookPath);
const workbook = XLSX.readFile(resolvedPath, {
cellDates: true,
dense: true,
});
const worksheet = workbook.Sheets[sheetName];
if (!worksheet) {
throw new Error(`Worksheet "${sheetName}" not found in workbook "${workbookPath}"`);
throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
}
const rows = XLSX.utils.sheet_to_json<(WorksheetCellValue | null)[]>(worksheet, {
@@ -38,7 +139,11 @@ export async function readWorksheetMatrix(
defval: null,
});
return rows.map((row) => row.map((value) => normalizeWorksheetCellValue(value)));
return trimTrailingEmptyRows(
rows.map((row: (WorksheetCellValue | null)[]) =>
trimTrailingNulls(row.map((value: WorksheetCellValue | null) => normalizeWorksheetCellValue(value))),
),
);
}
export function getCellString(