feat(import): harden untrusted spreadsheet boundaries
This commit is contained in:
@@ -23,6 +23,13 @@ const paginationSchema = z.object({
|
||||
// Zod validators for the import-related native enums.
const importBatchStatusSchema = z.nativeEnum(ImportBatchStatus);
const stagedRecordStatusSchema = z.nativeEnum(StagedRecordStatus);
const stagedRecordTypeSchema = z.nativeEnum(DispoStagedRecordType);

/**
 * Validates a workbook path supplied by an API caller.
 *
 * The value is trimmed first, must then be non-empty, and must end in
 * `.xlsx` (compared case-insensitively). This only inspects the string;
 * it does not touch the filesystem.
 */
const workbookPathSchema = z
  .string()
  .trim()
  .min(1, "Workbook path is required.")
  .refine((value) => value.toLowerCase().endsWith(".xlsx"), {
    message: "Only .xlsx workbook paths are supported.",
  });
|
||||
|
||||
// ─── Router ──────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -32,12 +39,12 @@ export const dispoRouter = createTRPCRouter({
|
||||
stageImportBatch: adminProcedure
|
||||
.input(
|
||||
z.object({
|
||||
chargeabilityWorkbookPath: z.string(),
|
||||
costWorkbookPath: z.string().optional(),
|
||||
chargeabilityWorkbookPath: workbookPathSchema,
|
||||
costWorkbookPath: workbookPathSchema.optional(),
|
||||
notes: z.string().nullish(),
|
||||
planningWorkbookPath: z.string(),
|
||||
referenceWorkbookPath: z.string(),
|
||||
rosterWorkbookPath: z.string().optional(),
|
||||
planningWorkbookPath: workbookPathSchema,
|
||||
referenceWorkbookPath: workbookPathSchema,
|
||||
rosterWorkbookPath: workbookPathSchema.optional(),
|
||||
}),
|
||||
)
|
||||
.mutation(async ({ ctx, input }) => {
|
||||
@@ -56,13 +63,13 @@ export const dispoRouter = createTRPCRouter({
|
||||
validateImportBatch: adminProcedure
|
||||
.input(
|
||||
z.object({
|
||||
chargeabilityWorkbookPath: z.string(),
|
||||
costWorkbookPath: z.string().optional(),
|
||||
chargeabilityWorkbookPath: workbookPathSchema,
|
||||
costWorkbookPath: workbookPathSchema.optional(),
|
||||
importBatchId: z.string().optional(),
|
||||
notes: z.string().nullish(),
|
||||
planningWorkbookPath: z.string(),
|
||||
referenceWorkbookPath: z.string(),
|
||||
rosterWorkbookPath: z.string().optional(),
|
||||
planningWorkbookPath: workbookPathSchema,
|
||||
referenceWorkbookPath: workbookPathSchema,
|
||||
rosterWorkbookPath: workbookPathSchema.optional(),
|
||||
}),
|
||||
)
|
||||
.query(async ({ input }) => {
|
||||
|
||||
@@ -0,0 +1,58 @@
|
||||
import { cp, mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { afterEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
MAX_DISPO_WORKBOOK_BYTES,
|
||||
readWorksheetMatrix,
|
||||
} from "../use-cases/dispo-import/read-workbook.js";
|
||||
|
||||
// Known-good sample workbook, located relative to this module's URL.
const referenceWorkbookPath = fileURLToPath(
  new URL("../../../../samples/Dispov2/MandatoryDispoCategories_V3.xlsx", import.meta.url),
);

// Temp directories registered during a test; emptied and removed after each test.
const tempDirectories: string[] = [];

afterEach(async () => {
  // splice(0) drains the registry so a directory is never removed twice.
  await Promise.all(
    tempDirectories.splice(0).map(async (directory) => {
      // force: true keeps cleanup from failing if the directory is already gone.
      await rm(directory, { recursive: true, force: true });
    }),
  );
});
|
||||
|
||||
/**
 * Creates a uniquely named directory under the OS temp dir and registers it
 * in `tempDirectories` so the `afterEach` hook removes it.
 *
 * @returns The path of the newly created directory.
 */
async function makeTempDirectory(): Promise<string> {
  const directory = await mkdtemp(path.join(os.tmpdir(), "capakraken-read-workbook-"));
  tempDirectories.push(directory);
  return directory;
}
|
||||
|
||||
describe("readWorksheetMatrix", () => {
  it("reads trusted xlsx worksheets through the hardened reader", async () => {
    const rows = await readWorksheetMatrix(referenceWorkbookPath, "EID-Attr");

    expect(rows.length).toBeGreaterThan(0);
    expect(rows.some((row) => row.length > 0)).toBe(true);
  });

  it("rejects legacy .xls workbook paths", async () => {
    const directory = await makeTempDirectory();
    // Same bytes as the valid sample; only the extension differs, so the
    // rejection must come from the extension check.
    const legacyPath = path.join(directory, "legacy-input.xls");
    await cp(referenceWorkbookPath, legacyPath);

    await expect(readWorksheetMatrix(legacyPath, "EID-Attr")).rejects.toThrow(
      "Only .xlsx workbooks are supported for dispo imports",
    );
  });

  it("rejects oversized workbook files before parsing", async () => {
    const directory = await makeTempDirectory();
    const oversizedPath = path.join(directory, "oversized.xlsx");
    // One byte over the limit, filled with zeros: not a valid workbook, so
    // the size error can only come from the pre-parse validation.
    await writeFile(oversizedPath, Buffer.alloc(MAX_DISPO_WORKBOOK_BYTES + 1, 0));

    await expect(readWorksheetMatrix(oversizedPath, "Sheet1")).rejects.toThrow(
      "Workbook file exceeds the",
    );
  });
});
|
||||
@@ -1,8 +1,76 @@
|
||||
import * as XLSX from "xlsx";
|
||||
import { stat } from "node:fs/promises";
|
||||
import { createRequire } from "node:module";
|
||||
import path from "node:path";
|
||||
|
||||
/** A single normalized worksheet cell; `null` marks an empty cell. */
export type WorksheetCellValue = boolean | Date | number | string | null;

/** A worksheet as an array of rows, each row an array of cells. */
export type WorksheetMatrix = WorksheetCellValue[][];

/** The only part of a parsed workbook this module reads: sheets keyed by name. */
type XlsxWorkbook = {
  Sheets: Record<string, unknown>;
};

/** The fixed option set this module passes to `sheet_to_json`. */
type SheetToJsonOptions = {
  header: 1;
  raw: true;
  defval: null;
};

/**
 * Structural type for the subset of the `xlsx` module used here:
 * `readFile` and `utils.sheet_to_json`, each with the exact options
 * this module supplies.
 */
type XlsxRuntime = {
  readFile(filePath: string, options: { cellDates: true; dense: true }): XlsxWorkbook;
  utils: {
    sheet_to_json<T>(worksheet: unknown, options: SheetToJsonOptions): T[];
  };
};
|
||||
|
||||
// Load `xlsx` through a CommonJS-style require built from this module's URL,
// and narrow it to the small surface described by XlsxRuntime.
const require = createRequire(import.meta.url);
const XLSX = require("xlsx") as XlsxRuntime;

// The only file extension accepted for dispo import workbooks.
const DISPO_WORKBOOK_EXTENSION = ".xlsx";

/** Largest workbook file size, in bytes, accepted for import (15 MiB). */
export const MAX_DISPO_WORKBOOK_BYTES = 15 * 1024 * 1024;
|
||||
|
||||
/**
 * Returns a copy of `row` with every trailing `null` cell removed.
 * Nulls that are followed by a non-null cell are kept; the input array
 * is not modified.
 *
 * @param row - The cells of one worksheet row.
 * @returns A new array ending at the last non-null cell (empty if all cells are null).
 */
function trimTrailingNulls(row: WorksheetCellValue[]): WorksheetCellValue[] {
  let end = row.length;
  while (end > 0 && row[end - 1] === null) {
    end -= 1;
  }
  return row.slice(0, end);
}
|
||||
|
||||
/**
 * Returns a copy of `rows` with every trailing zero-length row removed.
 * Empty rows that are followed by a non-empty row are kept; the input
 * array is not modified.
 *
 * @param rows - The worksheet rows.
 * @returns A new array ending at the last row that has at least one cell.
 */
function trimTrailingEmptyRows(rows: WorksheetMatrix): WorksheetMatrix {
  let end = rows.length;
  while (end > 0 && rows[end - 1]?.length === 0) {
    end -= 1;
  }
  return rows.slice(0, end);
}
|
||||
|
||||
/**
 * Checks that `workbookPath` is acceptable for import before any parsing
 * happens, and returns its absolute form.
 *
 * Checks run in this order:
 * 1. the extension must be `.xlsx` (case-insensitive) — checked on the path
 *    string alone, before the filesystem is touched;
 * 2. the path must be a regular file;
 * 3. the file must not be empty;
 * 4. the file must not exceed `MAX_DISPO_WORKBOOK_BYTES`.
 *
 * @param workbookPath - Path to the workbook; resolved with `path.resolve`.
 * @returns The resolved absolute path.
 * @throws Error when any of the checks above fails. A rejection from `stat`
 *   itself (for example, when the path does not exist) propagates unchanged.
 */
async function validateWorkbookPath(workbookPath: string): Promise<string> {
  const resolvedPath = path.resolve(workbookPath);

  if (path.extname(resolvedPath).toLowerCase() !== DISPO_WORKBOOK_EXTENSION) {
    throw new Error(
      `Only ${DISPO_WORKBOOK_EXTENSION} workbooks are supported for dispo imports: "${resolvedPath}"`,
    );
  }

  const fileStat = await stat(resolvedPath);
  if (!fileStat.isFile()) {
    throw new Error(`Workbook path must point to a readable file: "${resolvedPath}"`);
  }

  if (fileStat.size <= 0) {
    throw new Error(`Workbook file is empty: "${resolvedPath}"`);
  }

  // Size is checked from metadata so an oversized file is never read into memory.
  if (fileStat.size > MAX_DISPO_WORKBOOK_BYTES) {
    throw new Error(
      `Workbook file exceeds the ${MAX_DISPO_WORKBOOK_BYTES} byte import limit: "${resolvedPath}"`,
    );
  }

  return resolvedPath;
}
|
||||
|
||||
function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
|
||||
if (value === undefined || value === null) {
|
||||
return null;
|
||||
@@ -16,6 +84,38 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
|
||||
return value;
|
||||
}
|
||||
|
||||
if (typeof value === "object") {
|
||||
const record = value as Record<string, unknown>;
|
||||
|
||||
if ("result" in record) {
|
||||
return normalizeWorksheetCellValue(record.result);
|
||||
}
|
||||
|
||||
if ("text" in record && typeof record.text === "string") {
|
||||
return record.text;
|
||||
}
|
||||
|
||||
if ("hyperlink" in record && typeof record.hyperlink === "string") {
|
||||
return record.hyperlink;
|
||||
}
|
||||
|
||||
if ("richText" in record && Array.isArray(record.richText)) {
|
||||
return record.richText
|
||||
.map((part) => {
|
||||
if (part && typeof part === "object" && "text" in part) {
|
||||
const text = (part as { text?: unknown }).text;
|
||||
return typeof text === "string" ? text : "";
|
||||
}
|
||||
return "";
|
||||
})
|
||||
.join("");
|
||||
}
|
||||
|
||||
if ("error" in record && typeof record.error === "string") {
|
||||
return record.error;
|
||||
}
|
||||
}
|
||||
|
||||
return String(value);
|
||||
}
|
||||
|
||||
@@ -23,13 +123,14 @@ export async function readWorksheetMatrix(
|
||||
workbookPath: string,
|
||||
sheetName: string,
|
||||
): Promise<WorksheetMatrix> {
|
||||
const workbook = XLSX.readFile(workbookPath, {
|
||||
const resolvedPath = await validateWorkbookPath(workbookPath);
|
||||
const workbook = XLSX.readFile(resolvedPath, {
|
||||
cellDates: true,
|
||||
dense: true,
|
||||
});
|
||||
const worksheet = workbook.Sheets[sheetName];
|
||||
if (!worksheet) {
|
||||
throw new Error(`Worksheet "${sheetName}" not found in workbook "${workbookPath}"`);
|
||||
throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
|
||||
}
|
||||
|
||||
const rows = XLSX.utils.sheet_to_json<(WorksheetCellValue | null)[]>(worksheet, {
|
||||
@@ -38,7 +139,11 @@ export async function readWorksheetMatrix(
|
||||
defval: null,
|
||||
});
|
||||
|
||||
return rows.map((row) => row.map((value) => normalizeWorksheetCellValue(value)));
|
||||
return trimTrailingEmptyRows(
|
||||
rows.map((row: (WorksheetCellValue | null)[]) =>
|
||||
trimTrailingNulls(row.map((value: WorksheetCellValue | null) => normalizeWorksheetCellValue(value))),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
export function getCellString(
|
||||
|
||||
Reference in New Issue
Block a user