security: workbook path allowlist + stronger image polyglot validation (#54)

- dispo workbook imports are pinned to DISPO_IMPORT_DIR (default ./imports):
  tRPC input rejects absolute paths and .. segments, runtime reader
  re-validates containment via path.relative. Closes a path-traversal
  class that reached ExcelJS CVEs through admin/compromised tokens.
- image validator now checks the full 8-byte PNG magic, enforces PNG IEND
  and JPEG EOI trailers, scans the decoded buffer for markup polyglot
  markers (<script, <svg, <iframe, javascript:, onerror=, ...), and
  explicitly rejects SVG. Provider-generated covers (DALL-E, Gemini) run
  through the same validator before persistence — an untrusted upstream
  cannot smuggle a stored-XSS payload past us.
- added image-validation.test.ts and tightened documentation.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 15:26:29 +02:00
parent 3392297791
commit c4b01c1bfc
11 changed files with 394 additions and 65 deletions
@@ -1,6 +1,6 @@
import { existsSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { describe, expect, it, vi } from "vitest";
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import {
assessDispoImportReadiness,
parseDispoChargeabilityWorkbook,
@@ -47,6 +47,19 @@ const hasSamples = [
costWorkbookPath,
].every((p) => existsSync(p));
// The dispo reader enforces DISPO_IMPORT_DIR as an allowlist. Sample fixtures
// live at the repo root (outside any production import dir), so scope the
// allowlist to `/` for this suite; a dedicated suite in read-workbook.test.ts
// exercises the containment check explicitly.
const originalImportDir = process.env["DISPO_IMPORT_DIR"];
beforeAll(() => {
process.env["DISPO_IMPORT_DIR"] = "/";
});
afterAll(() => {
if (originalImportDir === undefined) delete process.env["DISPO_IMPORT_DIR"];
else process.env["DISPO_IMPORT_DIR"] = originalImportDir;
});
describe.skipIf(!hasSamples)("dispo import", () => {
it("parses the mandatory reference workbook into normalized master data", async () => {
const parsed = await parseMandatoryDispoReferenceWorkbook(mandatoryWorkbookPath);
@@ -3,7 +3,7 @@ import { cp, mkdtemp, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { afterEach, describe, expect, it } from "vitest";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
MAX_DISPO_WORKBOOK_BYTES,
MAX_DISPO_WORKBOOK_COLUMNS,
@@ -33,6 +33,20 @@ const itIfSamples = hasSamples ? it : it.skip;
const tempDirectories: string[] = [];
// The dispo reader now enforces DISPO_IMPORT_DIR as an allowlist. Existing
// tests pass absolute paths from sample fixtures or tmpdirs that live outside
// any production import dir, so scope the allowlist to the filesystem root
// for the test suite. New tests below restore a narrow allowlist to exercise
// the containment check explicitly.
const originalImportDir = process.env["DISPO_IMPORT_DIR"];
beforeAll(() => {
process.env["DISPO_IMPORT_DIR"] = "/";
});
afterAll(() => {
if (originalImportDir === undefined) delete process.env["DISPO_IMPORT_DIR"];
else process.env["DISPO_IMPORT_DIR"] = originalImportDir;
});
afterEach(async () => {
await Promise.all(
tempDirectories.splice(0).map(async (directory) => {
@@ -136,4 +150,58 @@ describe("readWorksheetMatrix", () => {
`exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit`,
);
}, 30000);
describe("DISPO_IMPORT_DIR allowlist", () => {
it("rejects absolute paths that escape the configured import dir", async () => {
const allowedDir = await makeTempDirectory();
const outsideDir = await makeTempDirectory();
const outsidePath = path.join(outsideDir, "outside.xlsx");
await writeWorkbook(outsidePath, [["a"]]);
const previous = process.env["DISPO_IMPORT_DIR"];
process.env["DISPO_IMPORT_DIR"] = allowedDir;
try {
await expect(readWorksheetMatrix(outsidePath, "Sheet1")).rejects.toThrow(
"Workbook path must be inside the configured import directory",
);
} finally {
process.env["DISPO_IMPORT_DIR"] = previous;
}
});
it("rejects relative paths that traverse out of the configured import dir", async () => {
const allowedDir = await makeTempDirectory();
const siblingDir = await makeTempDirectory();
const siblingPath = path.join(siblingDir, "sibling.xlsx");
await writeWorkbook(siblingPath, [["a"]]);
const relative = path.relative(allowedDir, siblingPath);
expect(relative.startsWith("..")).toBe(true);
const previous = process.env["DISPO_IMPORT_DIR"];
process.env["DISPO_IMPORT_DIR"] = allowedDir;
try {
await expect(readWorksheetMatrix(relative, "Sheet1")).rejects.toThrow(
"Workbook path must be inside the configured import directory",
);
} finally {
process.env["DISPO_IMPORT_DIR"] = previous;
}
});
it("accepts paths that resolve inside the configured import dir", async () => {
const allowedDir = await makeTempDirectory();
const insidePath = path.join(allowedDir, "inside.xlsx");
await writeWorkbook(insidePath, [["hello"]]);
const previous = process.env["DISPO_IMPORT_DIR"];
process.env["DISPO_IMPORT_DIR"] = allowedDir;
try {
const rows = await readWorksheetMatrix("inside.xlsx", "Sheet1");
expect(rows[0]?.[0]).toBe("hello");
} finally {
process.env["DISPO_IMPORT_DIR"] = previous;
}
});
});
});
@@ -4,6 +4,18 @@ import path from "node:path";
export type WorksheetCellValue = boolean | Date | number | string | null;
export type WorksheetMatrix = WorksheetCellValue[][];
// Path allowlist: dispo workbooks must live inside DISPO_IMPORT_DIR. Without
// this guard an admin (or a compromised admin token) could point the ExcelJS
// parser at any file the app process can read, reaching library CVEs on
// arbitrary filesystem paths. Default picks an in-repo `imports/` directory so
// local dev still works; production deployments should set DISPO_IMPORT_DIR
// explicitly to a dedicated volume.
function resolveImportDir(): string {
const configured = process.env["DISPO_IMPORT_DIR"];
const base = configured && configured.trim().length > 0 ? configured : path.resolve("imports");
return path.resolve(base);
}
type ExcelJsModule = typeof import("exceljs");
type ExcelJsWorkbook = InstanceType<ExcelJsModule["Workbook"]>;
type ExcelJsXlsxReader = ExcelJsWorkbook["xlsx"] & {
@@ -25,7 +37,9 @@ const EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER = '"name":"dateGroupItem"';
let _excelJs: ExcelJsModule | null = null;
const worksheetMatrixCache = new Map<string, Promise<WorksheetMatrix>>();
function normalizeExcelJsModule(module: ExcelJsModule | { default?: ExcelJsModule }): ExcelJsModule {
function normalizeExcelJsModule(
module: ExcelJsModule | { default?: ExcelJsModule },
): ExcelJsModule {
return "Workbook" in module ? module : (module.default as ExcelJsModule);
}
@@ -58,7 +72,19 @@ function cloneWorksheetMatrix(rows: WorksheetMatrix): WorksheetMatrix {
}
async function validateWorkbookPath(workbookPath: string): Promise<string> {
const resolvedPath = path.resolve(workbookPath);
const importDir = resolveImportDir();
const resolvedPath = path.resolve(importDir, workbookPath);
// path.relative returns a string that either starts with ".." (or equals
// "..") or is absolute when the resolved path escapes importDir. Both are
// rejected — defence against `..` sequences, symlink-shaped escapes and
// absolute-path injection via the tRPC surface.
const relative = path.relative(importDir, resolvedPath);
if (relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)) {
throw new Error(
`Workbook path must be inside the configured import directory: "${workbookPath}"`,
);
}
if (path.extname(resolvedPath).toLowerCase() !== DISPO_WORKBOOK_EXTENSION) {
throw new Error(
@@ -132,7 +158,11 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
return String(value);
}
function assertWorksheetShape(rows: WorksheetMatrix, sheetName: string, workbookPath: string): void {
function assertWorksheetShape(
rows: WorksheetMatrix,
sheetName: string,
workbookPath: string,
): void {
if (rows.length > MAX_DISPO_WORKBOOK_ROWS) {
throw new Error(
`Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit.`,