security: workbook path allowlist + stronger image polyglot validation (#54)

- dispo workbook imports are pinned to DISPO_IMPORT_DIR (default ./imports):
  tRPC input rejects absolute paths and .. segments, runtime reader
  re-validates containment via path.relative. Closes a path-traversal
  class that reached ExcelJS CVEs through admin/compromised tokens.
- image validator now checks the full 8-byte PNG magic, enforces PNG IEND
  and JPEG EOI trailers, scans the decoded buffer for markup polyglot
  markers (<script, <svg, <iframe, javascript:, onerror=, ...), and
  explicitly rejects SVG. Provider-generated covers (DALL-E, Gemini) run
  through the same validator before persistence — an untrusted upstream
  cannot smuggle a stored-XSS payload past us.
- added image-validation.test.ts and tightened documentation.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 15:26:29 +02:00
parent 3392297791
commit c4b01c1bfc
11 changed files with 394 additions and 65 deletions
@@ -58,22 +58,22 @@ describe("assistant dispo import batch delegation tools", () => {
const result = await executeTool(
"stage_dispo_import_batch",
JSON.stringify({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
costWorkbookPath: "/imports/cost.xlsx",
rosterWorkbookPath: "/imports/roster.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
costWorkbookPath: "cost.xlsx",
rosterWorkbookPath: "roster.xlsx",
notes: "March import",
}),
ctx,
);
expect(stageDispoImportBatch).toHaveBeenCalledWith(ctx.db, {
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
costWorkbookPath: "/imports/cost.xlsx",
rosterWorkbookPath: "/imports/roster.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
costWorkbookPath: "cost.xlsx",
rosterWorkbookPath: "roster.xlsx",
notes: "March import",
});
expect(JSON.parse(result.content)).toEqual({
@@ -92,18 +92,18 @@ describe("assistant dispo import batch delegation tools", () => {
const result = await executeTool(
"validate_dispo_import_batch",
JSON.stringify({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
importBatchId: "batch_1",
}),
ctx,
);
expect(assessDispoImportReadiness).toHaveBeenCalledWith({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
importBatchId: "batch_1",
});
expect(JSON.parse(result.content)).toEqual({
@@ -41,7 +41,7 @@ vi.mock("../ai-client.js", async (importOriginal) => {
createDalleClient: vi.fn(() => ({
images: {
generate: vi.fn().mockResolvedValue({
data: [{ b64_json: "ZmFrZQ==" }],
data: [{ b64_json: "iVBORw0KGgoAAAAASUVORK5CYII=" }],
}),
},
})),
@@ -49,10 +49,7 @@ vi.mock("../ai-client.js", async (importOriginal) => {
};
});
import {
createToolContext,
executeTool,
} from "./assistant-tools-project-media-test-helpers.js";
import { createToolContext, executeTool } from "./assistant-tools-project-media-test-helpers.js";
describe("assistant project cover generation tools", () => {
beforeEach(() => {
@@ -60,7 +57,8 @@ describe("assistant project cover generation tools", () => {
});
it("routes project cover generation through the real project router path", async () => {
const projectFindUnique = vi.fn()
const projectFindUnique = vi
.fn()
.mockResolvedValueOnce({
id: "project_1",
name: "Project One",
@@ -84,7 +82,7 @@ describe("assistant project cover generation tools", () => {
});
const projectUpdate = vi.fn().mockResolvedValue({
id: "project_1",
coverImageUrl: "data:image/png;base64,ZmFrZQ==",
coverImageUrl: "data:image/png;base64,iVBORw0KGgoAAAAASUVORK5CYII=",
});
const ctx = createToolContext(
{
@@ -119,7 +117,7 @@ describe("assistant project cover generation tools", () => {
expect(projectUpdate).toHaveBeenCalledWith({
where: { id: "project_1" },
data: { coverImageUrl: "data:image/png;base64,ZmFrZQ==" },
data: { coverImageUrl: "data:image/png;base64,iVBORw0KGgoAAAAASUVORK5CYII=" },
});
expect(projectFindUnique).toHaveBeenCalledWith({
where: { id: "project_1" },
@@ -0,0 +1,82 @@
import { describe, expect, it } from "vitest";
import { validateImageDataUrl } from "../lib/image-validation.js";
const PNG_HEADER = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
const PNG_IEND = [0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82];
const JPEG_HEADER = [0xff, 0xd8, 0xff, 0xe0];
const JPEG_EOI = [0xff, 0xd9];
function dataUrl(mime: string, bytes: number[]): string {
const base64 = Buffer.from(Uint8Array.from(bytes)).toString("base64");
return `data:${mime};base64,${base64}`;
}
describe("validateImageDataUrl", () => {
it("accepts a minimal well-formed PNG", () => {
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00, ...PNG_IEND];
expect(validateImageDataUrl(dataUrl("image/png", bytes))).toEqual({ valid: true });
});
it("accepts a minimal well-formed JPEG", () => {
const bytes = [...JPEG_HEADER, 0x00, 0x00, ...JPEG_EOI];
expect(validateImageDataUrl(dataUrl("image/jpeg", bytes))).toEqual({ valid: true });
});
it("rejects SVG uploads explicitly", () => {
const svgBytes = Buffer.from("<svg xmlns='http://www.w3.org/2000/svg'/>", "utf8");
const base64 = svgBytes.toString("base64");
const result = validateImageDataUrl(`data:image/svg+xml;base64,${base64}`);
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/SVG/i);
});
it("rejects a polyglot PNG with an HTML tail after IEND", () => {
const html = Buffer.from("<!doctype html><script>alert(1)</script>", "utf8");
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00, ...PNG_IEND, ...Array.from(html)];
const result = validateImageDataUrl(dataUrl("image/png", bytes));
expect(result.valid).toBe(false);
// Either the IEND-trailer check or the polyglot scan is acceptable — both
// reject the payload before it reaches storage. A tail after IEND naturally
// fails the trailer check first.
if (!result.valid) expect(result.reason).toMatch(/IEND|polyglot/i);
});
it("rejects a PNG that does not end with IEND", () => {
// Declare PNG and include header but truncate before IEND
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00];
const result = validateImageDataUrl(dataUrl("image/png", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/IEND/);
});
it("rejects a JPEG that does not end with the EOI marker", () => {
const bytes = [...JPEG_HEADER, 0x00, 0x00];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/EOI/);
});
it("rejects a MIME/content mismatch", () => {
const bytes = [...PNG_HEADER, 0x00, ...PNG_IEND];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/mismatch/i);
});
it("rejects a javascript: URL embedded in an EXIF-like comment", () => {
const marker = Buffer.from("javascript:alert(1)", "utf8");
const bytes = [...JPEG_HEADER, ...Array.from(marker), ...JPEG_EOI];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/polyglot/i);
});
it("rejects a non-data-URL string", () => {
expect(validateImageDataUrl("not a data url").valid).toBe(false);
});
it("rejects an empty decoded buffer", () => {
const result = validateImageDataUrl("data:image/png;base64,");
expect(result.valid).toBe(false);
});
});
+118 -19
View File
@@ -1,6 +1,11 @@
/**
* Validates that the actual bytes of a base64-encoded image match its declared MIME type.
* This prevents attackers from uploading malicious files with a spoofed extension/MIME.
* Validates that a base64 image data URL is a self-consistent image of its
* declared MIME type, and contains no polyglot markers (HTML/SVG/script tails
* masquerading under a valid image header). Note: this is validation, not
* sanitisation — we do not re-encode pixel data. The security goal is to
* prevent a user-uploaded data URL from ever passing if it contains anything
* a browser could later interpret as markup when the data URL is served
* somewhere less strict than `<img src>`.
*/
interface MagicSignature {
@@ -8,16 +13,39 @@ interface MagicSignature {
bytes: number[];
}
// Full PNG magic (8 bytes) and JPEG SOI (3 bytes). Older implementations used
// shorter prefixes which allowed polyglot payloads whose non-header bytes
// differed from the declared format.
const SIGNATURES: MagicSignature[] = [
{ mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47] }, // .PNG
{ mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a] },
{ mimeType: "image/jpeg", bytes: [0xff, 0xd8, 0xff] },
{ mimeType: "image/webp", bytes: [0x52, 0x49, 0x46, 0x46] }, // RIFF (WebP starts with RIFF....WEBP)
{ mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] }, // GIF8
{ mimeType: "image/bmp", bytes: [0x42, 0x4d] }, // BM
{ mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] }, // Little-endian TIFF
{ mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] }, // Big-endian TIFF
{ mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] },
{ mimeType: "image/bmp", bytes: [0x42, 0x4d] },
{ mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] },
{ mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] },
];
// Polyglot markers — byte sequences that must never appear inside a bona-fide
// raster image. If any of these appears, the decoded content contains a
// tail/comment section that a browser or downstream parser could interpret as
// markup, giving us a stored-XSS vector if the bytes are ever served with a
// non-strict MIME. All comparisons are lowercased.
const POLYGLOT_MARKERS = [
"<!doctype",
"<script",
"<svg",
"<html",
"<iframe",
"<object",
"<embed",
"javascript:",
"onerror=",
"onload=",
];
const MAX_IMAGE_BYTES_FOR_VALIDATION = 16 * 1024 * 1024; // refuse to decode anything silly-large
/**
* Detects the actual MIME type of a binary buffer by checking magic bytes.
* Returns null if no known image signature matches.
@@ -37,12 +65,76 @@ export function detectImageMime(buffer: Uint8Array): string | null {
return null;
}
function endsWith(buffer: Uint8Array, tail: number[]): boolean {
if (buffer.length < tail.length) return false;
const offset = buffer.length - tail.length;
return tail.every((b, i) => buffer[offset + i] === b);
}
function validateTrailer(
mime: string,
buffer: Uint8Array,
): { valid: true } | { valid: false; reason: string } {
if (mime === "image/png") {
// PNG ends with the IEND chunk: 0x49 0x45 0x4e 0x44 0xae 0x42 0x60 0x82.
// Anything after IEND is a polyglot tail and is rejected.
if (!endsWith(buffer, [0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82])) {
return { valid: false, reason: "PNG does not end with a well-formed IEND chunk." };
}
}
if (mime === "image/jpeg") {
// JPEG must end with the EOI marker 0xFFD9.
if (!endsWith(buffer, [0xff, 0xd9])) {
return { valid: false, reason: "JPEG does not end with a well-formed EOI marker." };
}
}
return { valid: true };
}
function scanForPolyglotMarkers(
buffer: Uint8Array,
): { valid: true } | { valid: false; reason: string } {
// Only the "textual" portion of an image — comments, EXIF text blocks, tail
// after the declared trailer — could carry HTML. We do a full-buffer scan
// because those regions can legitimately appear anywhere in the byte stream.
// Buffers up to MAX_IMAGE_BYTES_FOR_VALIDATION are cheap to scan linearly.
const asText = Buffer.from(buffer).toString("latin1").toLowerCase();
for (const marker of POLYGLOT_MARKERS) {
if (asText.includes(marker)) {
return {
valid: false,
reason: `Image contains a polyglot marker ("${marker}") — likely a disguised markup payload.`,
};
}
}
return { valid: true };
}
function decodeBase64Safe(
base64: string,
): { ok: true; buffer: Uint8Array } | { ok: false; reason: string } {
try {
const buffer = Buffer.from(base64, "base64");
if (buffer.length === 0) return { ok: false, reason: "Decoded image is empty." };
if (buffer.length > MAX_IMAGE_BYTES_FOR_VALIDATION) {
return { ok: false, reason: "Decoded image exceeds validation size budget." };
}
return { ok: true, buffer };
} catch {
return { ok: false, reason: "Invalid base64 encoding." };
}
}
/**
* Validates a data URL by comparing its declared MIME type against the actual magic bytes.
* Validates a data URL by comparing its declared MIME type against the actual
* magic bytes AND by decoding the full buffer to verify a consistent trailer
* and the absence of polyglot markup markers.
*
* Returns { valid: true } or { valid: false, reason: string }.
*/
export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid: false; reason: string } {
// Parse the data URL
export function validateImageDataUrl(
dataUrl: string,
): { valid: true } | { valid: false; reason: string } {
const match = dataUrl.match(/^data:(image\/[a-z+]+);base64,(.+)$/i);
if (!match) {
return { valid: false, reason: "Not a valid base64 image data URL." };
@@ -51,21 +143,22 @@ export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid
const declaredMime = match[1]!.toLowerCase();
const base64 = match[2]!;
// Decode at least the first 16 bytes for signature checking
let buffer: Uint8Array;
try {
const chunk = base64.slice(0, 24); // 24 base64 chars = 18 bytes, more than enough
buffer = Uint8Array.from(atob(chunk), (c) => c.charCodeAt(0));
} catch {
return { valid: false, reason: "Invalid base64 encoding." };
// Explicitly reject SVG — it is XML and can carry <script>. We do not accept
// vector uploads here regardless of how cleanly the payload decodes.
if (declaredMime === "image/svg+xml" || declaredMime === "image/svg") {
return { valid: false, reason: "SVG uploads are not permitted." };
}
const actualMime = detectImageMime(buffer);
const decoded = decodeBase64Safe(base64);
if (!decoded.ok) {
return { valid: false, reason: decoded.reason };
}
const actualMime = detectImageMime(decoded.buffer);
if (!actualMime) {
return { valid: false, reason: "File content does not match any known image format." };
}
// Allow JPEG variants (image/jpeg matches image/jpg header)
const normalize = (m: string) => m.replace("image/jpg", "image/jpeg");
if (normalize(declaredMime) !== normalize(actualMime)) {
return {
@@ -74,5 +167,11 @@ export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid
};
}
const trailer = validateTrailer(actualMime, decoded.buffer);
if (!trailer.valid) return trailer;
const polyglot = scanForPolyglotMarkers(decoded.buffer);
if (!polyglot.valid) return polyglot;
return { valid: true };
}
@@ -1,8 +1,5 @@
import {
DispoStagedRecordType,
ImportBatchStatus,
StagedRecordStatus,
} from "@capakraken/db";
import path from "node:path";
import { DispoStagedRecordType, ImportBatchStatus, StagedRecordStatus } from "@capakraken/db";
import {
assessDispoImportReadiness,
stageDispoImportBatch as stageDispoImportBatchApplication,
@@ -34,12 +31,24 @@ const paginationSchema = z.object({
const importBatchStatusSchema = z.nativeEnum(ImportBatchStatus);
const stagedRecordStatusSchema = z.nativeEnum(StagedRecordStatus);
const stagedRecordTypeSchema = z.nativeEnum(DispoStagedRecordType);
// Reject absolute paths and paths that contain `..` segments at the router
// boundary. The workbook reader re-validates against DISPO_IMPORT_DIR as
// defence-in-depth, but rejecting early here gives a clearer error to admin
// users and shrinks the attack surface if the reader is ever called with a
// different allowlist policy.
const workbookPathSchema = z
.string()
.trim()
.min(1, "Workbook path is required.")
.max(4096, "Workbook path is too long.")
.refine((value) => value.toLowerCase().endsWith(".xlsx"), {
message: "Only .xlsx workbook paths are supported.",
})
.refine((value) => !path.isAbsolute(value), {
message: "Workbook path must be relative to the configured import directory.",
})
.refine((value) => !value.split(/[\\/]/).some((segment) => segment === ".."), {
message: "Workbook path must not contain parent-directory segments.",
});
export const stageImportBatchInputSchema = z.object({
@@ -120,17 +129,16 @@ type ListStagedUnresolvedRecordsInput = z.infer<typeof listStagedUnresolvedRecor
type ResolveStagedRecordInput = z.infer<typeof resolveStagedRecordInputSchema>;
type CommitImportBatchInput = z.infer<typeof commitImportBatchInputSchema>;
export async function stageImportBatch(
ctx: DispoProcedureContext,
input: StageImportBatchInput,
) {
export async function stageImportBatch(ctx: DispoProcedureContext, input: StageImportBatchInput) {
return stageDispoImportBatchApplication(ctx.db, {
chargeabilityWorkbookPath: input.chargeabilityWorkbookPath,
planningWorkbookPath: input.planningWorkbookPath,
referenceWorkbookPath: input.referenceWorkbookPath,
...(input.costWorkbookPath !== undefined ? { costWorkbookPath: input.costWorkbookPath } : {}),
...(input.notes !== undefined ? { notes: input.notes } : {}),
...(input.rosterWorkbookPath !== undefined ? { rosterWorkbookPath: input.rosterWorkbookPath } : {}),
...(input.rosterWorkbookPath !== undefined
? { rosterWorkbookPath: input.rosterWorkbookPath }
: {}),
});
}
@@ -142,7 +150,9 @@ export async function validateImportBatch(input: ValidateImportBatchInput) {
...(input.costWorkbookPath !== undefined ? { costWorkbookPath: input.costWorkbookPath } : {}),
...(input.importBatchId !== undefined ? { importBatchId: input.importBatchId } : {}),
...(input.notes !== undefined ? { notes: input.notes } : {}),
...(input.rosterWorkbookPath !== undefined ? { rosterWorkbookPath: input.rosterWorkbookPath } : {}),
...(input.rosterWorkbookPath !== undefined
? { rosterWorkbookPath: input.rosterWorkbookPath }
: {}),
});
}
@@ -200,10 +210,7 @@ export async function resolveStagedRecord(
return resolveStagedRecordMutation(ctx.db, input);
}
export async function commitImportBatch(
ctx: DispoProcedureContext,
input: CommitImportBatchInput,
) {
export async function commitImportBatch(ctx: DispoProcedureContext, input: CommitImportBatchInput) {
return commitImportBatchMutation(ctx.db, {
importBatchId: input.importBatchId,
allowTbdUnresolved: input.allowTbdUnresolved,
+20
View File
@@ -100,6 +100,18 @@ export const projectCoverProcedures = {
message: `Gemini error: ${parseGeminiError(err)}`,
});
}
// Provider-generated output is still untrusted — a compromised or
// misconfigured upstream could return a polyglot payload. Run the
// same magic-byte + trailer + marker check we apply to user uploads
// before we persist the data URL to the database.
const providerCheck = validateImageDataUrl(coverImageUrl);
if (!providerCheck.valid) {
throw new TRPCError({
code: "INTERNAL_SERVER_ERROR",
message: `Provider image rejected by validator: ${providerCheck.reason}`,
});
}
} else {
const dalleClient = createDalleClient(runtimeSettings);
const model =
@@ -135,6 +147,14 @@ export const projectCoverProcedures = {
}
coverImageUrl = `data:image/png;base64,${b64}`;
const providerCheck = validateImageDataUrl(coverImageUrl);
if (!providerCheck.valid) {
throw new TRPCError({
code: "INTERNAL_SERVER_ERROR",
message: `Provider image rejected by validator: ${providerCheck.reason}`,
});
}
}
await ctx.db.project.update({
@@ -1,6 +1,6 @@
import { existsSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { describe, expect, it, vi } from "vitest";
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import {
assessDispoImportReadiness,
parseDispoChargeabilityWorkbook,
@@ -47,6 +47,19 @@ const hasSamples = [
costWorkbookPath,
].every((p) => existsSync(p));
// The dispo reader enforces DISPO_IMPORT_DIR as an allowlist. Sample fixtures
// live at the repo root (outside any production import dir), so scope the
// allowlist to `/` for this suite; a dedicated suite in read-workbook.test.ts
// exercises the containment check explicitly.
const originalImportDir = process.env["DISPO_IMPORT_DIR"];
beforeAll(() => {
process.env["DISPO_IMPORT_DIR"] = "/";
});
afterAll(() => {
if (originalImportDir === undefined) delete process.env["DISPO_IMPORT_DIR"];
else process.env["DISPO_IMPORT_DIR"] = originalImportDir;
});
describe.skipIf(!hasSamples)("dispo import", () => {
it("parses the mandatory reference workbook into normalized master data", async () => {
const parsed = await parseMandatoryDispoReferenceWorkbook(mandatoryWorkbookPath);
@@ -3,7 +3,7 @@ import { cp, mkdtemp, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { afterEach, describe, expect, it } from "vitest";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
MAX_DISPO_WORKBOOK_BYTES,
MAX_DISPO_WORKBOOK_COLUMNS,
@@ -33,6 +33,20 @@ const itIfSamples = hasSamples ? it : it.skip;
const tempDirectories: string[] = [];
// The dispo reader now enforces DISPO_IMPORT_DIR as an allowlist. Existing
// tests pass absolute paths from sample fixtures or tmpdirs that live outside
// any production import dir, so scope the allowlist to the filesystem root
// for the test suite. New tests below restore a narrow allowlist to exercise
// the containment check explicitly.
const originalImportDir = process.env["DISPO_IMPORT_DIR"];
beforeAll(() => {
process.env["DISPO_IMPORT_DIR"] = "/";
});
afterAll(() => {
if (originalImportDir === undefined) delete process.env["DISPO_IMPORT_DIR"];
else process.env["DISPO_IMPORT_DIR"] = originalImportDir;
});
afterEach(async () => {
await Promise.all(
tempDirectories.splice(0).map(async (directory) => {
@@ -136,4 +150,58 @@ describe("readWorksheetMatrix", () => {
`exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit`,
);
}, 30000);
describe("DISPO_IMPORT_DIR allowlist", () => {
it("rejects absolute paths that escape the configured import dir", async () => {
const allowedDir = await makeTempDirectory();
const outsideDir = await makeTempDirectory();
const outsidePath = path.join(outsideDir, "outside.xlsx");
await writeWorkbook(outsidePath, [["a"]]);
const previous = process.env["DISPO_IMPORT_DIR"];
process.env["DISPO_IMPORT_DIR"] = allowedDir;
try {
await expect(readWorksheetMatrix(outsidePath, "Sheet1")).rejects.toThrow(
"Workbook path must be inside the configured import directory",
);
} finally {
process.env["DISPO_IMPORT_DIR"] = previous;
}
});
it("rejects relative paths that traverse out of the configured import dir", async () => {
const allowedDir = await makeTempDirectory();
const siblingDir = await makeTempDirectory();
const siblingPath = path.join(siblingDir, "sibling.xlsx");
await writeWorkbook(siblingPath, [["a"]]);
const relative = path.relative(allowedDir, siblingPath);
expect(relative.startsWith("..")).toBe(true);
const previous = process.env["DISPO_IMPORT_DIR"];
process.env["DISPO_IMPORT_DIR"] = allowedDir;
try {
await expect(readWorksheetMatrix(relative, "Sheet1")).rejects.toThrow(
"Workbook path must be inside the configured import directory",
);
} finally {
process.env["DISPO_IMPORT_DIR"] = previous;
}
});
it("accepts paths that resolve inside the configured import dir", async () => {
const allowedDir = await makeTempDirectory();
const insidePath = path.join(allowedDir, "inside.xlsx");
await writeWorkbook(insidePath, [["hello"]]);
const previous = process.env["DISPO_IMPORT_DIR"];
process.env["DISPO_IMPORT_DIR"] = allowedDir;
try {
const rows = await readWorksheetMatrix("inside.xlsx", "Sheet1");
expect(rows[0]?.[0]).toBe("hello");
} finally {
process.env["DISPO_IMPORT_DIR"] = previous;
}
});
});
});
@@ -4,6 +4,18 @@ import path from "node:path";
export type WorksheetCellValue = boolean | Date | number | string | null;
export type WorksheetMatrix = WorksheetCellValue[][];
// Path allowlist: dispo workbooks must live inside DISPO_IMPORT_DIR. Without
// this guard an admin (or a compromised admin token) could point the ExcelJS
// parser at any file the app process can read, reaching library CVEs on
// arbitrary filesystem paths. Default picks an in-repo `imports/` directory so
// local dev still works; production deployments should set DISPO_IMPORT_DIR
// explicitly to a dedicated volume.
function resolveImportDir(): string {
const configured = process.env["DISPO_IMPORT_DIR"];
const base = configured && configured.trim().length > 0 ? configured : path.resolve("imports");
return path.resolve(base);
}
type ExcelJsModule = typeof import("exceljs");
type ExcelJsWorkbook = InstanceType<ExcelJsModule["Workbook"]>;
type ExcelJsXlsxReader = ExcelJsWorkbook["xlsx"] & {
@@ -25,7 +37,9 @@ const EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER = '"name":"dateGroupItem"';
let _excelJs: ExcelJsModule | null = null;
const worksheetMatrixCache = new Map<string, Promise<WorksheetMatrix>>();
function normalizeExcelJsModule(module: ExcelJsModule | { default?: ExcelJsModule }): ExcelJsModule {
function normalizeExcelJsModule(
module: ExcelJsModule | { default?: ExcelJsModule },
): ExcelJsModule {
return "Workbook" in module ? module : (module.default as ExcelJsModule);
}
@@ -58,7 +72,19 @@ function cloneWorksheetMatrix(rows: WorksheetMatrix): WorksheetMatrix {
}
async function validateWorkbookPath(workbookPath: string): Promise<string> {
const resolvedPath = path.resolve(workbookPath);
const importDir = resolveImportDir();
const resolvedPath = path.resolve(importDir, workbookPath);
// path.relative returns a string that either starts with ".." (or equals
// "..") or is absolute when the resolved path escapes importDir. Both are
// rejected — defence against `..` sequences, symlink-shaped escapes and
// absolute-path injection via the tRPC surface.
const relative = path.relative(importDir, resolvedPath);
if (relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)) {
throw new Error(
`Workbook path must be inside the configured import directory: "${workbookPath}"`,
);
}
if (path.extname(resolvedPath).toLowerCase() !== DISPO_WORKBOOK_EXTENSION) {
throw new Error(
@@ -132,7 +158,11 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
return String(value);
}
function assertWorksheetShape(rows: WorksheetMatrix, sheetName: string, workbookPath: string): void {
function assertWorksheetShape(
rows: WorksheetMatrix,
sheetName: string,
workbookPath: string,
): void {
if (rows.length > MAX_DISPO_WORKBOOK_ROWS) {
throw new Error(
`Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit.`,