security: workbook path allowlist + stronger image polyglot validation (#54)

- dispo workbook imports are pinned to DISPO_IMPORT_DIR (default ./imports):
  tRPC input validation rejects absolute paths and `..` segments, and the
  runtime reader re-validates containment via path.relative. This closes a
  path-traversal class that could reach ExcelJS CVEs via an admin or
  compromised token.
- image validator now checks the full 8-byte PNG magic, enforces PNG IEND
  and JPEG EOI trailers, scans the decoded buffer for markup polyglot
  markers (<script, <svg, <iframe, javascript:, onerror=, ...), and
  explicitly rejects SVG. Provider-generated covers (DALL-E, Gemini) run
  through the same validator before persistence — an untrusted upstream
  cannot smuggle a stored-XSS payload past us.
- added image-validation.test.ts and tightened documentation.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 15:26:29 +02:00
parent 3392297791
commit c4b01c1bfc
11 changed files with 394 additions and 65 deletions
@@ -58,22 +58,22 @@ describe("assistant dispo import batch delegation tools", () => {
const result = await executeTool(
"stage_dispo_import_batch",
JSON.stringify({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
costWorkbookPath: "/imports/cost.xlsx",
rosterWorkbookPath: "/imports/roster.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
costWorkbookPath: "cost.xlsx",
rosterWorkbookPath: "roster.xlsx",
notes: "March import",
}),
ctx,
);
expect(stageDispoImportBatch).toHaveBeenCalledWith(ctx.db, {
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
costWorkbookPath: "/imports/cost.xlsx",
rosterWorkbookPath: "/imports/roster.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
costWorkbookPath: "cost.xlsx",
rosterWorkbookPath: "roster.xlsx",
notes: "March import",
});
expect(JSON.parse(result.content)).toEqual({
@@ -92,18 +92,18 @@ describe("assistant dispo import batch delegation tools", () => {
const result = await executeTool(
"validate_dispo_import_batch",
JSON.stringify({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
importBatchId: "batch_1",
}),
ctx,
);
expect(assessDispoImportReadiness).toHaveBeenCalledWith({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
importBatchId: "batch_1",
});
expect(JSON.parse(result.content)).toEqual({
@@ -41,7 +41,7 @@ vi.mock("../ai-client.js", async (importOriginal) => {
createDalleClient: vi.fn(() => ({
images: {
generate: vi.fn().mockResolvedValue({
data: [{ b64_json: "ZmFrZQ==" }],
data: [{ b64_json: "iVBORw0KGgoAAAAASUVORK5CYII=" }],
}),
},
})),
@@ -49,10 +49,7 @@ vi.mock("../ai-client.js", async (importOriginal) => {
};
});
import {
createToolContext,
executeTool,
} from "./assistant-tools-project-media-test-helpers.js";
import { createToolContext, executeTool } from "./assistant-tools-project-media-test-helpers.js";
describe("assistant project cover generation tools", () => {
beforeEach(() => {
@@ -60,7 +57,8 @@ describe("assistant project cover generation tools", () => {
});
it("routes project cover generation through the real project router path", async () => {
const projectFindUnique = vi.fn()
const projectFindUnique = vi
.fn()
.mockResolvedValueOnce({
id: "project_1",
name: "Project One",
@@ -84,7 +82,7 @@ describe("assistant project cover generation tools", () => {
});
const projectUpdate = vi.fn().mockResolvedValue({
id: "project_1",
coverImageUrl: "data:image/png;base64,ZmFrZQ==",
coverImageUrl: "data:image/png;base64,iVBORw0KGgoAAAAASUVORK5CYII=",
});
const ctx = createToolContext(
{
@@ -119,7 +117,7 @@ describe("assistant project cover generation tools", () => {
expect(projectUpdate).toHaveBeenCalledWith({
where: { id: "project_1" },
data: { coverImageUrl: "data:image/png;base64,ZmFrZQ==" },
data: { coverImageUrl: "data:image/png;base64,iVBORw0KGgoAAAAASUVORK5CYII=" },
});
expect(projectFindUnique).toHaveBeenCalledWith({
where: { id: "project_1" },
@@ -0,0 +1,82 @@
import { describe, expect, it } from "vitest";
import { validateImageDataUrl } from "../lib/image-validation.js";
// Byte fixtures shared by the suite below: the full 8-byte PNG magic, the
// complete IEND trailer chunk, a JPEG SOI followed by an APP0 marker byte
// pair, and the JPEG end-of-image (EOI) marker.
const PNG_HEADER = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
const PNG_IEND = [0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82];
const JPEG_HEADER = [0xff, 0xd8, 0xff, 0xe0];
const JPEG_EOI = [0xff, 0xd9];
/** Builds a base64 data URL of the given MIME type from raw byte values. */
function dataUrl(mime: string, bytes: number[]): string {
  const payload = Buffer.from(Uint8Array.from(bytes)).toString("base64");
  return ["data:", mime, ";base64,", payload].join("");
}
// Suite for validateImageDataUrl: two accepted raster shapes, then one
// rejection case per defence layer — SVG ban, trailer enforcement,
// polyglot-marker scan, MIME/content mismatch, and parse/decode failures.
describe("validateImageDataUrl", () => {
// Smallest payload the validator accepts: full 8-byte magic, filler bytes,
// and a complete IEND trailer.
it("accepts a minimal well-formed PNG", () => {
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00, ...PNG_IEND];
expect(validateImageDataUrl(dataUrl("image/png", bytes))).toEqual({ valid: true });
});
// SOI/APP0 prefix plus the EOI marker is enough to pass the JPEG checks.
it("accepts a minimal well-formed JPEG", () => {
const bytes = [...JPEG_HEADER, 0x00, 0x00, ...JPEG_EOI];
expect(validateImageDataUrl(dataUrl("image/jpeg", bytes))).toEqual({ valid: true });
});
// SVG is XML and can carry <script>; the validator must refuse it outright,
// no matter how cleanly the payload decodes.
it("rejects SVG uploads explicitly", () => {
const svgBytes = Buffer.from("<svg xmlns='http://www.w3.org/2000/svg'/>", "utf8");
const base64 = svgBytes.toString("base64");
const result = validateImageDataUrl(`data:image/svg+xml;base64,${base64}`);
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/SVG/i);
});
it("rejects a polyglot PNG with an HTML tail after IEND", () => {
const html = Buffer.from("<!doctype html><script>alert(1)</script>", "utf8");
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00, ...PNG_IEND, ...Array.from(html)];
const result = validateImageDataUrl(dataUrl("image/png", bytes));
expect(result.valid).toBe(false);
// Either the IEND-trailer check or the polyglot scan is acceptable — both
// reject the payload before it reaches storage. A tail after IEND naturally
// fails the trailer check first.
if (!result.valid) expect(result.reason).toMatch(/IEND|polyglot/i);
});
it("rejects a PNG that does not end with IEND", () => {
// Declare PNG and include header but truncate before IEND
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00];
const result = validateImageDataUrl(dataUrl("image/png", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/IEND/);
});
it("rejects a JPEG that does not end with the EOI marker", () => {
const bytes = [...JPEG_HEADER, 0x00, 0x00];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/EOI/);
});
// PNG bytes declared as image/jpeg: the sniffed content type must win over
// the caller-supplied MIME label.
it("rejects a MIME/content mismatch", () => {
const bytes = [...PNG_HEADER, 0x00, ...PNG_IEND];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/mismatch/i);
});
// A javascript: URL between valid SOI and EOI markers (where EXIF/comment
// text would live) must still trip the polyglot scan.
it("rejects a javascript: URL embedded in an EXIF-like comment", () => {
const marker = Buffer.from("javascript:alert(1)", "utf8");
const bytes = [...JPEG_HEADER, ...Array.from(marker), ...JPEG_EOI];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/polyglot/i);
});
it("rejects a non-data-URL string", () => {
expect(validateImageDataUrl("not a data url").valid).toBe(false);
});
// A syntactically valid data URL whose payload decodes to zero bytes must
// not pass.
it("rejects an empty decoded buffer", () => {
const result = validateImageDataUrl("data:image/png;base64,");
expect(result.valid).toBe(false);
});
});
+118 -19
View File
@@ -1,6 +1,11 @@
/**
* Validates that the actual bytes of a base64-encoded image match its declared MIME type.
* This prevents attackers from uploading malicious files with a spoofed extension/MIME.
* Validates that a base64 image data URL is a self-consistent image of its
* declared MIME type, and contains no polyglot markers (HTML/SVG/script tails
* masquerading under a valid image header). Note: this is validation, not
* sanitisation — we do not re-encode pixel data. The security goal is to
* prevent a user-uploaded data URL from ever passing if it contains anything
* a browser could later interpret as markup when the data URL is served
* somewhere less strict than `<img src>`.
*/
interface MagicSignature {
@@ -8,16 +13,39 @@ interface MagicSignature {
bytes: number[];
}
// Full PNG magic (8 bytes) and JPEG SOI (3 bytes). Older implementations used
// shorter prefixes which allowed polyglot payloads whose non-header bytes
// differed from the declared format.
const SIGNATURES: MagicSignature[] = [
{ mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47] }, // .PNG
{ mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a] },
{ mimeType: "image/jpeg", bytes: [0xff, 0xd8, 0xff] },
{ mimeType: "image/webp", bytes: [0x52, 0x49, 0x46, 0x46] }, // RIFF (WebP starts with RIFF....WEBP)
{ mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] }, // GIF8
{ mimeType: "image/bmp", bytes: [0x42, 0x4d] }, // BM
{ mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] }, // Little-endian TIFF
{ mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] }, // Big-endian TIFF
{ mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] },
{ mimeType: "image/bmp", bytes: [0x42, 0x4d] },
{ mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] },
{ mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] },
];
// Polyglot markers — byte sequences that must never appear inside a bona-fide
// raster image. If any of these appears, the decoded content contains a
// tail/comment section that a browser or downstream parser could interpret as
// markup, giving us a stored-XSS vector if the bytes are ever served with a
// non-strict MIME. All comparisons are lowercased.
const POLYGLOT_MARKERS = [
"<!doctype",
"<script",
"<svg",
"<html",
"<iframe",
"<object",
"<embed",
"javascript:",
"onerror=",
"onload=",
];
const MAX_IMAGE_BYTES_FOR_VALIDATION = 16 * 1024 * 1024; // refuse to decode anything silly-large
/**
* Detects the actual MIME type of a binary buffer by checking magic bytes.
* Returns null if no known image signature matches.
@@ -37,12 +65,76 @@ export function detectImageMime(buffer: Uint8Array): string | null {
return null;
}
/**
 * Returns true when `buffer` ends with the exact byte sequence `tail`.
 * An empty tail always matches; a buffer shorter than the tail never does.
 */
function endsWith(buffer: Uint8Array, tail: number[]): boolean {
  const start = buffer.length - tail.length;
  if (start < 0) return false;
  for (let i = 0; i < tail.length; i += 1) {
    if (buffer[start + i] !== tail[i]) return false;
  }
  return true;
}
/**
 * Checks that the decoded bytes end with the trailer mandated by the detected
 * MIME type: PNG must close with the full IEND chunk bytes, JPEG with the EOI
 * marker. Formats with no mandatory trailer pass unchanged. Rejecting a
 * missing trailer also rejects any polyglot tail appended after a valid one.
 */
function validateTrailer(
  mime: string,
  buffer: Uint8Array,
): { valid: true } | { valid: false; reason: string } {
  const REQUIRED_TRAILERS: Record<string, { bytes: number[]; reason: string }> = {
    "image/png": {
      bytes: [0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82],
      reason: "PNG does not end with a well-formed IEND chunk.",
    },
    "image/jpeg": {
      bytes: [0xff, 0xd9],
      reason: "JPEG does not end with a well-formed EOI marker.",
    },
  };
  const expected = REQUIRED_TRAILERS[mime];
  if (expected === undefined) return { valid: true };
  const start = buffer.length - expected.bytes.length;
  const matches =
    start >= 0 && expected.bytes.every((byte, i) => buffer[start + i] === byte);
  return matches ? { valid: true } : { valid: false, reason: expected.reason };
}
/**
 * Scans the entire decoded buffer for markup fragments that must never occur
 * inside a genuine raster image (script/SVG/iframe tags, javascript: URLs,
 * event-handler attributes). The scan is case-insensitive over a latin1 view
 * of the bytes, so markers hidden in comment/EXIF-style text regions or in a
 * trailing section are caught wherever they appear in the stream.
 */
function scanForPolyglotMarkers(
  buffer: Uint8Array,
): { valid: true } | { valid: false; reason: string } {
  const haystack = Buffer.from(buffer).toString("latin1").toLowerCase();
  // `find` preserves POLYGLOT_MARKERS order, so the reported marker matches
  // what a first-hit loop would report.
  const hit = POLYGLOT_MARKERS.find((marker) => haystack.includes(marker));
  if (hit !== undefined) {
    return {
      valid: false,
      reason: `Image contains a polyglot marker ("${hit}") — likely a disguised markup payload.`,
    };
  }
  return { valid: true };
}
/**
 * Decodes a base64 payload with guard rails: rejects an empty result and
 * anything above MAX_IMAGE_BYTES_FOR_VALIDATION so a hostile data URL cannot
 * force an oversized buffer through the rest of the validation pipeline.
 */
function decodeBase64Safe(
  base64: string,
): { ok: true; buffer: Uint8Array } | { ok: false; reason: string } {
  let buffer: Buffer;
  try {
    buffer = Buffer.from(base64, "base64");
  } catch {
    return { ok: false, reason: "Invalid base64 encoding." };
  }
  if (buffer.length === 0) return { ok: false, reason: "Decoded image is empty." };
  if (buffer.length > MAX_IMAGE_BYTES_FOR_VALIDATION) {
    return { ok: false, reason: "Decoded image exceeds validation size budget." };
  }
  return { ok: true, buffer };
}
/**
* Validates a data URL by comparing its declared MIME type against the actual magic bytes.
* Validates a data URL by comparing its declared MIME type against the actual
* magic bytes AND by decoding the full buffer to verify a consistent trailer
* and the absence of polyglot markup markers.
*
* Returns { valid: true } or { valid: false, reason: string }.
*/
export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid: false; reason: string } {
// Parse the data URL
export function validateImageDataUrl(
dataUrl: string,
): { valid: true } | { valid: false; reason: string } {
const match = dataUrl.match(/^data:(image\/[a-z+]+);base64,(.+)$/i);
if (!match) {
return { valid: false, reason: "Not a valid base64 image data URL." };
@@ -51,21 +143,22 @@ export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid
const declaredMime = match[1]!.toLowerCase();
const base64 = match[2]!;
// Decode at least the first 16 bytes for signature checking
let buffer: Uint8Array;
try {
const chunk = base64.slice(0, 24); // 24 base64 chars = 18 bytes, more than enough
buffer = Uint8Array.from(atob(chunk), (c) => c.charCodeAt(0));
} catch {
return { valid: false, reason: "Invalid base64 encoding." };
// Explicitly reject SVG — it is XML and can carry <script>. We do not accept
// vector uploads here regardless of how cleanly the payload decodes.
if (declaredMime === "image/svg+xml" || declaredMime === "image/svg") {
return { valid: false, reason: "SVG uploads are not permitted." };
}
const actualMime = detectImageMime(buffer);
const decoded = decodeBase64Safe(base64);
if (!decoded.ok) {
return { valid: false, reason: decoded.reason };
}
const actualMime = detectImageMime(decoded.buffer);
if (!actualMime) {
return { valid: false, reason: "File content does not match any known image format." };
}
// Allow JPEG variants (image/jpeg matches image/jpg header)
const normalize = (m: string) => m.replace("image/jpg", "image/jpeg");
if (normalize(declaredMime) !== normalize(actualMime)) {
return {
@@ -74,5 +167,11 @@ export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid
};
}
const trailer = validateTrailer(actualMime, decoded.buffer);
if (!trailer.valid) return trailer;
const polyglot = scanForPolyglotMarkers(decoded.buffer);
if (!polyglot.valid) return polyglot;
return { valid: true };
}
@@ -1,8 +1,5 @@
import {
DispoStagedRecordType,
ImportBatchStatus,
StagedRecordStatus,
} from "@capakraken/db";
import path from "node:path";
import { DispoStagedRecordType, ImportBatchStatus, StagedRecordStatus } from "@capakraken/db";
import {
assessDispoImportReadiness,
stageDispoImportBatch as stageDispoImportBatchApplication,
@@ -34,12 +31,24 @@ const paginationSchema = z.object({
const importBatchStatusSchema = z.nativeEnum(ImportBatchStatus);
const stagedRecordStatusSchema = z.nativeEnum(StagedRecordStatus);
const stagedRecordTypeSchema = z.nativeEnum(DispoStagedRecordType);
// Reject absolute paths and paths that contain `..` segments at the router
// boundary. The workbook reader re-validates against DISPO_IMPORT_DIR as
// defence-in-depth, but rejecting early here gives a clearer error to admin
// users and shrinks the attack surface if the reader is ever called with a
// different allowlist policy.
const workbookPathSchema = z
.string()
.trim()
.min(1, "Workbook path is required.")
.max(4096, "Workbook path is too long.")
.refine((value) => value.toLowerCase().endsWith(".xlsx"), {
message: "Only .xlsx workbook paths are supported.",
})
.refine((value) => !path.isAbsolute(value), {
message: "Workbook path must be relative to the configured import directory.",
})
.refine((value) => !value.split(/[\\/]/).some((segment) => segment === ".."), {
message: "Workbook path must not contain parent-directory segments.",
});
export const stageImportBatchInputSchema = z.object({
@@ -120,17 +129,16 @@ type ListStagedUnresolvedRecordsInput = z.infer<typeof listStagedUnresolvedRecor
type ResolveStagedRecordInput = z.infer<typeof resolveStagedRecordInputSchema>;
type CommitImportBatchInput = z.infer<typeof commitImportBatchInputSchema>;
export async function stageImportBatch(
ctx: DispoProcedureContext,
input: StageImportBatchInput,
) {
export async function stageImportBatch(ctx: DispoProcedureContext, input: StageImportBatchInput) {
return stageDispoImportBatchApplication(ctx.db, {
chargeabilityWorkbookPath: input.chargeabilityWorkbookPath,
planningWorkbookPath: input.planningWorkbookPath,
referenceWorkbookPath: input.referenceWorkbookPath,
...(input.costWorkbookPath !== undefined ? { costWorkbookPath: input.costWorkbookPath } : {}),
...(input.notes !== undefined ? { notes: input.notes } : {}),
...(input.rosterWorkbookPath !== undefined ? { rosterWorkbookPath: input.rosterWorkbookPath } : {}),
...(input.rosterWorkbookPath !== undefined
? { rosterWorkbookPath: input.rosterWorkbookPath }
: {}),
});
}
@@ -142,7 +150,9 @@ export async function validateImportBatch(input: ValidateImportBatchInput) {
...(input.costWorkbookPath !== undefined ? { costWorkbookPath: input.costWorkbookPath } : {}),
...(input.importBatchId !== undefined ? { importBatchId: input.importBatchId } : {}),
...(input.notes !== undefined ? { notes: input.notes } : {}),
...(input.rosterWorkbookPath !== undefined ? { rosterWorkbookPath: input.rosterWorkbookPath } : {}),
...(input.rosterWorkbookPath !== undefined
? { rosterWorkbookPath: input.rosterWorkbookPath }
: {}),
});
}
@@ -200,10 +210,7 @@ export async function resolveStagedRecord(
return resolveStagedRecordMutation(ctx.db, input);
}
export async function commitImportBatch(
ctx: DispoProcedureContext,
input: CommitImportBatchInput,
) {
export async function commitImportBatch(ctx: DispoProcedureContext, input: CommitImportBatchInput) {
return commitImportBatchMutation(ctx.db, {
importBatchId: input.importBatchId,
allowTbdUnresolved: input.allowTbdUnresolved,
+20
View File
@@ -100,6 +100,18 @@ export const projectCoverProcedures = {
message: `Gemini error: ${parseGeminiError(err)}`,
});
}
// Provider-generated output is still untrusted — a compromised or
// misconfigured upstream could return a polyglot payload. Run the
// same magic-byte + trailer + marker check we apply to user uploads
// before we persist the data URL to the database.
const providerCheck = validateImageDataUrl(coverImageUrl);
if (!providerCheck.valid) {
throw new TRPCError({
code: "INTERNAL_SERVER_ERROR",
message: `Provider image rejected by validator: ${providerCheck.reason}`,
});
}
} else {
const dalleClient = createDalleClient(runtimeSettings);
const model =
@@ -135,6 +147,14 @@ export const projectCoverProcedures = {
}
coverImageUrl = `data:image/png;base64,${b64}`;
const providerCheck = validateImageDataUrl(coverImageUrl);
if (!providerCheck.valid) {
throw new TRPCError({
code: "INTERNAL_SERVER_ERROR",
message: `Provider image rejected by validator: ${providerCheck.reason}`,
});
}
}
await ctx.db.project.update({