From c4b01c1bfc41605009a33910e458e03c51a33155 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hartmut=20N=C3=B6renberg?= Date: Fri, 17 Apr 2026 15:26:29 +0200 Subject: [PATCH] security: workbook path allowlist + stronger image polyglot validation (#54) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - dispo workbook imports are pinned to DISPO_IMPORT_DIR (default ./imports): tRPC input rejects absolute paths and .. segments, runtime reader re-validates containment via path.relative. Closes a path-traversal class that reached ExcelJS CVEs through admin/compromised tokens. - image validator now checks the full 8-byte PNG magic, enforces PNG IEND and JPEG EOI trailers, scans the decoded buffer for markup polyglot markers ( --- .env.example | 9 ++ docs/security-architecture.md | 7 +- ...ools-dispo-import-batch-delegation.test.ts | 32 ++-- ...stant-tools-project-cover-generate.test.ts | 14 +- .../src/__tests__/image-validation.test.ts | 82 +++++++++++ packages/api/src/lib/image-validation.ts | 137 +++++++++++++++--- .../api/src/router/dispo-procedure-support.ts | 37 +++-- packages/api/src/router/project-cover.ts | 20 +++ .../src/__tests__/dispo-import.test.ts | 15 +- .../src/__tests__/read-workbook.test.ts | 70 ++++++++- .../use-cases/dispo-import/read-workbook.ts | 36 ++++- 11 files changed, 394 insertions(+), 65 deletions(-) create mode 100644 packages/api/src/__tests__/image-validation.test.ts diff --git a/.env.example b/.env.example index 0b7daa8..d49b85c 100644 --- a/.env.example +++ b/.env.example @@ -97,6 +97,15 @@ PGADMIN_PASSWORD= # If not set, Sentry is disabled (SDK is installed but sends nothing). # NEXT_PUBLIC_SENTRY_DSN= +# ─── Dispo import ──────────────────────────────────────────────────────────── + +# Absolute directory that dispo .xlsx workbook imports must live under. The +# tRPC surface only accepts relative paths and the runtime reader re-validates +# that any resolved path remains inside this directory; this prevents an +# admin (or compromised admin token) from pointing the parser at arbitrary +# files on disk and reaching ExcelJS CVEs. Defaults to ./imports if unset. +# DISPO_IMPORT_DIR=/var/lib/capakraken/imports + # ─── Testing (never enable in production) ──────────────────────────────────── # Disables rate limiting and session tracking during end-to-end tests. diff --git a/docs/security-architecture.md b/docs/security-architecture.md index c7d6a37..84a8525 100644 --- a/docs/security-architecture.md +++ b/docs/security-architecture.md @@ -102,9 +102,12 @@ publicProcedure - Strict TypeScript (`strict: true`, `exactOptionalPropertyTypes: true`) - Blueprint dynamic fields validated at runtime against stored Zod schema definitions - File uploads validated by: - - MIME type whitelist (`image/png`, `image/jpeg`, `image/webp`, `image/tiff`, `image/bmp`) + - MIME type whitelist (`image/png`, `image/jpeg`, `image/webp`, `image/tiff`, `image/bmp`). SVG is explicitly rejected — XML markup could carry `", "utf8"); + const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00, ...PNG_IEND, ...Array.from(html)]; + const result = validateImageDataUrl(dataUrl("image/png", bytes)); + expect(result.valid).toBe(false); + // Either the IEND-trailer check or the polyglot scan is acceptable — both + // reject the payload before it reaches storage. A tail after IEND naturally + // fails the trailer check first. + if (!result.valid) expect(result.reason).toMatch(/IEND|polyglot/i); + }); + + it("rejects a PNG that does not end with IEND", () => { + // Declare PNG and include header but truncate before IEND + const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00]; + const result = validateImageDataUrl(dataUrl("image/png", bytes)); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.reason).toMatch(/IEND/); + }); + + it("rejects a JPEG that does not end with the EOI marker", () => { + const bytes = [...JPEG_HEADER, 0x00, 0x00]; + const result = validateImageDataUrl(dataUrl("image/jpeg", bytes)); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.reason).toMatch(/EOI/); + }); + + it("rejects a MIME/content mismatch", () => { + const bytes = [...PNG_HEADER, 0x00, ...PNG_IEND]; + const result = validateImageDataUrl(dataUrl("image/jpeg", bytes)); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.reason).toMatch(/mismatch/i); + }); + + it("rejects a javascript: URL embedded in an EXIF-like comment", () => { + const marker = Buffer.from("javascript:alert(1)", "utf8"); + const bytes = [...JPEG_HEADER, ...Array.from(marker), ...JPEG_EOI]; + const result = validateImageDataUrl(dataUrl("image/jpeg", bytes)); + expect(result.valid).toBe(false); + if (!result.valid) expect(result.reason).toMatch(/polyglot/i); + }); + + it("rejects a non-data-URL string", () => { + expect(validateImageDataUrl("not a data url").valid).toBe(false); + }); + + it("rejects an empty decoded buffer", () => { + const result = validateImageDataUrl("data:image/png;base64,"); + expect(result.valid).toBe(false); + }); +}); diff --git a/packages/api/src/lib/image-validation.ts b/packages/api/src/lib/image-validation.ts index ac675f3..8dfb1c7 100644 --- a/packages/api/src/lib/image-validation.ts +++ b/packages/api/src/lib/image-validation.ts @@ -1,6 +1,11 @@ /** - * Validates that the actual bytes of a base64-encoded image match its declared MIME type. - * This prevents attackers from uploading malicious files with a spoofed extension/MIME. + * Validates that a base64 image data URL is a self-consistent image of its + * declared MIME type, and contains no polyglot markers (HTML/SVG/script tails + * masquerading under a valid image header). Note: this is validation, not + * sanitisation — we do not re-encode pixel data. The security goal is to + * prevent a user-uploaded data URL from ever passing if it contains anything + * a browser could later interpret as markup when the data URL is served + * somewhere less strict than ``. */ interface MagicSignature { @@ -8,16 +13,39 @@ interface MagicSignature { bytes: number[]; } +// Full PNG magic (8 bytes) and JPEG SOI (3 bytes). Older implementations used +// shorter prefixes which allowed polyglot payloads whose non-header bytes +// differed from the declared format. const SIGNATURES: MagicSignature[] = [ - { mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47] }, // .PNG + { mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a] }, { mimeType: "image/jpeg", bytes: [0xff, 0xd8, 0xff] }, { mimeType: "image/webp", bytes: [0x52, 0x49, 0x46, 0x46] }, // RIFF (WebP starts with RIFF....WEBP) - { mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] }, // GIF8 - { mimeType: "image/bmp", bytes: [0x42, 0x4d] }, // BM - { mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] }, // Little-endian TIFF - { mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] }, // Big-endian TIFF + { mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] }, + { mimeType: "image/bmp", bytes: [0x42, 0x4d] }, + { mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] }, + { mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] }, ]; +// Polyglot markers — byte sequences that must never appear inside a bona-fide +// raster image. If any of these appears, the decoded content contains a +// tail/comment section that a browser or downstream parser could interpret as +// markup, giving us a stored-XSS vector if the bytes are ever served with a +// non-strict MIME. All comparisons are lowercased. +const POLYGLOT_MARKERS = [ + " buffer[offset + i] === b); +} + +function validateTrailer( + mime: string, + buffer: Uint8Array, +): { valid: true } | { valid: false; reason: string } { + if (mime === "image/png") { + // PNG ends with the IEND chunk: 0x49 0x45 0x4e 0x44 0xae 0x42 0x60 0x82. + // Anything after IEND is a polyglot tail and is rejected. + if (!endsWith(buffer, [0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82])) { + return { valid: false, reason: "PNG does not end with a well-formed IEND chunk." }; + } + } + if (mime === "image/jpeg") { + // JPEG must end with the EOI marker 0xFFD9. + if (!endsWith(buffer, [0xff, 0xd9])) { + return { valid: false, reason: "JPEG does not end with a well-formed EOI marker." }; + } + } + return { valid: true }; +} + +function scanForPolyglotMarkers( + buffer: Uint8Array, +): { valid: true } | { valid: false; reason: string } { + // Only the "textual" portion of an image — comments, EXIF text blocks, tail + // after the declared trailer — could carry HTML. We do a full-buffer scan + // because those regions can legitimately appear anywhere in the byte stream. + // Buffers up to MAX_IMAGE_BYTES_FOR_VALIDATION are cheap to scan linearly. + const asText = Buffer.from(buffer).toString("latin1").toLowerCase(); + for (const marker of POLYGLOT_MARKERS) { + if (asText.includes(marker)) { + return { + valid: false, + reason: `Image contains a polyglot marker ("${marker}") — likely a disguised markup payload.`, + }; + } + } + return { valid: true }; +} + +function decodeBase64Safe( + base64: string, +): { ok: true; buffer: Uint8Array } | { ok: false; reason: string } { + try { + const buffer = Buffer.from(base64, "base64"); + if (buffer.length === 0) return { ok: false, reason: "Decoded image is empty." }; + if (buffer.length > MAX_IMAGE_BYTES_FOR_VALIDATION) { + return { ok: false, reason: "Decoded image exceeds validation size budget." }; + } + return { ok: true, buffer }; + } catch { + return { ok: false, reason: "Invalid base64 encoding." }; + } +} + /** - * Validates a data URL by comparing its declared MIME type against the actual magic bytes. + * Validates a data URL by comparing its declared MIME type against the actual + * magic bytes AND by decoding the full buffer to verify a consistent trailer + * and the absence of polyglot markup markers. + * * Returns { valid: true } or { valid: false, reason: string }. */ -export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid: false; reason: string } { - // Parse the data URL +export function validateImageDataUrl( + dataUrl: string, +): { valid: true } | { valid: false; reason: string } { const match = dataUrl.match(/^data:(image\/[a-z+]+);base64,(.+)$/i); if (!match) { return { valid: false, reason: "Not a valid base64 image data URL." }; @@ -51,21 +143,22 @@ export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid const declaredMime = match[1]!.toLowerCase(); const base64 = match[2]!; - // Decode at least the first 16 bytes for signature checking - let buffer: Uint8Array; - try { - const chunk = base64.slice(0, 24); // 24 base64 chars = 18 bytes, more than enough - buffer = Uint8Array.from(atob(chunk), (c) => c.charCodeAt(0)); - } catch { - return { valid: false, reason: "Invalid base64 encoding." }; + // Explicitly reject SVG — it is XML and can carry