diff --git a/.env.example b/.env.example
index 0b7daa8..d49b85c 100644
--- a/.env.example
+++ b/.env.example
@@ -97,6 +97,15 @@ PGADMIN_PASSWORD=
# If not set, Sentry is disabled (SDK is installed but sends nothing).
# NEXT_PUBLIC_SENTRY_DSN=
+# ─── Dispo import ────────────────────────────────────────────────────────────
+
+# Absolute directory that dispo .xlsx workbook imports must live under. The
+# tRPC surface only accepts relative paths and the runtime reader re-validates
+# that any resolved path remains inside this directory; this prevents an
+# admin (or compromised admin token) from pointing the parser at arbitrary
+# files on disk and reaching ExcelJS CVEs. Defaults to ./imports if unset.
+# DISPO_IMPORT_DIR=/var/lib/capakraken/imports
+
# ─── Testing (never enable in production) ────────────────────────────────────
# Disables rate limiting and session tracking during end-to-end tests.
diff --git a/docs/security-architecture.md b/docs/security-architecture.md
index c7d6a37..84a8525 100644
--- a/docs/security-architecture.md
+++ b/docs/security-architecture.md
@@ -102,9 +102,12 @@ publicProcedure
- Strict TypeScript (`strict: true`, `exactOptionalPropertyTypes: true`)
- Blueprint dynamic fields validated at runtime against stored Zod schema definitions
- File uploads validated by:
- - MIME type whitelist (`image/png`, `image/jpeg`, `image/webp`, `image/tiff`, `image/bmp`)
+ - MIME type whitelist (`image/png`, `image/jpeg`, `image/webp`, `image/tiff`, `image/bmp`). SVG is explicitly rejected — XML markup could carry `", "utf8");
+ const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00, ...PNG_IEND, ...Array.from(html)];
+ const result = validateImageDataUrl(dataUrl("image/png", bytes));
+ expect(result.valid).toBe(false);
+ // Either the IEND-trailer check or the polyglot scan is acceptable — both
+ // reject the payload before it reaches storage. A tail after IEND naturally
+ // fails the trailer check first.
+ if (!result.valid) expect(result.reason).toMatch(/IEND|polyglot/i);
+ });
+
+ it("rejects a PNG that does not end with IEND", () => {
+ // Declare PNG and include header but truncate before IEND
+ const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00];
+ const result = validateImageDataUrl(dataUrl("image/png", bytes));
+ expect(result.valid).toBe(false);
+ if (!result.valid) expect(result.reason).toMatch(/IEND/);
+ });
+
+ it("rejects a JPEG that does not end with the EOI marker", () => {
+ const bytes = [...JPEG_HEADER, 0x00, 0x00];
+ const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
+ expect(result.valid).toBe(false);
+ if (!result.valid) expect(result.reason).toMatch(/EOI/);
+ });
+
+ it("rejects a MIME/content mismatch", () => {
+ const bytes = [...PNG_HEADER, 0x00, ...PNG_IEND];
+ const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
+ expect(result.valid).toBe(false);
+ if (!result.valid) expect(result.reason).toMatch(/mismatch/i);
+ });
+
+ it("rejects a javascript: URL embedded in an EXIF-like comment", () => {
+ const marker = Buffer.from("javascript:alert(1)", "utf8");
+ const bytes = [...JPEG_HEADER, ...Array.from(marker), ...JPEG_EOI];
+ const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
+ expect(result.valid).toBe(false);
+ if (!result.valid) expect(result.reason).toMatch(/polyglot/i);
+ });
+
+ it("rejects a non-data-URL string", () => {
+ expect(validateImageDataUrl("not a data url").valid).toBe(false);
+ });
+
+ it("rejects an empty decoded buffer", () => {
+ const result = validateImageDataUrl("data:image/png;base64,");
+ expect(result.valid).toBe(false);
+ });
+});
diff --git a/packages/api/src/lib/image-validation.ts b/packages/api/src/lib/image-validation.ts
index ac675f3..8dfb1c7 100644
--- a/packages/api/src/lib/image-validation.ts
+++ b/packages/api/src/lib/image-validation.ts
@@ -1,6 +1,11 @@
/**
- * Validates that the actual bytes of a base64-encoded image match its declared MIME type.
- * This prevents attackers from uploading malicious files with a spoofed extension/MIME.
+ * Validates that a base64 image data URL is a self-consistent image of its
+ * declared MIME type, and contains no polyglot markers (HTML/SVG/script tails
+ * masquerading under a valid image header). Note: this is validation, not
+ * sanitisation — we do not re-encode pixel data. The security goal is to
+ * prevent a user-uploaded data URL from ever passing if it contains anything
+ * a browser could later interpret as markup when the data URL is served
+ * somewhere less strict than `
`.
*/
interface MagicSignature {
@@ -8,16 +13,39 @@ interface MagicSignature {
bytes: number[];
}
+// Full PNG magic (8 bytes) and JPEG SOI (3 bytes). Older implementations used
+// shorter prefixes which allowed polyglot payloads whose non-header bytes
+// differed from the declared format.
const SIGNATURES: MagicSignature[] = [
- { mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47] }, // .PNG
+ { mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a] },
{ mimeType: "image/jpeg", bytes: [0xff, 0xd8, 0xff] },
{ mimeType: "image/webp", bytes: [0x52, 0x49, 0x46, 0x46] }, // RIFF (WebP starts with RIFF....WEBP)
- { mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] }, // GIF8
- { mimeType: "image/bmp", bytes: [0x42, 0x4d] }, // BM
- { mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] }, // Little-endian TIFF
- { mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] }, // Big-endian TIFF
+ { mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] },
+ { mimeType: "image/bmp", bytes: [0x42, 0x4d] },
+ { mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] },
+ { mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] },
];
+// Polyglot markers — byte sequences that must never appear inside a bona-fide
+// raster image. If any of these appears, the decoded content contains a
+// tail/comment section that a browser or downstream parser could interpret as
+// markup, giving us a stored-XSS vector if the bytes are ever served with a
+// non-strict MIME. All comparisons are lowercased.
+const POLYGLOT_MARKERS = [
+ " buffer[offset + i] === b);
+}
+
+function validateTrailer(
+ mime: string,
+ buffer: Uint8Array,
+): { valid: true } | { valid: false; reason: string } {
+ if (mime === "image/png") {
+ // PNG ends with the IEND chunk: 0x49 0x45 0x4e 0x44 0xae 0x42 0x60 0x82.
+ // Anything after IEND is a polyglot tail and is rejected.
+ if (!endsWith(buffer, [0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82])) {
+ return { valid: false, reason: "PNG does not end with a well-formed IEND chunk." };
+ }
+ }
+ if (mime === "image/jpeg") {
+ // JPEG must end with the EOI marker 0xFFD9.
+ if (!endsWith(buffer, [0xff, 0xd9])) {
+ return { valid: false, reason: "JPEG does not end with a well-formed EOI marker." };
+ }
+ }
+ return { valid: true };
+}
+
+function scanForPolyglotMarkers(
+ buffer: Uint8Array,
+): { valid: true } | { valid: false; reason: string } {
+ // Only the "textual" portion of an image — comments, EXIF text blocks, tail
+ // after the declared trailer — could carry HTML. We do a full-buffer scan
+ // because those regions can legitimately appear anywhere in the byte stream.
+ // Buffers up to MAX_IMAGE_BYTES_FOR_VALIDATION are cheap to scan linearly.
+ const asText = Buffer.from(buffer).toString("latin1").toLowerCase();
+ for (const marker of POLYGLOT_MARKERS) {
+ if (asText.includes(marker)) {
+ return {
+ valid: false,
+ reason: `Image contains a polyglot marker ("${marker}") — likely a disguised markup payload.`,
+ };
+ }
+ }
+ return { valid: true };
+}
+
+function decodeBase64Safe(
+ base64: string,
+): { ok: true; buffer: Uint8Array } | { ok: false; reason: string } {
+ try {
+ const buffer = Buffer.from(base64, "base64");
+ if (buffer.length === 0) return { ok: false, reason: "Decoded image is empty." };
+ if (buffer.length > MAX_IMAGE_BYTES_FOR_VALIDATION) {
+ return { ok: false, reason: "Decoded image exceeds validation size budget." };
+ }
+ return { ok: true, buffer };
+ } catch {
+ return { ok: false, reason: "Invalid base64 encoding." };
+ }
+}
+
/**
- * Validates a data URL by comparing its declared MIME type against the actual magic bytes.
+ * Validates a data URL by comparing its declared MIME type against the actual
+ * magic bytes AND by decoding the full buffer to verify a consistent trailer
+ * and the absence of polyglot markup markers.
+ *
* Returns { valid: true } or { valid: false, reason: string }.
*/
-export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid: false; reason: string } {
- // Parse the data URL
+export function validateImageDataUrl(
+ dataUrl: string,
+): { valid: true } | { valid: false; reason: string } {
const match = dataUrl.match(/^data:(image\/[a-z+]+);base64,(.+)$/i);
if (!match) {
return { valid: false, reason: "Not a valid base64 image data URL." };
@@ -51,21 +143,22 @@ export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid
const declaredMime = match[1]!.toLowerCase();
const base64 = match[2]!;
- // Decode at least the first 16 bytes for signature checking
- let buffer: Uint8Array;
- try {
- const chunk = base64.slice(0, 24); // 24 base64 chars = 18 bytes, more than enough
- buffer = Uint8Array.from(atob(chunk), (c) => c.charCodeAt(0));
- } catch {
- return { valid: false, reason: "Invalid base64 encoding." };
+ // Explicitly reject SVG — it is XML and can carry