security: workbook path allowlist + stronger image polyglot validation (#54)

- dispo workbook imports are pinned to DISPO_IMPORT_DIR (default ./imports):
  tRPC input rejects absolute paths and .. segments, runtime reader
  re-validates containment via path.relative. Closes a path-traversal
  class that reached ExcelJS CVEs through admin/compromised tokens.
- image validator now checks the full 8-byte PNG magic, enforces PNG IEND
  and JPEG EOI trailers, scans the decoded buffer for markup polyglot
  markers (<script, <svg, <iframe, javascript:, onerror=, ...), and
  explicitly rejects SVG. Provider-generated covers (DALL-E, Gemini) run
  through the same validator before persistence — an untrusted upstream
  cannot smuggle a stored-XSS payload past us.
- added image-validation.test.ts and tightened documentation.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 15:26:29 +02:00
parent 3392297791
commit c4b01c1bfc
11 changed files with 394 additions and 65 deletions
+9
View File
@@ -97,6 +97,15 @@ PGADMIN_PASSWORD=
# If not set, Sentry is disabled (SDK is installed but sends nothing).
# NEXT_PUBLIC_SENTRY_DSN=
# ─── Dispo import ────────────────────────────────────────────────────────────
# Absolute directory that dispo .xlsx workbook imports must live under. The
# tRPC surface only accepts relative paths and the runtime reader re-validates
# that any resolved path remains inside this directory; this prevents an
# admin (or compromised admin token) from pointing the parser at arbitrary
# files on disk and reaching ExcelJS CVEs. Defaults to ./imports (resolved against the process working directory) if unset or blank.
# DISPO_IMPORT_DIR=/var/lib/capakraken/imports
# ─── Testing (never enable in production) ────────────────────────────────────
# Disables rate limiting and session tracking during end-to-end tests.
+5 -2
View File
@@ -102,9 +102,12 @@ publicProcedure
- Strict TypeScript (`strict: true`, `exactOptionalPropertyTypes: true`)
- Blueprint dynamic fields validated at runtime against stored Zod schema definitions
- File uploads validated by:
- MIME type whitelist (`image/png`, `image/jpeg`, `image/webp`, `image/tiff`, `image/bmp`)
- MIME type whitelist (`image/png`, `image/jpeg`, `image/webp`, `image/tiff`, `image/bmp`). SVG is explicitly rejected — XML markup could carry `<script>`.
- Size limit (10 MB client-side, 4 MB server-side after compression)
- Magic byte verification (actual file content matched against declared MIME)
- Full magic-byte verification: declared MIME must match actual content. PNG uses the full 8-byte signature, not a short prefix that would accept polyglots.
- Trailer check: PNG must end with an `IEND` chunk, JPEG with the `FFD9` EOI marker. Any bytes appended after the trailer are rejected.
- Polyglot-marker scan: the decoded buffer is searched (latin1, lowercased) for markup fragments (`<script`, `<svg`, `<iframe`, `javascript:`, `onerror=`, …) and rejected if any appear. Provider-generated images (DALL-E, Gemini) run through the same validator before persistence — an untrusted upstream cannot smuggle a stored-XSS payload past us by virtue of being "our" API.
- Dispo workbook imports must live under the `DISPO_IMPORT_DIR` directory (defaults to `./imports`). The tRPC input schema accepts only relative paths (no `..` segments, no absolute paths), and the runtime workbook reader re-validates that the resolved absolute path stays inside `DISPO_IMPORT_DIR`. This closes a path-traversal class that would have let an admin (or compromised admin token) point the ExcelJS parser at arbitrary files on disk, keeping known ExcelJS CVEs from being reachable through our own API.
### Prompt-Injection Guard (defense-in-depth only)
@@ -58,22 +58,22 @@ describe("assistant dispo import batch delegation tools", () => {
const result = await executeTool(
"stage_dispo_import_batch",
JSON.stringify({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
costWorkbookPath: "/imports/cost.xlsx",
rosterWorkbookPath: "/imports/roster.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
costWorkbookPath: "cost.xlsx",
rosterWorkbookPath: "roster.xlsx",
notes: "March import",
}),
ctx,
);
expect(stageDispoImportBatch).toHaveBeenCalledWith(ctx.db, {
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
costWorkbookPath: "/imports/cost.xlsx",
rosterWorkbookPath: "/imports/roster.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
costWorkbookPath: "cost.xlsx",
rosterWorkbookPath: "roster.xlsx",
notes: "March import",
});
expect(JSON.parse(result.content)).toEqual({
@@ -92,18 +92,18 @@ describe("assistant dispo import batch delegation tools", () => {
const result = await executeTool(
"validate_dispo_import_batch",
JSON.stringify({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
importBatchId: "batch_1",
}),
ctx,
);
expect(assessDispoImportReadiness).toHaveBeenCalledWith({
chargeabilityWorkbookPath: "/imports/chargeability.xlsx",
planningWorkbookPath: "/imports/planning.xlsx",
referenceWorkbookPath: "/imports/reference.xlsx",
chargeabilityWorkbookPath: "chargeability.xlsx",
planningWorkbookPath: "planning.xlsx",
referenceWorkbookPath: "reference.xlsx",
importBatchId: "batch_1",
});
expect(JSON.parse(result.content)).toEqual({
@@ -41,7 +41,7 @@ vi.mock("../ai-client.js", async (importOriginal) => {
createDalleClient: vi.fn(() => ({
images: {
generate: vi.fn().mockResolvedValue({
data: [{ b64_json: "ZmFrZQ==" }],
data: [{ b64_json: "iVBORw0KGgoAAAAASUVORK5CYII=" }],
}),
},
})),
@@ -49,10 +49,7 @@ vi.mock("../ai-client.js", async (importOriginal) => {
};
});
import {
createToolContext,
executeTool,
} from "./assistant-tools-project-media-test-helpers.js";
import { createToolContext, executeTool } from "./assistant-tools-project-media-test-helpers.js";
describe("assistant project cover generation tools", () => {
beforeEach(() => {
@@ -60,7 +57,8 @@ describe("assistant project cover generation tools", () => {
});
it("routes project cover generation through the real project router path", async () => {
const projectFindUnique = vi.fn()
const projectFindUnique = vi
.fn()
.mockResolvedValueOnce({
id: "project_1",
name: "Project One",
@@ -84,7 +82,7 @@ describe("assistant project cover generation tools", () => {
});
const projectUpdate = vi.fn().mockResolvedValue({
id: "project_1",
coverImageUrl: "data:image/png;base64,ZmFrZQ==",
coverImageUrl: "data:image/png;base64,iVBORw0KGgoAAAAASUVORK5CYII=",
});
const ctx = createToolContext(
{
@@ -119,7 +117,7 @@ describe("assistant project cover generation tools", () => {
expect(projectUpdate).toHaveBeenCalledWith({
where: { id: "project_1" },
data: { coverImageUrl: "data:image/png;base64,ZmFrZQ==" },
data: { coverImageUrl: "data:image/png;base64,iVBORw0KGgoAAAAASUVORK5CYII=" },
});
expect(projectFindUnique).toHaveBeenCalledWith({
where: { id: "project_1" },
@@ -0,0 +1,82 @@
import { describe, expect, it } from "vitest";
import { validateImageDataUrl } from "../lib/image-validation.js";
const PNG_HEADER = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
const PNG_IEND = [0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82];
const JPEG_HEADER = [0xff, 0xd8, 0xff, 0xe0];
const JPEG_EOI = [0xff, 0xd9];
/**
 * Test helper: wraps raw byte values in a base64 `data:` URL carrying the
 * given MIME type, so each case can hand-craft an image payload.
 */
function dataUrl(mime: string, bytes: number[]): string {
  const encoded = Buffer.from(new Uint8Array(bytes)).toString("base64");
  return "data:" + mime + ";base64," + encoded;
}
// Behavioural suite for validateImageDataUrl. Every case assembles a payload
// from raw bytes (via dataUrl) or a literal string and asserts on the verdict
// and, where relevant, on a keyword in the rejection reason.
describe("validateImageDataUrl", () => {
it("accepts a minimal well-formed PNG", () => {
// 8-byte signature followed by a zero-length IEND chunk (4-byte length of 0,
// then chunk type + CRC). There is no IHDR chunk, so this exercises the
// signature and trailer checks only, not structural PNG parsing.
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00, ...PNG_IEND];
expect(validateImageDataUrl(dataUrl("image/png", bytes))).toEqual({ valid: true });
});
it("accepts a minimal well-formed JPEG", () => {
// Header marker bytes, two filler bytes, then the EOI marker.
const bytes = [...JPEG_HEADER, 0x00, 0x00, ...JPEG_EOI];
expect(validateImageDataUrl(dataUrl("image/jpeg", bytes))).toEqual({ valid: true });
});
it("rejects SVG uploads explicitly", () => {
// Declared as image/svg+xml; the rejection reason is expected to mention SVG.
const svgBytes = Buffer.from("<svg xmlns='http://www.w3.org/2000/svg'/>", "utf8");
const base64 = svgBytes.toString("base64");
const result = validateImageDataUrl(`data:image/svg+xml;base64,${base64}`);
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/SVG/i);
});
it("rejects a polyglot PNG with an HTML tail after IEND", () => {
const html = Buffer.from("<!doctype html><script>alert(1)</script>", "utf8");
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00, ...PNG_IEND, ...Array.from(html)];
const result = validateImageDataUrl(dataUrl("image/png", bytes));
expect(result.valid).toBe(false);
// Either the IEND-trailer check or the polyglot scan is acceptable — both
// reject the payload before it reaches storage. A tail after IEND naturally
// fails the trailer check first.
if (!result.valid) expect(result.reason).toMatch(/IEND|polyglot/i);
});
it("rejects a PNG that does not end with IEND", () => {
// Declare PNG and include header but truncate before IEND
const bytes = [...PNG_HEADER, 0x00, 0x00, 0x00, 0x00];
const result = validateImageDataUrl(dataUrl("image/png", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/IEND/);
});
it("rejects a JPEG that does not end with the EOI marker", () => {
// Same bytes as the accepted JPEG case, minus the trailing EOI marker.
const bytes = [...JPEG_HEADER, 0x00, 0x00];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/EOI/);
});
it("rejects a MIME/content mismatch", () => {
// PNG bytes declared as image/jpeg.
const bytes = [...PNG_HEADER, 0x00, ...PNG_IEND];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/mismatch/i);
});
it("rejects a javascript: URL embedded in an EXIF-like comment", () => {
// Header and EOI are intact, so only the marker scan can reject this payload.
const marker = Buffer.from("javascript:alert(1)", "utf8");
const bytes = [...JPEG_HEADER, ...Array.from(marker), ...JPEG_EOI];
const result = validateImageDataUrl(dataUrl("image/jpeg", bytes));
expect(result.valid).toBe(false);
if (!result.valid) expect(result.reason).toMatch(/polyglot/i);
});
it("rejects a non-data-URL string", () => {
expect(validateImageDataUrl("not a data url").valid).toBe(false);
});
it("rejects an empty decoded buffer", () => {
// Nothing follows the comma, so there are no image bytes to validate.
const result = validateImageDataUrl("data:image/png;base64,");
expect(result.valid).toBe(false);
});
});
+118 -19
View File
@@ -1,6 +1,11 @@
/**
* Validates that the actual bytes of a base64-encoded image match its declared MIME type.
* This prevents attackers from uploading malicious files with a spoofed extension/MIME.
* Validates that a base64 image data URL is a self-consistent image of its
* declared MIME type, and contains no polyglot markers (HTML/SVG/script tails
* masquerading under a valid image header). Note: this is validation, not
* sanitisation — we do not re-encode pixel data. The security goal is to
* prevent a user-uploaded data URL from ever passing if it contains anything
* a browser could later interpret as markup when the data URL is served
* somewhere less strict than `<img src>`.
*/
interface MagicSignature {
@@ -8,16 +13,39 @@ interface MagicSignature {
bytes: number[];
}
// Full PNG magic (8 bytes) and JPEG SOI (3 bytes). Older implementations used
// shorter prefixes which allowed polyglot payloads whose non-header bytes
// differed from the declared format.
const SIGNATURES: MagicSignature[] = [
{ mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47] }, // .PNG
{ mimeType: "image/png", bytes: [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a] },
{ mimeType: "image/jpeg", bytes: [0xff, 0xd8, 0xff] },
{ mimeType: "image/webp", bytes: [0x52, 0x49, 0x46, 0x46] }, // RIFF (WebP starts with RIFF....WEBP)
{ mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] }, // GIF8
{ mimeType: "image/bmp", bytes: [0x42, 0x4d] }, // BM
{ mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] }, // Little-endian TIFF
{ mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] }, // Big-endian TIFF
{ mimeType: "image/gif", bytes: [0x47, 0x49, 0x46, 0x38] },
{ mimeType: "image/bmp", bytes: [0x42, 0x4d] },
{ mimeType: "image/tiff", bytes: [0x49, 0x49, 0x2a, 0x00] },
{ mimeType: "image/tiff", bytes: [0x4d, 0x4d, 0x00, 0x2a] },
];
// Polyglot markers — lowercase text fragments that are treated as evidence of
// an embedded markup payload. If one is found, the image is assumed to carry a
// tail/comment section that a browser or downstream parser could interpret as
// markup, which would be a stored-XSS vector if the bytes are ever served with
// a non-strict MIME. scanForPolyglotMarkers lowercases the decoded bytes
// before searching, so every entry here must itself be lowercase.
//
// NOTE(review): compressed pixel data is effectively arbitrary bytes, so a
// short marker such as "<svg" can occur by chance inside a large, perfectly
// legitimate image. Confirm that the resulting false-positive rate is
// acceptable for the expected upload sizes.
const POLYGLOT_MARKERS = [
"<!doctype",
"<script",
"<svg",
"<html",
"<iframe",
"<object",
"<embed",
"javascript:",
"onerror=",
"onload=",
];
const MAX_IMAGE_BYTES_FOR_VALIDATION = 16 * 1024 * 1024; // refuse to decode anything silly-large
/**
* Detects the actual MIME type of a binary buffer by checking magic bytes.
* Returns null if no known image signature matches.
@@ -37,12 +65,76 @@ export function detectImageMime(buffer: Uint8Array): string | null {
return null;
}
/**
 * Reports whether the final bytes of `buffer` equal `tail`, element by element.
 *
 * @param buffer - Bytes to inspect.
 * @param tail - Expected trailing byte values.
 * @returns `true` when `buffer` is at least as long as `tail` and its last
 *   `tail.length` bytes match; an empty `tail` always matches.
 */
function endsWith(buffer: Uint8Array, tail: number[]): boolean {
  const start = buffer.length - tail.length;
  if (start < 0) {
    return false;
  }
  for (let index = 0; index < tail.length; index += 1) {
    if (buffer[start + index] !== tail[index]) {
      return false;
    }
  }
  return true;
}
/**
 * Checks that the buffer finishes with the end-of-image bytes expected for the
 * given MIME type. Only PNG and JPEG are checked; every other MIME type passes
 * through as valid.
 *
 * @param mime - MIME type whose trailer rule should be applied.
 * @param buffer - Fully decoded image bytes.
 * @returns `{ valid: true }`, or `{ valid: false, reason }` naming the missing trailer.
 */
function validateTrailer(
  mime: string,
  buffer: Uint8Array,
): { valid: true } | { valid: false; reason: string } {
  switch (mime) {
    case "image/png": {
      // "IEND" chunk type followed by its fixed CRC. Bytes appended after it
      // move these eight bytes away from the end of the buffer, so the check fails.
      const iendTail = [0x49, 0x45, 0x4e, 0x44, 0xae, 0x42, 0x60, 0x82];
      return endsWith(buffer, iendTail)
        ? { valid: true }
        : { valid: false, reason: "PNG does not end with a well-formed IEND chunk." };
    }
    case "image/jpeg": {
      // End-of-image marker 0xFFD9 must be the last two bytes.
      const eoiMarker = [0xff, 0xd9];
      return endsWith(buffer, eoiMarker)
        ? { valid: true }
        : { valid: false, reason: "JPEG does not end with a well-formed EOI marker." };
    }
    default:
      return { valid: true };
  }
}
/**
 * Searches the whole decoded image for any entry of POLYGLOT_MARKERS.
 *
 * The bytes are viewed as a latin1 string (one character per byte) and
 * lowercased, so the match is case-insensitive. The entire buffer is scanned
 * rather than selected regions, because text-bearing sections (comments,
 * metadata blocks, data appended after the trailer) can sit anywhere in the
 * byte stream.
 *
 * @param buffer - Fully decoded image bytes.
 * @returns `{ valid: true }` when no marker occurs, otherwise
 *   `{ valid: false, reason }` quoting the first marker (in list order) found.
 */
function scanForPolyglotMarkers(
  buffer: Uint8Array,
): { valid: true } | { valid: false; reason: string } {
  const haystack = Buffer.from(buffer).toString("latin1").toLowerCase();
  const hit = POLYGLOT_MARKERS.find((candidate) => haystack.includes(candidate));
  if (hit === undefined) {
    return { valid: true };
  }
  return {
    valid: false,
    reason: `Image contains a polyglot marker ("${hit}") — likely a disguised markup payload.`,
  };
}
/**
 * Strictly decodes a base64 payload into bytes, refusing malformed, empty or
 * oversized input before any buffer is allocated.
 *
 * `Buffer.from(text, "base64")` never throws: it silently skips characters
 * outside the alphabet. A try/catch around it therefore cannot detect bad
 * input, so the alphabet is validated explicitly here. Once the text is known
 * to be clean, its decoded size follows exactly from its length, which lets
 * the size budget be enforced *before* decoding rather than after.
 *
 * @param base64 - The payload portion of a data URL (everything after the comma).
 * @returns `{ ok: true, buffer }` with the decoded bytes, or `{ ok: false, reason }`
 *   when the text is not standard base64, decodes to zero bytes, or would
 *   exceed MAX_IMAGE_BYTES_FOR_VALIDATION.
 */
function decodeBase64Safe(
  base64: string,
): { ok: true; buffer: Uint8Array } | { ok: false; reason: string } {
  // Standard alphabet only, with at most two trailing "=". Whitespace, the
  // URL-safe "-"/"_" variants and any other byte are rejected outright.
  if (!/^[A-Za-z0-9+/]*={0,2}$/.test(base64)) {
    return { ok: false, reason: "Invalid base64 encoding." };
  }

  const symbolCount = base64.replace(/=+$/, "").length;
  // A single leftover symbol carries only 6 bits and cannot encode a byte.
  if (symbolCount % 4 === 1) {
    return { ok: false, reason: "Invalid base64 encoding." };
  }

  // Every 4 symbols yield 3 bytes; a trailing group of 2 or 3 yields 1 or 2.
  const decodedLength = Math.floor((symbolCount * 3) / 4);
  if (decodedLength === 0) {
    return { ok: false, reason: "Decoded image is empty." };
  }
  if (decodedLength > MAX_IMAGE_BYTES_FOR_VALIDATION) {
    return { ok: false, reason: "Decoded image exceeds validation size budget." };
  }

  return { ok: true, buffer: Buffer.from(base64, "base64") };
}
/**
* Validates a data URL by comparing its declared MIME type against the actual magic bytes.
* Validates a data URL by comparing its declared MIME type against the actual
* magic bytes AND by decoding the full buffer to verify a consistent trailer
* and the absence of polyglot markup markers.
*
* Returns { valid: true } or { valid: false, reason: string }.
*/
export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid: false; reason: string } {
// Parse the data URL
export function validateImageDataUrl(
dataUrl: string,
): { valid: true } | { valid: false; reason: string } {
const match = dataUrl.match(/^data:(image\/[a-z+]+);base64,(.+)$/i);
if (!match) {
return { valid: false, reason: "Not a valid base64 image data URL." };
@@ -51,21 +143,22 @@ export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid
const declaredMime = match[1]!.toLowerCase();
const base64 = match[2]!;
// Decode at least the first 16 bytes for signature checking
let buffer: Uint8Array;
try {
const chunk = base64.slice(0, 24); // 24 base64 chars = 18 bytes, more than enough
buffer = Uint8Array.from(atob(chunk), (c) => c.charCodeAt(0));
} catch {
return { valid: false, reason: "Invalid base64 encoding." };
// Explicitly reject SVG — it is XML and can carry <script>. We do not accept
// vector uploads here regardless of how cleanly the payload decodes.
if (declaredMime === "image/svg+xml" || declaredMime === "image/svg") {
return { valid: false, reason: "SVG uploads are not permitted." };
}
const actualMime = detectImageMime(buffer);
const decoded = decodeBase64Safe(base64);
if (!decoded.ok) {
return { valid: false, reason: decoded.reason };
}
const actualMime = detectImageMime(decoded.buffer);
if (!actualMime) {
return { valid: false, reason: "File content does not match any known image format." };
}
// Allow JPEG variants (image/jpeg matches image/jpg header)
const normalize = (m: string) => m.replace("image/jpg", "image/jpeg");
if (normalize(declaredMime) !== normalize(actualMime)) {
return {
@@ -74,5 +167,11 @@ export function validateImageDataUrl(dataUrl: string): { valid: true } | { valid
};
}
const trailer = validateTrailer(actualMime, decoded.buffer);
if (!trailer.valid) return trailer;
const polyglot = scanForPolyglotMarkers(decoded.buffer);
if (!polyglot.valid) return polyglot;
return { valid: true };
}
@@ -1,8 +1,5 @@
import {
DispoStagedRecordType,
ImportBatchStatus,
StagedRecordStatus,
} from "@capakraken/db";
import path from "node:path";
import { DispoStagedRecordType, ImportBatchStatus, StagedRecordStatus } from "@capakraken/db";
import {
assessDispoImportReadiness,
stageDispoImportBatch as stageDispoImportBatchApplication,
@@ -34,12 +31,24 @@ const paginationSchema = z.object({
const importBatchStatusSchema = z.nativeEnum(ImportBatchStatus);
const stagedRecordStatusSchema = z.nativeEnum(StagedRecordStatus);
const stagedRecordTypeSchema = z.nativeEnum(DispoStagedRecordType);
// Reject absolute paths and paths that contain `..` segments at the router
// boundary. The workbook reader re-validates against DISPO_IMPORT_DIR as
// defence-in-depth, but rejecting early here gives a clearer error to admin
// users and shrinks the attack surface if the reader is ever called with a
// different allowlist policy.
const workbookPathSchema = z
.string()
.trim()
.min(1, "Workbook path is required.")
.max(4096, "Workbook path is too long.")
.refine((value) => value.toLowerCase().endsWith(".xlsx"), {
message: "Only .xlsx workbook paths are supported.",
})
.refine((value) => !path.isAbsolute(value), {
message: "Workbook path must be relative to the configured import directory.",
})
.refine((value) => !value.split(/[\\/]/).some((segment) => segment === ".."), {
message: "Workbook path must not contain parent-directory segments.",
});
export const stageImportBatchInputSchema = z.object({
@@ -120,17 +129,16 @@ type ListStagedUnresolvedRecordsInput = z.infer<typeof listStagedUnresolvedRecor
type ResolveStagedRecordInput = z.infer<typeof resolveStagedRecordInputSchema>;
type CommitImportBatchInput = z.infer<typeof commitImportBatchInputSchema>;
export async function stageImportBatch(
ctx: DispoProcedureContext,
input: StageImportBatchInput,
) {
export async function stageImportBatch(ctx: DispoProcedureContext, input: StageImportBatchInput) {
return stageDispoImportBatchApplication(ctx.db, {
chargeabilityWorkbookPath: input.chargeabilityWorkbookPath,
planningWorkbookPath: input.planningWorkbookPath,
referenceWorkbookPath: input.referenceWorkbookPath,
...(input.costWorkbookPath !== undefined ? { costWorkbookPath: input.costWorkbookPath } : {}),
...(input.notes !== undefined ? { notes: input.notes } : {}),
...(input.rosterWorkbookPath !== undefined ? { rosterWorkbookPath: input.rosterWorkbookPath } : {}),
...(input.rosterWorkbookPath !== undefined
? { rosterWorkbookPath: input.rosterWorkbookPath }
: {}),
});
}
@@ -142,7 +150,9 @@ export async function validateImportBatch(input: ValidateImportBatchInput) {
...(input.costWorkbookPath !== undefined ? { costWorkbookPath: input.costWorkbookPath } : {}),
...(input.importBatchId !== undefined ? { importBatchId: input.importBatchId } : {}),
...(input.notes !== undefined ? { notes: input.notes } : {}),
...(input.rosterWorkbookPath !== undefined ? { rosterWorkbookPath: input.rosterWorkbookPath } : {}),
...(input.rosterWorkbookPath !== undefined
? { rosterWorkbookPath: input.rosterWorkbookPath }
: {}),
});
}
@@ -200,10 +210,7 @@ export async function resolveStagedRecord(
return resolveStagedRecordMutation(ctx.db, input);
}
export async function commitImportBatch(
ctx: DispoProcedureContext,
input: CommitImportBatchInput,
) {
export async function commitImportBatch(ctx: DispoProcedureContext, input: CommitImportBatchInput) {
return commitImportBatchMutation(ctx.db, {
importBatchId: input.importBatchId,
allowTbdUnresolved: input.allowTbdUnresolved,
+20
View File
@@ -100,6 +100,18 @@ export const projectCoverProcedures = {
message: `Gemini error: ${parseGeminiError(err)}`,
});
}
// Provider-generated output is still untrusted — a compromised or
// misconfigured upstream could return a polyglot payload. Run the
// same magic-byte + trailer + marker check we apply to user uploads
// before we persist the data URL to the database.
const providerCheck = validateImageDataUrl(coverImageUrl);
if (!providerCheck.valid) {
throw new TRPCError({
code: "INTERNAL_SERVER_ERROR",
message: `Provider image rejected by validator: ${providerCheck.reason}`,
});
}
} else {
const dalleClient = createDalleClient(runtimeSettings);
const model =
@@ -135,6 +147,14 @@ export const projectCoverProcedures = {
}
coverImageUrl = `data:image/png;base64,${b64}`;
const providerCheck = validateImageDataUrl(coverImageUrl);
if (!providerCheck.valid) {
throw new TRPCError({
code: "INTERNAL_SERVER_ERROR",
message: `Provider image rejected by validator: ${providerCheck.reason}`,
});
}
}
await ctx.db.project.update({
@@ -1,6 +1,6 @@
import { existsSync } from "node:fs";
import { fileURLToPath } from "node:url";
import { describe, expect, it, vi } from "vitest";
import { afterAll, beforeAll, describe, expect, it, vi } from "vitest";
import {
assessDispoImportReadiness,
parseDispoChargeabilityWorkbook,
@@ -47,6 +47,19 @@ const hasSamples = [
costWorkbookPath,
].every((p) => existsSync(p));
// The dispo reader enforces DISPO_IMPORT_DIR as an allowlist. Sample fixtures
// live at the repo root (outside any production import dir), so scope the
// allowlist to `/` for this suite; a dedicated suite in read-workbook.test.ts
// exercises the containment check explicitly.
const originalImportDir = process.env["DISPO_IMPORT_DIR"];
beforeAll(() => {
process.env["DISPO_IMPORT_DIR"] = "/";
});
afterAll(() => {
if (originalImportDir === undefined) delete process.env["DISPO_IMPORT_DIR"];
else process.env["DISPO_IMPORT_DIR"] = originalImportDir;
});
describe.skipIf(!hasSamples)("dispo import", () => {
it("parses the mandatory reference workbook into normalized master data", async () => {
const parsed = await parseMandatoryDispoReferenceWorkbook(mandatoryWorkbookPath);
@@ -3,7 +3,7 @@ import { cp, mkdtemp, rm, writeFile } from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { afterEach, describe, expect, it } from "vitest";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
MAX_DISPO_WORKBOOK_BYTES,
MAX_DISPO_WORKBOOK_COLUMNS,
@@ -33,6 +33,20 @@ const itIfSamples = hasSamples ? it : it.skip;
const tempDirectories: string[] = [];
// The dispo reader now enforces DISPO_IMPORT_DIR as an allowlist. Existing
// tests pass absolute paths from sample fixtures or tmpdirs that live outside
// any production import dir, so scope the allowlist to the filesystem root
// for the test suite. New tests below restore a narrow allowlist to exercise
// the containment check explicitly.
const originalImportDir = process.env["DISPO_IMPORT_DIR"];
beforeAll(() => {
process.env["DISPO_IMPORT_DIR"] = "/";
});
afterAll(() => {
if (originalImportDir === undefined) delete process.env["DISPO_IMPORT_DIR"];
else process.env["DISPO_IMPORT_DIR"] = originalImportDir;
});
afterEach(async () => {
await Promise.all(
tempDirectories.splice(0).map(async (directory) => {
@@ -136,4 +150,58 @@ describe("readWorksheetMatrix", () => {
`exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit`,
);
}, 30000);
// Containment tests for the DISPO_IMPORT_DIR allowlist. Each case narrows the
// allowlist to a fresh temp directory for the duration of a single read.
describe("DISPO_IMPORT_DIR allowlist", () => {
  /**
   * Runs `run` with DISPO_IMPORT_DIR pointing at `importDir`, then restores the
   * previous state. Assigning `undefined` to a process.env key stores the
   * literal string "undefined", so an originally-unset variable is deleted
   * instead of being assigned back.
   */
  async function withImportDir<T>(importDir: string, run: () => Promise<T>): Promise<T> {
    const previous = process.env["DISPO_IMPORT_DIR"];
    process.env["DISPO_IMPORT_DIR"] = importDir;
    try {
      return await run();
    } finally {
      if (previous === undefined) delete process.env["DISPO_IMPORT_DIR"];
      else process.env["DISPO_IMPORT_DIR"] = previous;
    }
  }

  it("rejects absolute paths that escape the configured import dir", async () => {
    const allowedDir = await makeTempDirectory();
    const outsideDir = await makeTempDirectory();
    const outsidePath = path.join(outsideDir, "outside.xlsx");
    await writeWorkbook(outsidePath, [["a"]]);

    await withImportDir(allowedDir, async () => {
      await expect(readWorksheetMatrix(outsidePath, "Sheet1")).rejects.toThrow(
        "Workbook path must be inside the configured import directory",
      );
    });
  });

  it("rejects relative paths that traverse out of the configured import dir", async () => {
    const allowedDir = await makeTempDirectory();
    const siblingDir = await makeTempDirectory();
    const siblingPath = path.join(siblingDir, "sibling.xlsx");
    await writeWorkbook(siblingPath, [["a"]]);
    // Sanity check: the relative form really does climb out of allowedDir.
    const relative = path.relative(allowedDir, siblingPath);
    expect(relative.startsWith("..")).toBe(true);

    await withImportDir(allowedDir, async () => {
      await expect(readWorksheetMatrix(relative, "Sheet1")).rejects.toThrow(
        "Workbook path must be inside the configured import directory",
      );
    });
  });

  it("accepts paths that resolve inside the configured import dir", async () => {
    const allowedDir = await makeTempDirectory();
    const insidePath = path.join(allowedDir, "inside.xlsx");
    await writeWorkbook(insidePath, [["hello"]]);

    await withImportDir(allowedDir, async () => {
      // A bare file name is resolved against DISPO_IMPORT_DIR by the reader.
      const rows = await readWorksheetMatrix("inside.xlsx", "Sheet1");
      expect(rows[0]?.[0]).toBe("hello");
    });
  });
});
});
@@ -4,6 +4,18 @@ import path from "node:path";
export type WorksheetCellValue = boolean | Date | number | string | null;
export type WorksheetMatrix = WorksheetCellValue[][];
// Path allowlist: dispo workbooks must live inside DISPO_IMPORT_DIR. Without
// this guard an admin (or a compromised admin token) could point the ExcelJS
// parser at any file the app process can read, reaching library CVEs on
// arbitrary filesystem paths. Default picks an in-repo `imports/` directory so
// local dev still works; production deployments should set DISPO_IMPORT_DIR
// explicitly to a dedicated volume.
/**
 * Resolves the directory that dispo workbooks must live under.
 *
 * Reads DISPO_IMPORT_DIR on every call (so changes to the environment take
 * effect immediately) and falls back to `imports` under the current working
 * directory when the variable is unset or blank. Surrounding whitespace is
 * stripped before use: a padded value such as " /srv/imports " would otherwise
 * pass the non-empty check and then be resolved as a relative path beneath the
 * working directory.
 *
 * @returns The absolute, normalised import directory.
 */
function resolveImportDir(): string {
  const configured = process.env["DISPO_IMPORT_DIR"]?.trim();
  const base = configured !== undefined && configured.length > 0 ? configured : "imports";
  return path.resolve(base);
}
type ExcelJsModule = typeof import("exceljs");
type ExcelJsWorkbook = InstanceType<ExcelJsModule["Workbook"]>;
type ExcelJsXlsxReader = ExcelJsWorkbook["xlsx"] & {
@@ -25,7 +37,9 @@ const EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER = '"name":"dateGroupItem"';
let _excelJs: ExcelJsModule | null = null;
const worksheetMatrixCache = new Map<string, Promise<WorksheetMatrix>>();
function normalizeExcelJsModule(module: ExcelJsModule | { default?: ExcelJsModule }): ExcelJsModule {
function normalizeExcelJsModule(
module: ExcelJsModule | { default?: ExcelJsModule },
): ExcelJsModule {
return "Workbook" in module ? module : (module.default as ExcelJsModule);
}
@@ -58,7 +72,19 @@ function cloneWorksheetMatrix(rows: WorksheetMatrix): WorksheetMatrix {
}
async function validateWorkbookPath(workbookPath: string): Promise<string> {
const resolvedPath = path.resolve(workbookPath);
const importDir = resolveImportDir();
const resolvedPath = path.resolve(importDir, workbookPath);
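// path.relative yields a string that starts with ".." (or equals "..") when
// the resolved path lies outside importDir, and can be absolute when the two
// paths share no common root (e.g. different drives on Windows). Both are
// rejected — defence against `..` sequences and absolute-path injection via
// the tRPC surface.
// NOTE(review): path.resolve/path.relative are purely lexical, so this check
// on its own does not detect a symlink inside importDir that points outside
// it — confirm whether a realpath-based check is needed.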
const relative = path.relative(importDir, resolvedPath);
if (relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)) {
throw new Error(
`Workbook path must be inside the configured import directory: "${workbookPath}"`,
);
}
if (path.extname(resolvedPath).toLowerCase() !== DISPO_WORKBOOK_EXTENSION) {
throw new Error(
@@ -132,7 +158,11 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
return String(value);
}
function assertWorksheetShape(rows: WorksheetMatrix, sheetName: string, workbookPath: string): void {
function assertWorksheetShape(
rows: WorksheetMatrix,
sheetName: string,
workbookPath: string,
): void {
if (rows.length > MAX_DISPO_WORKBOOK_ROWS) {
throw new Error(
`Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit.`,