c4b01c1bfc
- dispo workbook imports are pinned to DISPO_IMPORT_DIR (default ./imports): tRPC input rejects absolute paths and .. segments, runtime reader re-validates containment via path.relative. Closes a path-traversal class that reached ExcelJS CVEs through admin/compromised tokens. - image validator now checks the full 8-byte PNG magic, enforces PNG IEND and JPEG EOI trailers, scans the decoded buffer for markup polyglot markers (<script, <svg, <iframe, javascript:, onerror=, ...), and explicitly rejects SVG. Provider-generated covers (DALL-E, Gemini) run through the same validator before persistence — an untrusted upstream cannot smuggle a stored-XSS payload past us. - added image-validation.test.ts and tightened documentation. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
289 lines
8.5 KiB
TypeScript
289 lines
8.5 KiB
TypeScript
import { realpath, stat } from "node:fs/promises";
import path from "node:path";
|
|
|
|
/** A single normalized worksheet cell; `null` stands for an empty cell. */
export type WorksheetCellValue = string | number | boolean | Date | null;

/** Row-major grid of normalized cells: `matrix[rowIndex][columnIndex]`, both zero-based. */
export type WorksheetMatrix = WorksheetCellValue[][];
|
|
|
|
// Path allowlist: dispo workbooks must live inside DISPO_IMPORT_DIR. Without
// this guard an admin (or a compromised admin token) could point the ExcelJS
// parser at any file the app process can read, reaching library CVEs on
// arbitrary filesystem paths. Default picks an in-repo `imports/` directory so
// local dev still works; production deployments should set DISPO_IMPORT_DIR
// explicitly to a dedicated volume.
/**
 * Returns the absolute directory that dispo workbooks must live in.
 *
 * Reads `DISPO_IMPORT_DIR` on every call. Surrounding whitespace is stripped
 * before use (the value was previously only trimmed for the emptiness check,
 * so a stray space or newline ended up in the resolved directory name). An
 * unset or blank value falls back to `imports` relative to the current
 * working directory.
 *
 * @returns The absolute import directory path.
 */
function resolveImportDir(): string {
  const configured = process.env["DISPO_IMPORT_DIR"]?.trim();
  const base = configured !== undefined && configured.length > 0 ? configured : "imports";
  return path.resolve(base);
}
|
|
|
|
/** Shape of the dynamically imported `exceljs` module. */
type ExcelJsModule = typeof import("exceljs");

/** Instance type of the `Workbook` class exposed by that module. */
type ExcelJsWorkbook = InstanceType<ExcelJsModule["Workbook"]>;

/**
 * The workbook's xlsx reader, widened with the underscore-prefixed
 * `_processTableEntry` hook that the table-compatibility patch in this file
 * wraps. The hook is optional because the patch only applies when it exists.
 */
type ExcelJsXlsxReader = ExcelJsWorkbook["xlsx"] & {
  _processTableEntry?: (
    stream: unknown,
    model: Record<string, unknown>,
    name: string,
  ) => Promise<unknown>;
};
|
|
|
|
/** The only file extension accepted for dispo workbooks (compared lower-cased). */
const DISPO_WORKBOOK_EXTENSION = ".xlsx";

/** Largest workbook file size accepted for import: 15 MiB. */
export const MAX_DISPO_WORKBOOK_BYTES = 15 * 1024 ** 2;

/** Maximum number of rows a worksheet may contain after trailing empty rows are trimmed. */
export const MAX_DISPO_WORKBOOK_ROWS = 10_000;

/** Maximum width of any single worksheet row after trailing nulls are trimmed. */
export const MAX_DISPO_WORKBOOK_COLUMNS = 1_024;
|
|
|
|
/** Worksheet XML node names handed to ExcelJS as `ignoreNodes` when reading a workbook. */
const EXCELJS_IGNORE_WORKSHEET_NODES = ["tableParts"];

/** Substring of the ExcelJS error message that marks an unsupported table filter. */
const EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER = '"name":"dateGroupItem"';
|
|
|
|
/** Lazily loaded `exceljs` module; stays `null` until the first `getExcelJS()` call. */
let _excelJs: ExcelJsModule | null = null;

/**
 * Worksheet reads keyed by `"<resolved workbook path>::<sheet name>"`.
 * Promises are stored rather than finished matrices, so callers that arrive
 * while a read is still running await the same promise.
 */
const worksheetMatrixCache: Map<string, Promise<WorksheetMatrix>> = new Map();
|
|
|
|
/**
 * Unwraps the result of `import("exceljs")`, which may expose `Workbook`
 * directly or nest the real module under `default`.
 *
 * @param module - The imported namespace object.
 * @returns The object that carries `Workbook`; when it is absent, whatever
 *   `module.default` holds is returned as-is.
 */
function normalizeExcelJsModule(
  module: ExcelJsModule | { default?: ExcelJsModule },
): ExcelJsModule {
  if ("Workbook" in module) {
    return module;
  }

  return module.default as ExcelJsModule;
}
|
|
|
|
/**
 * Loads `exceljs` on first use and memoizes the normalized module in
 * `_excelJs`, so later calls skip the dynamic import.
 *
 * @returns The normalized ExcelJS module.
 */
async function getExcelJS() {
  if (_excelJs) {
    return _excelJs;
  }

  _excelJs = normalizeExcelJsModule(await import("exceljs"));
  return _excelJs;
}
|
|
|
|
/**
 * Returns a copy of `row` without its trailing run of `null` cells.
 * Nulls that are followed by a non-null cell are kept.
 *
 * @param row - The cells of one worksheet row; not modified.
 * @returns A new array ending at the last non-null cell (empty if all cells are null).
 */
function trimTrailingNulls(row: WorksheetCellValue[]): WorksheetCellValue[] {
  let lastValueIndex = row.length - 1;
  while (lastValueIndex >= 0 && row[lastValueIndex] === null) {
    lastValueIndex -= 1;
  }

  return row.slice(0, lastValueIndex + 1);
}
|
|
|
|
/**
 * Returns a copy of `rows` without its trailing run of zero-length rows.
 * Empty rows that sit between populated rows are kept.
 *
 * @param rows - The worksheet rows; not modified.
 * @returns A new outer array ending at the last non-empty row.
 */
function trimTrailingEmptyRows(rows: WorksheetMatrix): WorksheetMatrix {
  let keep = rows.length;
  for (; keep > 0; keep -= 1) {
    if (rows[keep - 1]?.length !== 0) {
      break;
    }
  }

  return rows.slice(0, keep);
}
|
|
|
|
/**
 * Produces an independent copy of a worksheet matrix so callers can mutate
 * their result without touching the cached original.
 *
 * Strings, numbers, booleans and `null` are immutable, so copying the row
 * arrays is enough for them. `Date` cells are mutable objects; a plain
 * `row.slice()` would hand the same `Date` instance to every caller, letting
 * one caller's `setHours()` (or similar) alter the cached matrix. Each `Date`
 * is therefore re-created from its timestamp.
 *
 * @param rows - The matrix to copy; not modified.
 * @returns A new matrix with new row arrays and new `Date` instances.
 */
function cloneWorksheetMatrix(rows: WorksheetMatrix): WorksheetMatrix {
  return rows.map((row) =>
    row.map((cell) => (cell instanceof Date ? new Date(cell.getTime()) : cell)),
  );
}
|
|
|
|
/**
 * Reports whether `candidate` lies outside `baseDir`, judged purely from the
 * two path strings (no filesystem access).
 *
 * `path.relative` yields `".."`, a string starting with `"../"` (using the
 * platform separator), or an absolute path when `candidate` is not below
 * `baseDir`.
 */
function isOutsideDirectory(baseDir: string, candidate: string): boolean {
  const relative = path.relative(baseDir, candidate);
  return relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
}

/**
 * Resolves `workbookPath` against the configured import directory and checks
 * that it names an acceptable workbook file.
 *
 * Checks, in order:
 * 1. lexical containment in the import directory (rejects `..` segments and
 *    absolute paths that point elsewhere);
 * 2. the `.xlsx` extension (case-insensitive);
 * 3. the path exists (`stat` rejects otherwise);
 * 4. containment again after resolving symlinks on both sides;
 * 5. it is a regular file, non-empty, and at most MAX_DISPO_WORKBOOK_BYTES.
 *
 * @param workbookPath - Path supplied by the caller, interpreted relative to
 *   the import directory.
 * @returns The absolute, lexically resolved workbook path.
 * @throws Error when any of the checks above fails; filesystem errors from
 *   `stat`/`realpath` propagate unchanged.
 */
async function validateWorkbookPath(workbookPath: string): Promise<string> {
  const importDir = resolveImportDir();
  const resolvedPath = path.resolve(importDir, workbookPath);

  // String-level check: catches `..` sequences and absolute-path injection
  // via the tRPC surface before the filesystem is touched at all.
  if (isOutsideDirectory(importDir, resolvedPath)) {
    throw new Error(
      `Workbook path must be inside the configured import directory: "${workbookPath}"`,
    );
  }

  if (path.extname(resolvedPath).toLowerCase() !== DISPO_WORKBOOK_EXTENSION) {
    throw new Error(
      `Only ${DISPO_WORKBOOK_EXTENSION} workbooks are supported for dispo imports: "${resolvedPath}"`,
    );
  }

  const fileStat = await stat(resolvedPath);

  // The string-level check cannot see symlinks, and stat() follows them: a
  // link placed inside the import directory could otherwise point the parser
  // at any file on disk. Compare the canonical locations as well. The import
  // directory is canonicalized too, so a deployment where the directory
  // itself is reached through a symlink keeps working.
  const [realImportDir, realWorkbookPath] = await Promise.all([
    realpath(importDir),
    realpath(resolvedPath),
  ]);
  if (isOutsideDirectory(realImportDir, realWorkbookPath)) {
    throw new Error(
      `Workbook path must be inside the configured import directory: "${workbookPath}"`,
    );
  }

  if (!fileStat.isFile()) {
    throw new Error(`Workbook path must point to a readable file: "${resolvedPath}"`);
  }

  if (fileStat.size <= 0) {
    throw new Error(`Workbook file is empty: "${resolvedPath}"`);
  }

  if (fileStat.size > MAX_DISPO_WORKBOOK_BYTES) {
    throw new Error(
      `Workbook file exceeds the ${MAX_DISPO_WORKBOOK_BYTES} byte import limit: "${resolvedPath}"`,
    );
  }

  return resolvedPath;
}
|
|
|
|
/**
 * Concatenates the `text` of every rich-text run; runs without a string
 * `text` contribute nothing.
 */
function flattenRichText(parts: readonly unknown[]): string {
  return parts
    .map((part) => {
      if (part && typeof part === "object" && "text" in part) {
        const text = (part as { text?: unknown }).text;
        return typeof text === "string" ? text : "";
      }
      return "";
    })
    .join("");
}

/**
 * Collapses a raw cell value into a plain `WorksheetCellValue`.
 *
 * - `undefined` / `null` become `null`.
 * - Strings, numbers, booleans and `Date`s pass through unchanged.
 * - Objects with a `result` key (formula cells) are normalized from that result.
 * - Objects with `formula` / `sharedFormula` but no `result` become `null`.
 *   They used to fall through to `String(value)` and surface as the literal
 *   text "[object Object]". NOTE(review): assumes "no cached result" is best
 *   treated as an empty cell — confirm against the importer's expectations.
 * - Objects with `text` yield that text; a rich-text `text` (as can accompany
 *   a `hyperlink`) is flattened instead of being replaced by the URL.
 * - Objects with only a string `hyperlink` yield the URL.
 * - Objects with a `richText` array yield the concatenated run texts.
 * - Objects with a string `error` yield that error string.
 * - Anything else is converted with `String(value)`.
 *
 * @param value - The raw value read from a worksheet cell.
 * @returns The normalized cell value.
 */
function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
  if (value === undefined || value === null) {
    return null;
  }

  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
    return value;
  }

  if (value instanceof Date) {
    return value;
  }

  if (typeof value === "object") {
    const record = value as Record<string, unknown>;

    if ("result" in record) {
      return normalizeWorksheetCellValue(record.result);
    }

    // Formula without a result key: there is nothing to show for the cell.
    if ("formula" in record || "sharedFormula" in record) {
      return null;
    }

    if ("text" in record) {
      const label = record.text;
      if (typeof label === "string") {
        return label;
      }

      // The label itself may be a rich-text object; prefer it over the URL so
      // plain and rich labels behave the same way.
      if (label && typeof label === "object" && "richText" in label) {
        const runs = (label as { richText?: unknown }).richText;
        if (Array.isArray(runs)) {
          return flattenRichText(runs);
        }
      }
    }

    if ("hyperlink" in record && typeof record.hyperlink === "string") {
      return record.hyperlink;
    }

    if ("richText" in record && Array.isArray(record.richText)) {
      return flattenRichText(record.richText);
    }

    if ("error" in record && typeof record.error === "string") {
      return record.error;
    }
  }

  return String(value);
}
|
|
|
|
/**
 * Enforces the row and column import limits on an already trimmed matrix.
 *
 * @param rows - The worksheet matrix to check.
 * @param sheetName - Worksheet name, used only in error messages.
 * @param workbookPath - Workbook path, used only in error messages.
 * @throws Error when there are more than MAX_DISPO_WORKBOOK_ROWS rows, or
 *   when any row is wider than MAX_DISPO_WORKBOOK_COLUMNS. The row limit is
 *   checked first.
 */
function assertWorksheetShape(
  rows: WorksheetMatrix,
  sheetName: string,
  workbookPath: string,
): void {
  const hasTooManyRows = rows.length > MAX_DISPO_WORKBOOK_ROWS;
  if (hasTooManyRows) {
    throw new Error(
      `Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit.`,
    );
  }

  // The widest row exceeds the limit exactly when at least one row does.
  const hasTooWideRow = rows.some((row) => row.length > MAX_DISPO_WORKBOOK_COLUMNS);
  if (hasTooWideRow) {
    throw new Error(
      `Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit.`,
    );
  }
}
|
|
|
|
/**
 * Tells whether `error` is an `Error` whose message contains the
 * unsupported-table-filter marker.
 *
 * @param error - Any caught value.
 * @returns `true` only for `Error` instances carrying the marker text.
 */
function isUnsupportedExcelJsTableFilterError(error: unknown): boolean {
  if (!(error instanceof Error)) {
    return false;
  }

  return error.message.includes(EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER);
}
|
|
|
|
/**
 * Wraps the xlsx reader's `_processTableEntry` hook on this workbook so that
 * a table entry failing with the unsupported-table-filter error is skipped
 * (the wrapper resolves to `undefined`) instead of failing the whole read.
 * Every other error is rethrown. Does nothing when the reader has no such
 * hook.
 *
 * @param workbook - The workbook whose reader is patched in place.
 */
function patchExcelJsTableCompatibility(workbook: ExcelJsWorkbook): void {
  const xlsxReader = workbook.xlsx as ExcelJsXlsxReader;
  const delegate = xlsxReader._processTableEntry;

  if (typeof delegate === "function") {
    xlsxReader._processTableEntry = async function processTableEntryWithCompatibilityFallback(
      stream,
      model,
      name,
    ) {
      try {
        // Forward `this` so the original hook still runs against the reader.
        return await delegate.call(this, stream, model, name);
      } catch (error) {
        if (!isUnsupportedExcelJsTableFilterError(error)) {
          throw error;
        }

        return undefined;
      }
    };
  }
}
|
|
|
|
/**
 * Reads one worksheet of a dispo workbook into a matrix of normalized cells.
 *
 * The path is validated first. Results are cached per resolved path and
 * sheet name; the cache stores the in-flight promise, so concurrent callers
 * share a single parse. Every caller receives its own clone of the cached
 * matrix. A failed read removes its cache entry so a later call can retry.
 *
 * NOTE(review): apart from that failure path, nothing in this function
 * evicts entries, so a workbook replaced on disk under the same name keeps
 * being served from the cache — confirm that this is intended.
 *
 * @param workbookPath - Workbook path relative to the import directory.
 * @param sheetName - Name of the worksheet to read.
 * @returns The worksheet rows with trailing null cells and trailing empty
 *   rows removed.
 * @throws Error when path validation fails, the worksheet does not exist, or
 *   the sheet exceeds the row/column limits; read errors propagate.
 */
export async function readWorksheetMatrix(
  workbookPath: string,
  sheetName: string,
): Promise<WorksheetMatrix> {
  const resolvedPath = await validateWorkbookPath(workbookPath);
  const cacheKey = `${resolvedPath}::${sheetName}`;

  const inFlight = worksheetMatrixCache.get(cacheKey);
  if (inFlight !== undefined) {
    return cloneWorksheetMatrix(await inFlight);
  }

  // Parses the workbook and extracts the requested sheet as trimmed rows.
  async function loadMatrix(): Promise<WorksheetMatrix> {
    const excelJs = await getExcelJS();
    const workbook = new excelJs.Workbook();
    patchExcelJsTableCompatibility(workbook);
    await workbook.xlsx.readFile(resolvedPath, { ignoreNodes: EXCELJS_IGNORE_WORKSHEET_NODES });

    const worksheet = workbook.getWorksheet(sheetName);
    if (!worksheet) {
      throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
    }

    const collected: WorksheetMatrix = [];
    const lastRowNumber = worksheet.rowCount;
    // ExcelJS row and column numbers are 1-based.
    for (let rowNumber = 1; rowNumber <= lastRowNumber; rowNumber += 1) {
      const row = worksheet.getRow(rowNumber);
      const cells = Array.from({ length: row.cellCount }, (_, columnIndex) =>
        normalizeWorksheetCellValue(row.getCell(columnIndex + 1).value),
      );
      collected.push(trimTrailingNulls(cells));
    }

    const trimmed = trimTrailingEmptyRows(collected);
    assertWorksheetShape(trimmed, sheetName, resolvedPath);
    return trimmed;
  }

  // Register the promise before awaiting it so concurrent callers find it.
  const pending = loadMatrix();
  worksheetMatrixCache.set(cacheKey, pending);

  try {
    const loaded = await pending;
    return cloneWorksheetMatrix(loaded);
  } catch (error) {
    worksheetMatrixCache.delete(cacheKey);
    throw error;
  }
}
|
|
|
|
/**
 * Reads one cell from a worksheet matrix as a string.
 *
 * @param rows - The worksheet matrix.
 * @param rowNumber - 1-based row number.
 * @param columnNumber - 1-based column number.
 * @returns `null` when the cell is missing or `null`; the ISO-8601 string for
 *   `Date` cells; otherwise `String(value)`.
 */
export function getCellString(
  rows: WorksheetMatrix,
  rowNumber: number,
  columnNumber: number,
): string | null {
  const cell = rows[rowNumber - 1]?.[columnNumber - 1];

  if (cell === undefined || cell === null) {
    return null;
  }

  return cell instanceof Date ? cell.toISOString() : String(cell);
}
|
|
|
|
/**
 * Converts a 1-based column number to its spreadsheet letter form
 * (1 -> "A", 26 -> "Z", 27 -> "AA", ...).
 *
 * Fractional input is floored first. Values below 1 and non-finite values
 * (`NaN`, `Infinity`) yield an empty string. Previously `Infinity` never left
 * the loop and a fraction between 0 and 1 produced "@".
 *
 * @param columnNumber - 1-based column number.
 * @returns The column letters, or `""` when there is no such column.
 */
export function toColumnLetter(columnNumber: number): string {
  if (!Number.isFinite(columnNumber)) {
    return "";
  }

  let remaining = Math.floor(columnNumber);
  let letters = "";

  while (remaining > 0) {
    // Bijective base 26: shift to 0-based before taking the digit, because
    // there is no zero digit ("Z" is 26, not "A0").
    const offset = (remaining - 1) % 26;
    letters = String.fromCharCode(65 + offset) + letters;
    remaining = Math.floor((remaining - 1) / 26);
  }

  return letters;
}
|