import { stat } from "node:fs/promises";
import path from "node:path";

/** A single normalized cell value as exposed to dispo import code. */
export type WorksheetCellValue = boolean | Date | number | string | null;

/** Row-major worksheet contents; `rows[r][c]` is zero-based. */
export type WorksheetMatrix = WorksheetCellValue[][];

// Path allowlist: dispo workbooks must live inside DISPO_IMPORT_DIR. Without
// this guard an admin (or a compromised admin token) could point the ExcelJS
// parser at any file the app process can read, reaching library CVEs on
// arbitrary filesystem paths. Default picks an in-repo `imports/` directory so
// local dev still works; production deployments should set DISPO_IMPORT_DIR
// explicitly to a dedicated volume.
/**
 * Resolves the absolute directory that workbook paths are confined to.
 *
 * Uses `DISPO_IMPORT_DIR` when it is set to a non-blank value, otherwise
 * `imports` relative to the current working directory.
 */
function resolveImportDir(): string {
  const configured = process.env["DISPO_IMPORT_DIR"];
  const base =
    configured && configured.trim().length > 0 ? configured : path.resolve("imports");
  return path.resolve(base);
}

type ExcelJsModule = typeof import("exceljs");
type ExcelJsWorkbook = InstanceType<ExcelJsModule["Workbook"]>;
// `_processTableEntry` is a private ExcelJS reader hook; it is typed as
// optional so the compatibility patch degrades to a no-op if it is absent.
type ExcelJsXlsxReader = ExcelJsWorkbook["xlsx"] & {
  _processTableEntry?: (
    stream: unknown,
    model: Record<string, unknown>,
    name: string,
  ) => Promise<unknown>;
};

const DISPO_WORKBOOK_EXTENSION = ".xlsx";
export const MAX_DISPO_WORKBOOK_BYTES = 15 * 1024 * 1024;
export const MAX_DISPO_WORKBOOK_ROWS = 10000;
export const MAX_DISPO_WORKBOOK_COLUMNS = 1024;
const EXCELJS_IGNORE_WORKSHEET_NODES = ["tableParts"];
const EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER = '"name":"dateGroupItem"';

// Lazily loaded ExcelJS module (see getExcelJS).
let _excelJs: ExcelJsModule | null = null;
// In-flight and completed reads keyed by `${resolvedPath}::${sheetName}`.
// Entries are never evicted except when the read fails.
const worksheetMatrixCache = new Map<string, Promise<WorksheetMatrix>>();

/**
 * Returns the ExcelJS namespace regardless of whether the dynamic import
 * produced it directly or wrapped under `default`.
 */
function normalizeExcelJsModule(
  module: ExcelJsModule | { default?: ExcelJsModule },
): ExcelJsModule {
  return "Workbook" in module ? module : (module.default as ExcelJsModule);
}

/** Loads ExcelJS on first use and memoizes the module for later calls. */
async function getExcelJS(): Promise<ExcelJsModule> {
  if (!_excelJs) {
    _excelJs = normalizeExcelJsModule(await import("exceljs"));
  }
  return _excelJs;
}

/** Returns a copy of `row` without its trailing `null` cells. */
function trimTrailingNulls(row: WorksheetCellValue[]): WorksheetCellValue[] {
  let end = row.length;
  while (end > 0 && row[end - 1] === null) {
    end -= 1;
  }
  return row.slice(0, end);
}

/** Returns a copy of `rows` without its trailing zero-length rows. */
function trimTrailingEmptyRows(rows: WorksheetMatrix): WorksheetMatrix {
  let end = rows.length;
  while (end > 0 && rows[end - 1]?.length === 0) {
    end -= 1;
  }
  return rows.slice(0, end);
}

/** Copies the outer array and every row so callers cannot mutate the cache. */
function cloneWorksheetMatrix(rows: WorksheetMatrix): WorksheetMatrix {
  return rows.map((row) => row.slice());
}

/**
 * Validates a caller-supplied workbook path and returns its absolute form.
 *
 * @param workbookPath Path resolved against the import directory.
 * @returns The absolute path of the workbook.
 * @throws Error when the path lies outside the import directory, does not end
 *   in `.xlsx`, is not a regular file, is empty, or exceeds
 *   `MAX_DISPO_WORKBOOK_BYTES`. Errors from `stat` (e.g. a missing file)
 *   propagate unchanged.
 */
async function validateWorkbookPath(workbookPath: string): Promise<string> {
  const importDir = resolveImportDir();
  const resolvedPath = path.resolve(importDir, workbookPath);

  // path.relative returns a string that either starts with ".." (or equals
  // "..") or is absolute when the resolved path escapes importDir. Both are
  // rejected: defence against `..` sequences and absolute-path injection via
  // the tRPC surface.
  // NOTE(review): this check is purely lexical. Symlinks are not resolved, so
  // a symlink placed inside the import directory can still point outside it;
  // compare `realpath` results if that threat is in scope.
  const relative = path.relative(importDir, resolvedPath);
  if (
    relative === ".." ||
    relative.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relative)
  ) {
    throw new Error(
      `Workbook path must be inside the configured import directory: "${workbookPath}"`,
    );
  }

  if (path.extname(resolvedPath).toLowerCase() !== DISPO_WORKBOOK_EXTENSION) {
    throw new Error(
      `Only ${DISPO_WORKBOOK_EXTENSION} workbooks are supported for dispo imports: "${resolvedPath}"`,
    );
  }

  const fileStat = await stat(resolvedPath);
  if (!fileStat.isFile()) {
    throw new Error(`Workbook path must point to a readable file: "${resolvedPath}"`);
  }
  if (fileStat.size <= 0) {
    throw new Error(`Workbook file is empty: "${resolvedPath}"`);
  }
  if (fileStat.size > MAX_DISPO_WORKBOOK_BYTES) {
    throw new Error(
      `Workbook file exceeds the ${MAX_DISPO_WORKBOOK_BYTES} byte import limit: "${resolvedPath}"`,
    );
  }

  return resolvedPath;
}

/**
 * Collapses a raw cell value into a `WorksheetCellValue`.
 *
 * Primitives and `Date`s pass through and `null`/`undefined` become `null`.
 * Object values are unwrapped by the first matching key, in this order:
 * `result`, `text`, `hyperlink`, `richText` (string parts concatenated),
 * `error`. A formula object without a `result` yields `null`. Anything else
 * is stringified.
 */
function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
  if (value === undefined || value === null) {
    return null;
  }
  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
    return value;
  }
  if (value instanceof Date) {
    return value;
  }

  if (typeof value === "object") {
    const record = value as Record<string, unknown>;

    if ("result" in record) {
      // The result may itself be an object (e.g. an error value), so recurse.
      return normalizeWorksheetCellValue(record["result"]);
    }

    const text = record["text"];
    if ("text" in record && typeof text === "string") {
      return text;
    }

    const hyperlink = record["hyperlink"];
    if ("hyperlink" in record && typeof hyperlink === "string") {
      return hyperlink;
    }

    const richText = record["richText"];
    if ("richText" in record && Array.isArray(richText)) {
      return richText
        .map((part: unknown) => {
          if (part && typeof part === "object" && "text" in part) {
            const partText = (part as { text?: unknown }).text;
            return typeof partText === "string" ? partText : "";
          }
          return "";
        })
        .join("");
    }

    const error = record["error"];
    if ("error" in record && typeof error === "string") {
      return error;
    }

    // A formula with no cached result has no value to import. Without this
    // guard it would fall through and be stringified as "[object Object]".
    if ("formula" in record || "sharedFormula" in record) {
      return null;
    }
  }

  return String(value);
}

/**
 * Enforces the row and column import limits on an already-trimmed matrix.
 *
 * @throws Error when the row count exceeds `MAX_DISPO_WORKBOOK_ROWS` or the
 *   widest row exceeds `MAX_DISPO_WORKBOOK_COLUMNS`.
 */
function assertWorksheetShape(
  rows: WorksheetMatrix,
  sheetName: string,
  workbookPath: string,
): void {
  if (rows.length > MAX_DISPO_WORKBOOK_ROWS) {
    throw new Error(
      `Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_ROWS} row import limit.`,
    );
  }

  const widestRow = rows.reduce((max, row) => Math.max(max, row.length), 0);
  if (widestRow > MAX_DISPO_WORKBOOK_COLUMNS) {
    throw new Error(
      `Worksheet "${sheetName}" in "${workbookPath}" exceeds the ${MAX_DISPO_WORKBOOK_COLUMNS} column import limit.`,
    );
  }
}

/** True when `error` is an `Error` whose message contains the table-filter marker. */
function isUnsupportedExcelJsTableFilterError(error: unknown): boolean {
  return error instanceof Error && error.message.includes(EXCELJS_UNSUPPORTED_TABLE_FILTER_MARKER);
}

/**
 * Wraps the reader's `_processTableEntry` hook so that failures carrying the
 * unsupported table-filter marker resolve to `undefined` instead of aborting
 * the read. All other errors are rethrown. Does nothing when the hook is not
 * a function.
 */
function patchExcelJsTableCompatibility(workbook: ExcelJsWorkbook): void {
  const reader = workbook.xlsx as ExcelJsXlsxReader;
  const originalProcessTableEntry = reader._processTableEntry;
  if (typeof originalProcessTableEntry !== "function") {
    return;
  }

  reader._processTableEntry = async function processTableEntryWithCompatibilityFallback(
    stream,
    model,
    name,
  ) {
    try {
      return await originalProcessTableEntry.call(this, stream, model, name);
    } catch (error) {
      if (isUnsupportedExcelJsTableFilterError(error)) {
        return undefined;
      }
      throw error;
    }
  };
}

/**
 * Reads one worksheet of an `.xlsx` workbook into a matrix of normalized cells.
 *
 * Trailing `null` cells are trimmed per row and trailing empty rows are
 * dropped. Results are cached per resolved path and sheet name; every call
 * receives its own copy. A failed read is removed from the cache so a later
 * call retries.
 *
 * @param workbookPath Workbook path, validated by `validateWorkbookPath`.
 * @param sheetName Name of the worksheet to read.
 * @throws Error when path validation fails, the worksheet does not exist, or
 *   the sheet exceeds the row/column limits.
 */
export async function readWorksheetMatrix(
  workbookPath: string,
  sheetName: string,
): Promise<WorksheetMatrix> {
  const resolvedPath = await validateWorkbookPath(workbookPath);
  const cacheKey = `${resolvedPath}::${sheetName}`;

  const cachedMatrix = worksheetMatrixCache.get(cacheKey);
  if (cachedMatrix) {
    return cloneWorksheetMatrix(await cachedMatrix);
  }

  const matrixPromise = (async () => {
    const ExcelJS = await getExcelJS();
    const workbook = new ExcelJS.Workbook();
    patchExcelJsTableCompatibility(workbook);
    await workbook.xlsx.readFile(resolvedPath, { ignoreNodes: EXCELJS_IGNORE_WORKSHEET_NODES });

    const worksheet = workbook.getWorksheet(sheetName);
    if (!worksheet) {
      throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
    }

    const rows: WorksheetMatrix = [];
    for (let rowNumber = 1; rowNumber <= worksheet.rowCount; rowNumber += 1) {
      const row = worksheet.getRow(rowNumber);
      const cells: WorksheetCellValue[] = [];
      for (let columnNumber = 1; columnNumber <= row.cellCount; columnNumber += 1) {
        cells.push(normalizeWorksheetCellValue(row.getCell(columnNumber).value));
      }
      rows.push(trimTrailingNulls(cells));
    }

    const normalizedRows = trimTrailingEmptyRows(rows);
    assertWorksheetShape(normalizedRows, sheetName, resolvedPath);
    return normalizedRows;
  })();

  // Store the promise before awaiting so concurrent callers share one read.
  worksheetMatrixCache.set(cacheKey, matrixPromise);

  try {
    return cloneWorksheetMatrix(await matrixPromise);
  } catch (error) {
    worksheetMatrixCache.delete(cacheKey);
    throw error;
  }
}

/**
 * Returns the cell at 1-based (`rowNumber`, `columnNumber`) as a string.
 *
 * @returns `null` for missing or `null` cells, an ISO-8601 string for dates,
 *   and `String(value)` otherwise.
 */
export function getCellString(
  rows: WorksheetMatrix,
  rowNumber: number,
  columnNumber: number,
): string | null {
  const value = rows[rowNumber - 1]?.[columnNumber - 1];
  if (value === null || value === undefined) {
    return null;
  }
  if (value instanceof Date) {
    return value.toISOString();
  }
  return String(value);
}

/**
 * Converts a 1-based column number to spreadsheet letters (1 -> "A",
 * 27 -> "AA"). Returns an empty string for values that are not positive.
 */
export function toColumnLetter(columnNumber: number): string {
  let current = columnNumber;
  let result = "";

  while (current > 0) {
    const remainder = (current - 1) % 26;
    result = String.fromCharCode(65 + remainder) + result;
    current = Math.floor((current - 1) / 26);
  }

  return result;
}