feat(import): harden untrusted spreadsheet boundaries
This commit is contained in:
@@ -1,8 +1,76 @@
|
||||
import * as XLSX from "xlsx";
|
||||
import { stat } from "node:fs/promises";
|
||||
import { createRequire } from "node:module";
|
||||
import path from "node:path";
|
||||
|
||||
/** A single normalized worksheet cell; `null` stands for an empty cell. */
export type WorksheetCellValue = boolean | Date | number | string | null;

/** A worksheet as an array of rows, each row being an array of cell values. */
export type WorksheetMatrix = WorksheetCellValue[][];
|
||||
|
||||
/** The part of a parsed workbook this module reads: worksheets keyed by sheet name. */
type XlsxWorkbook = {
  Sheets: Record<string, unknown>;
};

/**
 * The only `sheet_to_json` option combination this module uses.
 * The fields are literal types, so any other combination is rejected at compile time.
 */
type SheetToJsonOptions = {
  header: 1;
  raw: true;
  defval: null;
};

/**
 * Narrow structural view of the "xlsx" module: just the members this module calls,
 * with the exact option values it passes.
 */
type XlsxRuntime = {
  readFile(filePath: string, options: { cellDates: true; dense: true }): XlsxWorkbook;
  utils: {
    sheet_to_json<T>(worksheet: unknown, options: SheetToJsonOptions): T[];
  };
};
|
||||
|
||||
// Build a CommonJS-style `require` anchored at this module's URL and use it to load
// the "xlsx" package. The loaded module is asserted to the narrow `XlsxRuntime` shape,
// so only the members declared there are visible to the rest of this file.
const require = createRequire(import.meta.url);
const XLSX = require("xlsx") as XlsxRuntime;
|
||||
|
||||
/** The only file extension accepted for dispo workbook imports (lower case, with the dot). */
const DISPO_WORKBOOK_EXTENSION = ".xlsx";

/** Largest workbook file size, in bytes, accepted for import (15 MiB). */
export const MAX_DISPO_WORKBOOK_BYTES = 15 * 1024 * 1024;
|
||||
|
||||
/**
 * Returns a copy of `row` with every trailing `null` cell removed.
 *
 * Only cells that are strictly `null` are dropped; other falsy values such as
 * `0`, `""` and `false` are kept, as are `null` cells that precede a non-null cell.
 * The input array is not modified.
 *
 * @param row - Cell values of one worksheet row.
 * @returns A new array ending at the last non-null cell (empty if every cell is `null`).
 */
function trimTrailingNulls(row: readonly WorksheetCellValue[]): WorksheetCellValue[] {
  let end = row.length;
  // Walk backwards until the cell just before `end` holds a value.
  while (end > 0 && row[end - 1] === null) {
    end -= 1;
  }
  return row.slice(0, end);
}
|
||||
|
||||
/**
 * Returns a copy of `rows` with every trailing zero-length row removed.
 *
 * A row counts as empty only when its length is 0; a row such as `[null]` is kept.
 * Empty rows that are followed by a non-empty row are kept as well.
 * The input array is not modified.
 *
 * @param rows - Worksheet rows.
 * @returns A new array ending at the last row that has at least one cell.
 */
function trimTrailingEmptyRows(rows: WorksheetMatrix): WorksheetMatrix {
  let end = rows.length;
  // Walk backwards until the row just before `end` has at least one cell.
  while (end > 0 && rows[end - 1]?.length === 0) {
    end -= 1;
  }
  return rows.slice(0, end);
}
|
||||
|
||||
/**
 * Checks that a workbook path is acceptable for a dispo import before the file is opened.
 *
 * The path is resolved to an absolute path, and the checks run in this order:
 * 1. the extension must equal `DISPO_WORKBOOK_EXTENSION` (compared case-insensitively);
 * 2. the path must refer to a regular file;
 * 3. the file must not be empty;
 * 4. the file must not be larger than `MAX_DISPO_WORKBOOK_BYTES`.
 *
 * The extension check runs before any filesystem access, so a path with the wrong
 * extension is rejected without being stat'ed.
 *
 * @param workbookPath - Path to the workbook; relative paths are resolved by `path.resolve`.
 * @returns The resolved absolute path.
 * @throws Error when any of the checks above fails. Errors raised by `stat` itself
 *   (for example when the path does not exist) propagate unchanged.
 */
async function validateWorkbookPath(workbookPath: string): Promise<string> {
  const resolvedPath = path.resolve(workbookPath);

  if (path.extname(resolvedPath).toLowerCase() !== DISPO_WORKBOOK_EXTENSION) {
    throw new Error(
      `Only ${DISPO_WORKBOOK_EXTENSION} workbooks are supported for dispo imports: "${resolvedPath}"`,
    );
  }

  // `stat` follows symbolic links, so a link to a regular file passes the isFile() check.
  const fileStat = await stat(resolvedPath);
  if (!fileStat.isFile()) {
    throw new Error(`Workbook path must point to a readable file: "${resolvedPath}"`);
  }

  if (fileStat.size <= 0) {
    throw new Error(`Workbook file is empty: "${resolvedPath}"`);
  }

  if (fileStat.size > MAX_DISPO_WORKBOOK_BYTES) {
    throw new Error(
      `Workbook file exceeds the ${MAX_DISPO_WORKBOOK_BYTES} byte import limit: "${resolvedPath}"`,
    );
  }

  return resolvedPath;
}
|
||||
|
||||
function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
|
||||
if (value === undefined || value === null) {
|
||||
return null;
|
||||
@@ -16,6 +84,38 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
|
||||
return value;
|
||||
}
|
||||
|
||||
if (typeof value === "object") {
|
||||
const record = value as Record<string, unknown>;
|
||||
|
||||
if ("result" in record) {
|
||||
return normalizeWorksheetCellValue(record.result);
|
||||
}
|
||||
|
||||
if ("text" in record && typeof record.text === "string") {
|
||||
return record.text;
|
||||
}
|
||||
|
||||
if ("hyperlink" in record && typeof record.hyperlink === "string") {
|
||||
return record.hyperlink;
|
||||
}
|
||||
|
||||
if ("richText" in record && Array.isArray(record.richText)) {
|
||||
return record.richText
|
||||
.map((part) => {
|
||||
if (part && typeof part === "object" && "text" in part) {
|
||||
const text = (part as { text?: unknown }).text;
|
||||
return typeof text === "string" ? text : "";
|
||||
}
|
||||
return "";
|
||||
})
|
||||
.join("");
|
||||
}
|
||||
|
||||
if ("error" in record && typeof record.error === "string") {
|
||||
return record.error;
|
||||
}
|
||||
}
|
||||
|
||||
return String(value);
|
||||
}
|
||||
|
||||
@@ -23,13 +123,14 @@ export async function readWorksheetMatrix(
|
||||
workbookPath: string,
|
||||
sheetName: string,
|
||||
): Promise<WorksheetMatrix> {
|
||||
const workbook = XLSX.readFile(workbookPath, {
|
||||
const resolvedPath = await validateWorkbookPath(workbookPath);
|
||||
const workbook = XLSX.readFile(resolvedPath, {
|
||||
cellDates: true,
|
||||
dense: true,
|
||||
});
|
||||
const worksheet = workbook.Sheets[sheetName];
|
||||
if (!worksheet) {
|
||||
throw new Error(`Worksheet "${sheetName}" not found in workbook "${workbookPath}"`);
|
||||
throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
|
||||
}
|
||||
|
||||
const rows = XLSX.utils.sheet_to_json<(WorksheetCellValue | null)[]>(worksheet, {
|
||||
@@ -38,7 +139,11 @@ export async function readWorksheetMatrix(
|
||||
defval: null,
|
||||
});
|
||||
|
||||
return rows.map((row) => row.map((value) => normalizeWorksheetCellValue(value)));
|
||||
return trimTrailingEmptyRows(
|
||||
rows.map((row: (WorksheetCellValue | null)[]) =>
|
||||
trimTrailingNulls(row.map((value: WorksheetCellValue | null) => normalizeWorksheetCellValue(value))),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
export function getCellString(
|
||||
|
||||
Reference in New Issue
Block a user