const XLSX_EXTENSION = ".xlsx";
const CSV_EXTENSION = ".csv";
const XLS_EXTENSION = ".xls";

/** Largest file size, in bytes, accepted by {@link assertSpreadsheetFile} (10 * 1024 * 1024). */
export const MAX_BROWSER_SPREADSHEET_BYTES = 10 * 1024 * 1024;

/** Maximum number of rows accepted after the header row (one extra row is allowed for headers). */
export const MAX_BROWSER_SPREADSHEET_ROWS = 5000;

/** Maximum number of cells accepted in any single row. */
export const MAX_BROWSER_SPREADSHEET_COLUMNS = 200;

type ExcelJsModule = typeof import("exceljs");

// Cached result of the dynamic `exceljs` import; populated on first use by getExcelJS().
let _excelJs: ExcelJsModule | null = null;

/**
 * Returns the lower-cased extension of `fileName`, including the leading dot,
 * or an empty string when the name contains no dot.
 */
function getFileExtension(fileName: string): string {
  const dotIndex = fileName.lastIndexOf(".");
  if (dotIndex < 0) {
    return "";
  }
  return fileName.slice(dotIndex).toLowerCase();
}

/** True for the extensions this module can parse (`.xlsx` and `.csv`). */
function isSupportedSpreadsheetExtension(extension: string): boolean {
  return extension === XLSX_EXTENSION || extension === CSV_EXTENSION;
}

/**
 * Converts a raw cell value into its string representation.
 *
 * Handles primitives, dates, and the object shapes checked below
 * (`result`, `formula`/`sharedFormula`, `text`, `hyperlink`, `richText`, `error`).
 * NOTE(review): these shapes are presumed to be the ExcelJS cell value variants —
 * confirm against the ExcelJS version in use.
 *
 * @param value - Raw cell value of unknown shape.
 * @returns The string form of the value; `""` for null/undefined, invalid dates,
 *          and formula objects that carry no `result`.
 */
function normalizeCellString(value: unknown): string {
  if (value === undefined || value === null) {
    return "";
  }

  if (value instanceof Date) {
    // toISOString() throws a RangeError on an invalid Date; one malformed cell
    // must not abort the whole import.
    return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  }

  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
    return String(value);
  }

  if (typeof value === "object") {
    const record = value as Record<string, unknown>;

    if ("result" in record) {
      // The result may itself be a date, an error object, etc., so recurse.
      return normalizeCellString(record.result);
    }

    if ("formula" in record || "sharedFormula" in record) {
      // Formula without a result: there is nothing to display. Without this
      // branch the fallback below would yield "[object Object]".
      return "";
    }

    if ("text" in record && typeof record.text === "string") {
      return record.text;
    }

    if ("hyperlink" in record && typeof record.hyperlink === "string") {
      return record.hyperlink;
    }

    if ("richText" in record && Array.isArray(record.richText)) {
      // Concatenate the text of every run; runs without a string `text` contribute nothing.
      return record.richText
        .map((part: unknown) => {
          if (part && typeof part === "object" && "text" in part) {
            const text = (part as { text?: unknown }).text;
            return typeof text === "string" ? text : "";
          }
          return "";
        })
        .join("");
    }

    if ("error" in record && typeof record.error === "string") {
      return record.error;
    }
  }

  return String(value);
}

/**
 * Parses CSV text into a matrix of cell strings.
 *
 * A leading byte-order mark is stripped. Cells are separated by `,`; rows end at
 * `\n`, `\r`, or `\r\n`. Inside double quotes, separators and line breaks are kept
 * literally and `""` produces a single `"`.
 *
 * @param input - Complete CSV document.
 * @returns One string array per row, in document order.
 */
function parseCsvMatrix(input: string): string[][] {
  const text = input.replace(/^\uFEFF/u, "");
  const rows: string[][] = [];
  let currentRow: string[] = [];
  let currentCell = "";
  let inQuotes = false;

  for (let index = 0; index < text.length; index += 1) {
    const character = text[index];
    const nextCharacter = text[index + 1];

    if (character === "\"") {
      if (inQuotes && nextCharacter === "\"") {
        // Escaped quote inside a quoted cell: emit one quote, skip its pair.
        currentCell += "\"";
        index += 1;
      } else {
        inQuotes = !inQuotes;
      }
      continue;
    }

    if (!inQuotes && character === ",") {
      currentRow.push(currentCell);
      currentCell = "";
      continue;
    }

    if (!inQuotes && (character === "\n" || character === "\r")) {
      if (character === "\r" && nextCharacter === "\n") {
        // Treat CRLF as a single row terminator.
        index += 1;
      }
      currentRow.push(currentCell);
      rows.push(currentRow);
      currentRow = [];
      currentCell = "";
      continue;
    }

    currentCell += character;
  }

  // Flush the final row when the input does not end with a line break.
  if (currentCell.length > 0 || currentRow.length > 0) {
    currentRow.push(currentCell);
    rows.push(currentRow);
  }

  return rows;
}

/** Builds the error thrown when a file has more rows than allowed. */
function createRowLimitError(contextLabel: string): Error {
  return new Error(
    `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_ROWS} row limit for ${contextLabel}.`,
  );
}

/** Builds the error thrown when a row has more cells than allowed. */
function createColumnLimitError(contextLabel: string): Error {
  return new Error(
    `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_COLUMNS} column limit for ${contextLabel}.`,
  );
}

/**
 * Verifies that a parsed matrix stays within the row and column limits.
 *
 * @param rows - Matrix including the header row.
 * @param contextLabel - Human-readable name of the operation, used in error messages.
 * @throws Error when there are more than `MAX_BROWSER_SPREADSHEET_ROWS + 1` rows
 *         (checked first) or any row has more than `MAX_BROWSER_SPREADSHEET_COLUMNS` cells.
 */
export function assertTabularMatrixWithinLimits(rows: string[][], contextLabel: string): void {
  // "+ 1" leaves room for the header row on top of the data rows.
  if (rows.length > MAX_BROWSER_SPREADSHEET_ROWS + 1) {
    throw createRowLimitError(contextLabel);
  }

  const widestRow = rows.reduce((max, row) => Math.max(max, row.length), 0);
  if (widestRow > MAX_BROWSER_SPREADSHEET_COLUMNS) {
    throw createColumnLimitError(contextLabel);
  }
}

/**
 * Verifies that a header row is usable as object keys.
 *
 * An empty header list is accepted as-is.
 *
 * @param headers - Header cell values.
 * @param contextLabel - Human-readable name of the operation, used in error messages.
 * @throws Error when a header is an empty string or when two headers are equal
 *         ignoring case.
 */
export function assertHeaderRow(headers: string[], contextLabel: string): void {
  if (headers.length === 0) {
    return;
  }

  const blankHeaderIndex = headers.findIndex((header) => header.length === 0);
  if (blankHeaderIndex >= 0) {
    throw new Error(
      `The selected file contains an empty header cell in column ${blankHeaderIndex + 1} and cannot be used for ${contextLabel}.`,
    );
  }

  const seen = new Set<string>();
  for (const header of headers) {
    const normalized = header.toLowerCase();
    if (seen.has(normalized)) {
      throw new Error(`The selected file contains duplicate header "${header}" and cannot be used for ${contextLabel}.`);
    }
    seen.add(normalized);
  }
}

/**
 * Converts a matrix whose first row holds headers into an array of row objects.
 *
 * Headers are trimmed; rows whose cells are all blank are skipped; cells missing
 * from a short row become `""`; cells beyond the header count are ignored.
 *
 * @param rows - Matrix including the header row.
 * @param contextLabel - Human-readable name of the operation, used in error messages.
 * @returns One record per non-blank data row, keyed by header.
 * @throws Error when the matrix exceeds the size limits or the header row is invalid.
 */
function matrixToObjects(rows: string[][], contextLabel: string): Record<string, string>[] {
  assertTabularMatrixWithinLimits(rows, contextLabel);

  const headers = (rows[0] ?? []).map((header) => header.trim());
  assertHeaderRow(headers, contextLabel);

  if (headers.length === 0) {
    return [];
  }

  return rows
    .slice(1)
    .filter((row) => row.some((value) => value.trim() !== ""))
    .map((row) =>
      // Object.fromEntries defines own data properties, so a header such as
      // "__proto__" becomes a regular key instead of being silently dropped by
      // the inherited prototype setter.
      Object.fromEntries(
        headers.map((header, index): [string, string] => [header, row[index] ?? ""]),
      ),
    );
}

/** Loads the `exceljs` module on first use and returns the cached module afterwards. */
async function getExcelJS(): Promise<ExcelJsModule> {
  if (!_excelJs) {
    _excelJs = await import("exceljs");
  }
  return _excelJs;
}

/**
 * Validates that `file` can be used as a spreadsheet import source.
 *
 * @param file - File selected by the user.
 * @param options - `allowCsv` (default `true`) controls whether `.csv` is accepted;
 *                  `contextLabel` (default `"spreadsheet import"`) is used in error messages.
 * @throws Error when the file is empty, larger than `MAX_BROWSER_SPREADSHEET_BYTES`,
 *         a legacy `.xls` workbook, or has an unsupported extension.
 */
export function assertSpreadsheetFile(
  file: File,
  options?: { allowCsv?: boolean; contextLabel?: string },
): void {
  const extension = getFileExtension(file.name);
  const allowCsv = options?.allowCsv ?? true;
  const contextLabel = options?.contextLabel ?? "spreadsheet import";

  if (file.size <= 0) {
    throw new Error(`The selected file is empty and cannot be used for ${contextLabel}.`);
  }

  if (file.size > MAX_BROWSER_SPREADSHEET_BYTES) {
    throw new Error(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_BYTES} byte limit for ${contextLabel}.`,
    );
  }

  // Legacy workbooks get a dedicated message explaining how to convert them.
  if (extension === XLS_EXTENSION) {
    throw new Error(
      "Legacy .xls files are not supported. Please resave the workbook as .xlsx or export it as .csv.",
    );
  }

  if (extension === XLSX_EXTENSION) {
    return;
  }

  if (allowCsv && extension === CSV_EXTENSION) {
    return;
  }

  if (allowCsv) {
    throw new Error("Unsupported file type. Please upload a .xlsx or .csv file.");
  }

  throw new Error("Unsupported file type. Please upload a .xlsx file.");
}

/**
 * Reads the first worksheet of an `.xlsx` file into row objects.
 *
 * The row and column limits are enforced before and while the matrix is built, so
 * an oversized sheet is rejected without first copying every cell.
 *
 * @param file - `.xlsx` file to read.
 * @returns One record per non-blank data row, or `[]` when the workbook has no worksheet.
 * @throws Error when the sheet exceeds the size limits or its header row is invalid.
 */
async function parseXlsxSpreadsheet(file: File): Promise<Record<string, string>[]> {
  const contextLabel = "spreadsheet import";
  const ExcelJS = await getExcelJS();
  const workbook = new ExcelJS.Workbook();
  const buffer = await file.arrayBuffer();
  await workbook.xlsx.load(buffer);

  const worksheet = workbook.worksheets[0];
  if (!worksheet) {
    return [];
  }

  // Same check (and same precedence) as assertTabularMatrixWithinLimits, applied
  // up front because the matrix below gets exactly `rowCount` rows.
  if (worksheet.rowCount > MAX_BROWSER_SPREADSHEET_ROWS + 1) {
    throw createRowLimitError(contextLabel);
  }

  const rows: string[][] = [];
  for (let rowNumber = 1; rowNumber <= worksheet.rowCount; rowNumber += 1) {
    const row = worksheet.getRow(rowNumber);
    if (row.cellCount > MAX_BROWSER_SPREADSHEET_COLUMNS) {
      throw createColumnLimitError(contextLabel);
    }

    const cells: string[] = [];
    for (let columnNumber = 1; columnNumber <= row.cellCount; columnNumber += 1) {
      cells.push(normalizeCellString(row.getCell(columnNumber).value));
    }
    rows.push(cells);
  }

  return matrixToObjects(rows, contextLabel);
}

/**
 * Parses a spreadsheet import file (`.csv` or `.xlsx`) into an array of row objects.
 * Keys come from the first row (headers); every value is a string.
 *
 * @param file - File selected by the user.
 * @returns One record per non-blank data row.
 * @throws Error when the file fails {@link assertSpreadsheetFile}, exceeds the
 *         size limits, or has an invalid header row.
 */
export async function parseSpreadsheet(file: File): Promise<Record<string, string>[]> {
  assertSpreadsheetFile(file);

  if (getFileExtension(file.name) === CSV_EXTENSION) {
    return matrixToObjects(parseCsvMatrix(await file.text()), "spreadsheet import");
  }

  return parseXlsxSpreadsheet(file);
}

/** True when the file name has an extension this module can parse (`.xlsx` or `.csv`). */
export function isSpreadsheetFile(file: File): boolean {
  return isSupportedSpreadsheetExtension(getFileExtension(file.name));
}