267 lines
7.2 KiB
TypeScript
267 lines
7.2 KiB
TypeScript
/** Lower-case file-name suffix (leading dot included) for .xlsx workbooks. */
const XLSX_EXTENSION = ".xlsx";
/** Lower-case file-name suffix (leading dot included) for comma-separated text files. */
const CSV_EXTENSION = ".csv";
/** Lower-case file-name suffix (leading dot included) for legacy .xls workbooks. */
const XLS_EXTENSION = ".xls";

/** Largest file size, in bytes, accepted for in-browser parsing (10 MiB). */
export const MAX_BROWSER_SPREADSHEET_BYTES = 10 * 1024 * 1024;
/** Row limit quoted in the "row limit" error message for in-browser parsing. */
export const MAX_BROWSER_SPREADSHEET_ROWS = 5000;
/** Maximum number of cells allowed in any single row. */
export const MAX_BROWSER_SPREADSHEET_COLUMNS = 200;

/** Shape of the dynamically imported `exceljs` module. */
type ExcelJsModule = typeof import("exceljs");

/** Module-level cache for the `exceljs` import; stays `null` until first loaded. */
let _excelJs: ExcelJsModule | null = null;
|
|
|
|
/**
 * Extracts the extension of a file name.
 *
 * @param fileName - Name to inspect.
 * @returns Everything from the last "." onward, lower-cased (the dot is
 *   included), or "" when the name contains no dot at all.
 */
function getFileExtension(fileName: string): string {
  const lastDot = fileName.lastIndexOf(".");
  return lastDot === -1 ? "" : fileName.slice(lastDot).toLowerCase();
}
|
|
|
|
/**
 * Reports whether an extension is one of the importable spreadsheet types.
 *
 * @param extension - Extension to test; compared exactly against the
 *   `.xlsx` and `.csv` constants, so it must already be lower-case and
 *   include the leading dot.
 * @returns `true` for `.xlsx` or `.csv`, otherwise `false`.
 */
function isSupportedSpreadsheetExtension(extension: string): boolean {
  switch (extension) {
    case XLSX_EXTENSION:
    case CSV_EXTENSION:
      return true;
    default:
      return false;
  }
}
|
|
|
|
/**
 * Converts a raw worksheet cell value into the plain string used for import.
 *
 * Handled shapes, in priority order:
 * - `null` / `undefined` -> `""`
 * - `Date` -> ISO-8601 string, or `""` when the date is invalid
 * - string / number / boolean -> `String(value)`
 * - object with `result` (computed formula) -> the normalised result
 * - object with a string `text` -> that text
 * - object with a string `hyperlink` -> the link target
 * - object with a `richText` array -> concatenation of each part's string `text`
 * - object with a string `error` -> the error code
 * - object with `formula` / `sharedFormula` but no `result` -> `""`
 *
 * Anything else is stringified, except that the meaningless default
 * rendering `"[object Object]"` is replaced by `""` so it never leaks
 * into imported data.
 *
 * @param value - Raw cell value of unknown shape.
 * @returns The string representation described above.
 */
function normalizeCellString(value: unknown): string {
  if (value === undefined || value === null) {
    return "";
  }

  if (value instanceof Date) {
    // Date.prototype.toISOString throws a RangeError for an invalid Date,
    // which would abort the whole import because of a single bad cell.
    return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  }

  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
    return String(value);
  }

  if (typeof value === "object") {
    const record = value as Record<string, unknown>;

    if ("result" in record) {
      // The result may itself be a Date, an error object, etc.
      return normalizeCellString(record.result);
    }

    if ("text" in record && typeof record.text === "string") {
      return record.text;
    }

    if ("hyperlink" in record && typeof record.hyperlink === "string") {
      return record.hyperlink;
    }

    if ("richText" in record && Array.isArray(record.richText)) {
      return record.richText
        .map((part) => {
          if (part && typeof part === "object" && "text" in part) {
            const text = (part as { text?: unknown }).text;
            return typeof text === "string" ? text : "";
          }
          return "";
        })
        .join("");
    }

    if ("error" in record && typeof record.error === "string") {
      return record.error;
    }

    if ("formula" in record || "sharedFormula" in record) {
      // A formula with no computed result has nothing to show.
      return "";
    }
  }

  // Remaining primitives (bigint, symbol, ...) and unrecognised objects.
  const rendered = String(value);
  return rendered === "[object Object]" ? "" : rendered;
}
|
|
|
|
/**
 * Splits CSV text into a matrix of cell strings.
 *
 * Behaviour:
 * - A single leading byte-order mark (U+FEFF) is discarded.
 * - A double quote toggles quoted mode; inside quoted mode a doubled quote
 *   (`""`) emits one literal quote character.
 * - Outside quoted mode a comma ends the cell, and `\n`, `\r` or `\r\n`
 *   ends the row. Inside quoted mode those characters are kept verbatim.
 * - A final row with no trailing line break is still emitted, provided it
 *   holds at least one completed cell or some cell text.
 *
 * @param input - Raw CSV text.
 * @returns One string array per row, in file order.
 */
function parseCsvMatrix(input: string): string[][] {
  const source = input.startsWith("\uFEFF") ? input.slice(1) : input;

  const matrix: string[][] = [];
  let pendingRow: string[] = [];
  let pendingCell = "";
  let quoted = false;

  // Moves the cell being built onto the row being built.
  const commitCell = (): void => {
    pendingRow.push(pendingCell);
    pendingCell = "";
  };

  // Closes the current cell and then the current row.
  const commitRow = (): void => {
    commitCell();
    matrix.push(pendingRow);
    pendingRow = [];
  };

  let cursor = 0;
  while (cursor < source.length) {
    const current = source.charAt(cursor);
    // charAt yields "" past the end, which matches none of the checks below.
    const upcoming = source.charAt(cursor + 1);
    cursor += 1;

    if (current === "\"") {
      if (quoted && upcoming === "\"") {
        // Escaped quote: emit one quote and consume the second.
        pendingCell += "\"";
        cursor += 1;
      } else {
        quoted = !quoted;
      }
    } else if (quoted) {
      // Delimiters are ordinary text while inside quotes.
      pendingCell += current;
    } else if (current === ",") {
      commitCell();
    } else if (current === "\n" || current === "\r") {
      if (current === "\r" && upcoming === "\n") {
        // Treat CRLF as a single line break.
        cursor += 1;
      }
      commitRow();
    } else {
      pendingCell += current;
    }
  }

  if (pendingCell !== "" || pendingRow.length > 0) {
    commitRow();
  }

  return matrix;
}
|
|
|
|
/**
 * Throws when a cell matrix is larger than the browser import limits.
 *
 * @param rows - Matrix to check, one string array per row.
 * @param contextLabel - Human-readable name of the operation, embedded in
 *   the error message.
 * @throws Error when `rows` has more than `MAX_BROWSER_SPREADSHEET_ROWS + 1`
 *   entries, or when any row has more than
 *   `MAX_BROWSER_SPREADSHEET_COLUMNS` cells.
 */
export function assertTabularMatrixWithinLimits(rows: string[][], contextLabel: string): void {
  // One extra row is tolerated on top of the limit (presumably the header row).
  const rowBudget = MAX_BROWSER_SPREADSHEET_ROWS + 1;
  if (rows.length > rowBudget) {
    throw new Error(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_ROWS} row limit for ${contextLabel}.`,
    );
  }

  let widestRow = 0;
  for (const row of rows) {
    if (row.length > widestRow) {
      widestRow = row.length;
    }
  }

  if (widestRow > MAX_BROWSER_SPREADSHEET_COLUMNS) {
    throw new Error(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_COLUMNS} column limit for ${contextLabel}.`,
    );
  }
}
|
|
|
|
/**
 * Validates a header row: no empty header cells and no repeated names.
 *
 * An empty `headers` array passes. All cells are checked for emptiness
 * before any duplicate check runs, so an empty cell is always reported in
 * preference to a duplicate. Duplicates are detected case-insensitively,
 * and the error quotes the later occurrence exactly as written.
 *
 * @param headers - Header cells, compared as given (no trimming is done here).
 * @param contextLabel - Human-readable name of the operation, embedded in
 *   the error message.
 * @throws Error naming the 1-based column of the first empty header, or
 *   the first header that repeats an earlier one.
 */
export function assertHeaderRow(headers: string[], contextLabel: string): void {
  for (const [position, header] of headers.entries()) {
    if (header.length === 0) {
      throw new Error(
        `The selected file contains an empty header cell in column ${position + 1} and cannot be used for ${contextLabel}.`,
      );
    }
  }

  const lowerCasedSeen = new Set<string>();
  for (const header of headers) {
    const sizeBefore = lowerCasedSeen.size;
    lowerCasedSeen.add(header.toLowerCase());
    // The set does not grow when the lower-cased name was already present.
    if (lowerCasedSeen.size === sizeBefore) {
      throw new Error(`The selected file contains duplicate header "${header}" and cannot be used for ${contextLabel}.`);
    }
  }
}
|
|
|
|
/**
 * Turns a cell matrix into row objects keyed by the first row's headers.
 *
 * The matrix is first checked against the size limits. The first row is
 * then trimmed cell by cell and validated as the header row. Data rows
 * whose cells are all blank after trimming are skipped. A data row shorter
 * than the header row gets "" for the missing cells; cells beyond the last
 * header are ignored. Data cell values are stored untrimmed.
 *
 * @param rows - Matrix whose first row holds the headers.
 * @param contextLabel - Human-readable name of the operation, passed on to
 *   the validators for their error messages.
 * @returns One record per non-blank data row; `[]` when there are no headers.
 * @throws Error when a size limit or header rule is violated.
 */
function matrixToObjects(rows: string[][], contextLabel: string): Record<string, string>[] {
  assertTabularMatrixWithinLimits(rows, contextLabel);

  const headerCells = rows[0] ?? [];
  const headers = headerCells.map((cell) => cell.trim());
  assertHeaderRow(headers, contextLabel);

  if (headers.length === 0) {
    return [];
  }

  const records: Record<string, string>[] = [];
  for (const row of rows.slice(1)) {
    const isBlankRow = row.every((value) => value.trim() === "");
    if (isBlankRow) {
      continue;
    }

    const record: Record<string, string> = {};
    headers.forEach((header, column) => {
      record[header] = row[column] ?? "";
    });
    records.push(record);
  }

  return records;
}
|
|
|
|
/**
 * Returns the `exceljs` module, importing it dynamically on first use and
 * serving the cached module object from `_excelJs` afterwards.
 */
async function getExcelJS() {
  if (_excelJs) {
    return _excelJs;
  }

  const loadedModule = await import("exceljs");
  _excelJs = loadedModule;
  return loadedModule;
}
|
|
|
|
/**
 * Validates that a file may be used for a spreadsheet import, by size and
 * by file-name extension only (file contents are not inspected).
 *
 * Checks run in this order: empty file, oversized file, legacy `.xls`,
 * then the accepted extensions.
 *
 * @param file - The file to validate.
 * @param options - Optional settings.
 * @param options.allowCsv - Whether `.csv` is accepted in addition to
 *   `.xlsx`. Defaults to `true`.
 * @param options.contextLabel - Human-readable name of the operation used
 *   in the size-related error messages. Defaults to "spreadsheet import".
 * @throws Error when the file is empty, exceeds
 *   `MAX_BROWSER_SPREADSHEET_BYTES`, is a legacy `.xls` workbook, or has
 *   any other unsupported extension.
 */
export function assertSpreadsheetFile(
  file: File,
  options?: { allowCsv?: boolean; contextLabel?: string },
): void {
  const extension = getFileExtension(file.name);
  const allowCsv = options?.allowCsv ?? true;
  const contextLabel = options?.contextLabel ?? "spreadsheet import";

  if (file.size <= 0) {
    throw new Error(`The selected file is empty and cannot be used for ${contextLabel}.`);
  }

  if (file.size > MAX_BROWSER_SPREADSHEET_BYTES) {
    throw new Error(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_BYTES} byte limit for ${contextLabel}.`,
    );
  }

  if (extension === XLS_EXTENSION) {
    // Only suggest exporting to .csv when a .csv upload would be accepted;
    // otherwise the advice sends the user straight into another rejection.
    throw new Error(
      allowCsv
        ? "Legacy .xls files are not supported. Please resave the workbook as .xlsx or export it as .csv."
        : "Legacy .xls files are not supported. Please resave the workbook as .xlsx.",
    );
  }

  if (extension === XLSX_EXTENSION || (allowCsv && extension === CSV_EXTENSION)) {
    return;
  }

  throw new Error(
    allowCsv
      ? "Unsupported file type. Please upload a .xlsx or .csv file."
      : "Unsupported file type. Please upload a .xlsx file.",
  );
}
|
|
|
|
/**
 * Reads the first worksheet of an .xlsx file into row objects.
 *
 * The workbook is loaded from the file's bytes through ExcelJS. Every row
 * from 1 to the sheet's `rowCount` is read, and within each row every cell
 * from 1 to that row's `cellCount` is converted with `normalizeCellString`.
 * The resulting matrix is handed to `matrixToObjects`, which treats the
 * first row as headers.
 *
 * @param file - The .xlsx file to read.
 * @returns One record per data row, or `[]` when the workbook has no worksheet.
 */
async function parseXlsxSpreadsheet(file: File): Promise<Record<string, string>[]> {
  const excel = await getExcelJS();
  const workbook = new excel.Workbook();
  const bytes = await file.arrayBuffer();
  await workbook.xlsx.load(bytes);

  const firstSheet = workbook.worksheets[0];
  if (!firstSheet) {
    return [];
  }

  // Collects the normalised cell strings of one worksheet row.
  const readRowCells = (rowNumber: number): string[] => {
    const sheetRow = firstSheet.getRow(rowNumber);
    const values: string[] = [];
    let column = 1;
    while (column <= sheetRow.cellCount) {
      values.push(normalizeCellString(sheetRow.getCell(column).value));
      column += 1;
    }
    return values;
  };

  const matrix: string[][] = [];
  let rowNumber = 1;
  while (rowNumber <= firstSheet.rowCount) {
    matrix.push(readRowCells(rowNumber));
    rowNumber += 1;
  }

  return matrixToObjects(matrix, "spreadsheet import");
}
|
|
|
|
/**
 * Parses a spreadsheet import file into an array of row objects whose keys
 * are taken from the first row (the headers).
 *
 * The file is validated with `assertSpreadsheetFile` using its default
 * options. A `.csv` file is then read as text and parsed as CSV; any other
 * file that passed validation is parsed as an .xlsx workbook.
 *
 * @param file - The file to parse.
 * @returns One record per data row.
 * @throws Error when validation fails or the contents break the size or
 *   header rules.
 */
export async function parseSpreadsheet(file: File): Promise<Record<string, string>[]> {
  assertSpreadsheetFile(file);

  const isCsv = getFileExtension(file.name) === CSV_EXTENSION;
  if (!isCsv) {
    return parseXlsxSpreadsheet(file);
  }

  const csvText = await file.text();
  return matrixToObjects(parseCsvMatrix(csvText), "spreadsheet import");
}
|
|
|
|
/**
 * Reports whether a file's name carries a supported spreadsheet extension.
 * Only the name is examined; size and contents are not checked.
 *
 * @param file - The file to test.
 * @returns `true` when the extension is `.xlsx` or `.csv` (case-insensitive).
 */
export function isSpreadsheetFile(file: File): boolean {
  const extension = getFileExtension(file.name);
  return isSupportedSpreadsheetExtension(extension);
}
|