feat(import): harden untrusted spreadsheet boundaries

This commit is contained in:
2026-03-30 08:02:52 +02:00
parent fac8c1c3a5
commit f6daf21983
13 changed files with 561 additions and 76 deletions
+1
View File
@@ -30,6 +30,7 @@
"@trpc/server": "^11.0.0",
"clsx": "^2.1.1",
"dompurify": "^3.3.3",
"exceljs": "^4.4.0",
"framer-motion": "^12.38.0",
"next": "^15.1.7",
"next-auth": "^5.0.0-beta.25",
@@ -3,6 +3,7 @@
import { useState, useRef } from "react";
import { trpc } from "~/lib/trpc/client.js";
import { parseSkillMatrixWorkbook, matchRoleName } from "~/lib/skillMatrixParser.js";
import { assertSpreadsheetFile } from "~/lib/excel.js";
import type { SkillEntry } from "@capakraken/shared";
interface ParsedEntry {
@@ -54,6 +55,7 @@ export function BatchSkillImport() {
);
try {
assertSpreadsheetFile(file, { allowCsv: false, contextLabel: "skill matrix import" });
const buffer = await file.arrayBuffer();
const result = await parseSkillMatrixWorkbook(buffer);
@@ -152,7 +154,7 @@ export function BatchSkillImport() {
</svg>
<p className="text-sm font-medium text-gray-700 dark:text-gray-300">Click to select multiple .xlsx files</p>
<p className="text-xs text-gray-400 dark:text-gray-500 mt-1">Name files after resource EID or display name for automatic matching</p>
<input ref={fileRef} type="file" accept=".xlsx,.xls" multiple className="hidden" onChange={handleFiles} />
<input ref={fileRef} type="file" accept=".xlsx" multiple className="hidden" onChange={handleFiles} />
</div>
{/* Summary */}
@@ -269,7 +269,7 @@ export function EstimateWizard({ onClose }: { onClose: () => void }) {
event.target.value = "";
if (!isSpreadsheetFile(file)) {
setScopeImportWarnings(["Unsupported file type. Please upload .xlsx, .xls, or .csv."]);
setScopeImportWarnings(["Unsupported file type. Please upload .xlsx or .csv."]);
return;
}
@@ -586,7 +586,7 @@ export function EstimateWizard({ onClose }: { onClose: () => void }) {
<div className="flex gap-2">
<label className="cursor-pointer rounded-xl border border-gray-200 px-3 py-2 text-sm text-gray-600 transition hover:border-gray-300 hover:text-gray-900">
Import XLSX
<input type="file" accept=".xlsx,.xls,.csv" onChange={handleScopeImport} className="hidden" />
<input type="file" accept=".xlsx,.csv" onChange={handleScopeImport} className="hidden" />
</label>
<button type="button" onClick={() => setScopeItems((current) => [...current, makeScope(current.length + 1)])} className="rounded-xl border border-gray-200 px-3 py-2 text-sm text-gray-600 transition hover:border-gray-300 hover:text-gray-900">
Add scope row
@@ -67,8 +67,8 @@ export function ScopeItemEditor({
<div className="space-y-4">
<div className="flex flex-wrap items-center gap-3">
<label className="cursor-pointer rounded-2xl border border-gray-300 bg-white px-4 py-2 text-sm font-medium text-gray-700 transition hover:bg-gray-50">
Import scope from XLSX
<input type="file" accept=".xlsx,.xls,.csv" className="hidden" onChange={(event) => void handleScopeImport(event)} />
Import scope from spreadsheet
<input type="file" accept=".xlsx,.csv" className="hidden" onChange={(event) => void handleScopeImport(event)} />
</label>
{scopeImportWarnings.length > 0 && (
<div className="text-xs text-amber-700">
@@ -2,7 +2,7 @@
import { useState, useRef } from "react";
import { trpc } from "~/lib/trpc/client.js";
import { parseSpreadsheet, isSpreadsheetFile } from "~/lib/excel.js";
import { assertSpreadsheetFile, parseSpreadsheet, isSpreadsheetFile } from "~/lib/excel.js";
type ImportStage = "idle" | "preview" | "importing" | "done";
@@ -48,13 +48,14 @@ export function ImportModal({ onClose }: Props) {
setResult(null);
if (!isSpreadsheetFile(file)) {
setFileError("Unsupported file type. Please upload an Excel (.xlsx, .xls) or CSV file.");
setFileError("Unsupported file type. Please upload a .xlsx or .csv file.");
return;
}
setFileName(file.name);
try {
assertSpreadsheetFile(file, { contextLabel: "resource import" });
const parsed = await parseSpreadsheet(file);
setRows(parsed);
setStage("preview");
@@ -111,7 +112,7 @@ export function ImportModal({ onClose }: Props) {
{stage === "idle" && (
<div className="space-y-4">
<p className="text-sm text-gray-600">
Upload an Excel or CSV file to import resources. The first row must contain column headers
Upload a `.xlsx` or CSV file to import resources. The first row must contain column headers
matching the resource fields (e.g.{" "}
<code className="px-1 py-0.5 bg-gray-100 rounded text-xs font-mono">
eid, displayName, email, chapter, lcrCents
@@ -127,13 +128,13 @@ export function ImportModal({ onClose }: Props) {
<svg className="w-10 h-10 text-gray-400 mb-2" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
<path strokeLinecap="round" strokeLinejoin="round" d="M9 13h6m-3-3v6m5 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
</svg>
<p className="text-sm text-gray-500">Click to select Excel or CSV</p>
<p className="text-xs text-gray-400 mt-1">.xlsx, .xls, .csv supported</p>
<p className="text-sm text-gray-500">Click to select `.xlsx` or CSV</p>
<p className="text-xs text-gray-400 mt-1">.xlsx, .csv supported</p>
</div>
<input
ref={fileInputRef}
type="file"
accept=".xlsx,.xls,.csv"
accept=".xlsx,.csv"
className="hidden"
onChange={handleFileChange}
/>
@@ -3,6 +3,7 @@
import { useState, useRef } from "react";
import { trpc } from "~/lib/trpc/client.js";
import { parseSkillMatrixWorkbook, matchRoleName } from "~/lib/skillMatrixParser.js";
import { assertSpreadsheetFile } from "~/lib/excel.js";
import type { SkillEntry } from "@capakraken/shared";
interface Props {
@@ -46,6 +47,7 @@ export function SkillMatrixUpload({ resourceId, isOwner, onClose, onSuccess }: P
setPreview(null);
try {
assertSpreadsheetFile(file, { allowCsv: false, contextLabel: "skill matrix import" });
const buffer = await file.arrayBuffer();
const parsed = await parseSkillMatrixWorkbook(buffer);
@@ -127,7 +129,7 @@ export function SkillMatrixUpload({ resourceId, isOwner, onClose, onSuccess }: P
<input
ref={fileRef}
type="file"
accept=".xlsx,.xls"
accept=".xlsx"
className="hidden"
onChange={handleFile}
/>
+212 -28
View File
@@ -1,41 +1,225 @@
let _xlsx: typeof import("xlsx") | null = null;
const XLSX_EXTENSION = ".xlsx";
const CSV_EXTENSION = ".csv";
const XLS_EXTENSION = ".xls";
async function getXLSX() {
if (!_xlsx) {
_xlsx = await import("xlsx");
export const MAX_BROWSER_SPREADSHEET_BYTES = 10 * 1024 * 1024;
type ExcelJsModule = typeof import("exceljs");
let _excelJs: ExcelJsModule | null = null;
function getFileExtension(fileName: string): string {
const dotIndex = fileName.lastIndexOf(".");
if (dotIndex < 0) {
return "";
}
return _xlsx;
return fileName.slice(dotIndex).toLowerCase();
}
/**
 * Report whether a lower-cased file extension (including the leading dot)
 * is one of the two formats accepted for import: `.xlsx` or `.csv`.
 */
function isSupportedSpreadsheetExtension(extension: string): boolean {
  switch (extension) {
    case XLSX_EXTENSION:
    case CSV_EXTENSION:
      return true;
    default:
      return false;
  }
}
/**
 * Convert a raw worksheet cell value into the plain string used by the importers.
 *
 * Recognised shapes, in priority order:
 * - `null` / `undefined`            -> `""`
 * - `Date`                          -> ISO-8601 string (`""` when the date is invalid)
 * - string / number / boolean       -> `String(value)`
 * - `{ result }`                    -> the normalised result (formula cells)
 * - `{ text }`                      -> the text (a nested object is normalised recursively)
 * - `{ hyperlink }`                 -> the link target
 * - `{ richText: [{ text }, ...] }` -> the concatenated text runs
 * - `{ error }`                     -> the error code string
 *
 * Any other object yields `""` so that "[object Object]" never leaks into imported data.
 *
 * @param value Raw cell value of unknown shape (the file content is untrusted).
 * @returns The string representation; never throws for the shapes listed above.
 */
function normalizeCellString(value: unknown): string {
  if (value === undefined || value === null) {
    return "";
  }

  if (value instanceof Date) {
    // toISOString() throws a RangeError for an invalid Date, which would abort the whole import.
    return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  }

  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
    return String(value);
  }

  if (typeof value !== "object") {
    // bigint / symbol / function: String() is well defined for these.
    return String(value);
  }

  const record = value as Record<string, unknown>;

  if ("result" in record) {
    return normalizeCellString(record.result);
  }

  if ("text" in record) {
    if (typeof record.text === "string") {
      return record.text;
    }
    // NOTE(review): presumably hyperlink cells can carry a rich-text label object here;
    // prefer that label over the raw URL when it normalises to something non-empty.
    if (record.text !== null && typeof record.text === "object") {
      const nestedText = normalizeCellString(record.text);
      if (nestedText !== "") {
        return nestedText;
      }
    }
  }

  if ("hyperlink" in record && typeof record.hyperlink === "string") {
    return record.hyperlink;
  }

  if ("richText" in record && Array.isArray(record.richText)) {
    return record.richText
      .map((part) => {
        if (part && typeof part === "object" && "text" in part) {
          const text = (part as { text?: unknown }).text;
          return typeof text === "string" ? text : "";
        }
        return "";
      })
      .join("");
  }

  if ("error" in record && typeof record.error === "string") {
    return record.error;
  }

  // Unknown object shape: an empty cell is safer than the literal "[object Object]".
  return "";
}
/**
 * Split CSV text into a matrix of raw cell strings.
 *
 * - A leading UTF-8 BOM is stripped.
 * - `,` separates cells; `\n`, `\r` and `\r\n` terminate rows (outside quotes only).
 * - `"` toggles quoted mode; inside quotes a doubled `""` yields one literal quote.
 * - A trailing row without a final line break is still emitted.
 *
 * @param input Full CSV document text.
 * @returns One `string[]` per row, cells in column order.
 */
function parseCsvMatrix(input: string): string[][] {
  const source = input.replace(/^\uFEFF/u, "");
  const matrix: string[][] = [];
  let record: string[] = [];
  let field = "";
  let quoted = false;

  // Close the cell being accumulated and start a fresh one.
  const endField = (): void => {
    record.push(field);
    field = "";
  };

  // Close the current cell and the row that contains it.
  const endRecord = (): void => {
    endField();
    matrix.push(record);
    record = [];
  };

  let position = 0;
  while (position < source.length) {
    const current = source.charAt(position);
    const upcoming = source.charAt(position + 1);
    position += 1;

    if (current === "\"") {
      if (quoted && upcoming === "\"") {
        // Escaped quote inside a quoted cell: emit one quote, consume both characters.
        field += "\"";
        position += 1;
      } else {
        quoted = !quoted;
      }
    } else if (quoted) {
      // Separators and line breaks are literal content while inside quotes.
      field += current;
    } else if (current === ",") {
      endField();
    } else if (current === "\n" || current === "\r") {
      if (current === "\r" && upcoming === "\n") {
        // Treat CRLF as a single row terminator.
        position += 1;
      }
      endRecord();
    } else {
      field += current;
    }
  }

  // Flush the last row when the text does not end with a line break.
  if (field.length > 0 || record.length > 0) {
    endRecord();
  }

  return matrix;
}
/**
 * Turn a cell matrix into row objects keyed by the (trimmed) first-row headers.
 *
 * Data rows whose cells are all blank after trimming are dropped. Cells missing
 * from a short row become `""`; cells beyond the header count are ignored. When
 * a header name repeats, the right-most column wins.
 *
 * @param rows Matrix whose first entry is the header row.
 * @returns One object per non-blank data row, or `[]` when there is no header row.
 */
function matrixToObjects(rows: string[][]): Record<string, string>[] {
  const [headerRow = [], ...dataRows] = rows;
  const columnNames = headerRow.map((name) => name.trim());
  if (columnNames.length === 0) {
    return [];
  }

  const objects: Record<string, string>[] = [];
  for (const cells of dataRows) {
    const isBlank = cells.every((cell) => cell.trim() === "");
    if (isBlank) {
      continue;
    }

    const entry: Record<string, string> = {};
    columnNames.forEach((name, position) => {
      entry[name] = cells[position] ?? "";
    });
    objects.push(entry);
  }
  return objects;
}
/**
 * Lazily load the `exceljs` module via dynamic import and memoise it in the
 * module-level `_excelJs` variable, so later calls reuse the loaded module.
 */
async function getExcelJS() {
  if (_excelJs !== null) {
    return _excelJs;
  }
  const loaded = await import("exceljs");
  _excelJs = loaded;
  return loaded;
}
/**
 * Validate a user-selected file before it is parsed as a spreadsheet.
 *
 * Checks run in this order: non-empty, within `MAX_BROWSER_SPREADSHEET_BYTES`,
 * not a legacy `.xls`, and finally an accepted extension (`.xlsx`, plus `.csv`
 * unless `allowCsv` is `false`).
 *
 * @param file The file chosen by the user.
 * @param options.allowCsv Whether `.csv` is acceptable; defaults to `true`.
 * @param options.contextLabel Feature name interpolated into size-related messages;
 *   defaults to `"spreadsheet import"`.
 * @throws Error with a user-facing message when any check fails.
 */
export function assertSpreadsheetFile(
  file: File,
  options?: { allowCsv?: boolean; contextLabel?: string },
): void {
  const extension = getFileExtension(file.name);
  const csvPermitted = options?.allowCsv ?? true;
  const label = options?.contextLabel ?? "spreadsheet import";
  const sizeInBytes = file.size;

  if (sizeInBytes <= 0) {
    throw new Error(`The selected file is empty and cannot be used for ${label}.`);
  }
  if (sizeInBytes > MAX_BROWSER_SPREADSHEET_BYTES) {
    throw new Error(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_BYTES} byte limit for ${label}.`,
    );
  }

  switch (extension) {
    case XLS_EXTENSION:
      throw new Error(
        "Legacy .xls files are not supported. Please resave the workbook as .xlsx or export it as .csv.",
      );
    case XLSX_EXTENSION:
      return;
    case CSV_EXTENSION:
      if (csvPermitted) {
        return;
      }
      break;
    default:
      break;
  }

  // Anything reaching this point has an extension that is not accepted in this context.
  throw new Error(
    csvPermitted
      ? "Unsupported file type. Please upload a .xlsx or .csv file."
      : "Unsupported file type. Please upload a .xlsx file.",
  );
}
/**
 * Read the first worksheet of an `.xlsx` file into header-keyed row objects.
 *
 * Every cell from row 1 through `rowCount` and column 1 through each row's
 * `cellCount` is normalised to a string, then the matrix is handed to
 * `matrixToObjects`, which treats the first row as headers.
 *
 * @param file The `.xlsx` file to read.
 * @returns The parsed rows, or `[]` when the workbook has no worksheet.
 */
async function parseXlsxSpreadsheet(file: File): Promise<Record<string, string>[]> {
  const excel = await getExcelJS();
  const book = new excel.Workbook();
  const bytes = await file.arrayBuffer();
  await book.xlsx.load(bytes);

  const sheet = book.worksheets[0];
  if (!sheet) {
    return [];
  }

  const matrix: string[][] = [];
  let rowIndex = 1; // worksheet rows are addressed starting at 1
  while (rowIndex <= sheet.rowCount) {
    const sheetRow = sheet.getRow(rowIndex);
    const values: string[] = [];
    let columnIndex = 1; // columns are addressed starting at 1 as well
    while (columnIndex <= sheetRow.cellCount) {
      values.push(normalizeCellString(sheetRow.getCell(columnIndex).value));
      columnIndex += 1;
    }
    matrix.push(values);
    rowIndex += 1;
  }

  return matrixToObjects(matrix);
}
/**
* Parse an Excel (.xlsx, .xls) or CSV file to an array of row objects.
* Parse a spreadsheet import file to an array of row objects.
* Keys come from the first row (headers).
*/
export async function parseSpreadsheet(file: File): Promise<Record<string, string>[]> {
const XLSX = await getXLSX();
const buffer = await file.arrayBuffer();
const data = new Uint8Array(buffer);
const workbook = XLSX.read(data, { type: "array" });
const sheetName = workbook.SheetNames[0];
if (!sheetName) {
return [];
assertSpreadsheetFile(file);
if (getFileExtension(file.name) === CSV_EXTENSION) {
return matrixToObjects(parseCsvMatrix(await file.text()));
}
const sheet = workbook.Sheets[sheetName];
if (!sheet) {
return [];
}
return XLSX.utils.sheet_to_json<Record<string, string>>(sheet, {
raw: false,
defval: "",
});
return parseXlsxSpreadsheet(file);
}
export function isSpreadsheetFile(file: File): boolean {
return (
file.type === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ||
file.type === "application/vnd.ms-excel" ||
file.name.endsWith(".xlsx") ||
file.name.endsWith(".xls") ||
file.name.endsWith(".csv")
);
return isSupportedSpreadsheetExtension(getFileExtension(file.name));
}
+98 -22
View File
@@ -1,12 +1,99 @@
import type { SkillEntry } from "@capakraken/shared";
let _xlsx: typeof import("xlsx") | null = null;
type ExcelJsModule = typeof import("exceljs");
async function getXLSX() {
if (!_xlsx) {
_xlsx = await import("xlsx");
let _excelJs: ExcelJsModule | null = null;
async function getExcelJS() {
if (!_excelJs) {
_excelJs = await import("exceljs");
}
return _xlsx;
return _excelJs;
}
/**
 * Convert a raw worksheet cell value into the plain string used by the skill-matrix parser.
 *
 * Recognised shapes, in priority order:
 * - `null` / `undefined`            -> `""`
 * - `Date`                          -> ISO-8601 string (`""` when the date is invalid)
 * - string / number / boolean       -> `String(value)`
 * - `{ result }`                    -> the normalised result (formula cells)
 * - `{ text }`                      -> the text (a nested object is normalised recursively)
 * - `{ hyperlink }`                 -> the link target
 * - `{ richText: [{ text }, ...] }` -> the concatenated text runs
 * - `{ error }`                     -> the error code string
 *
 * Any other object yields `""` so that "[object Object]" never leaks into parsed skill data.
 *
 * @param value Raw cell value of unknown shape (the file content is untrusted).
 * @returns The string representation; never throws for the shapes listed above.
 */
function normalizeCellString(value: unknown): string {
  if (value === undefined || value === null) {
    return "";
  }

  if (value instanceof Date) {
    // toISOString() throws a RangeError for an invalid Date, which would abort the whole import.
    return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  }

  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
    return String(value);
  }

  if (typeof value !== "object") {
    // bigint / symbol / function: String() is well defined for these.
    return String(value);
  }

  const record = value as Record<string, unknown>;

  if ("result" in record) {
    return normalizeCellString(record.result);
  }

  if ("text" in record) {
    if (typeof record.text === "string") {
      return record.text;
    }
    // NOTE(review): presumably hyperlink cells can carry a rich-text label object here;
    // prefer that label over the raw URL when it normalises to something non-empty.
    if (record.text !== null && typeof record.text === "object") {
      const nestedText = normalizeCellString(record.text);
      if (nestedText !== "") {
        return nestedText;
      }
    }
  }

  if ("hyperlink" in record && typeof record.hyperlink === "string") {
    return record.hyperlink;
  }

  if ("richText" in record && Array.isArray(record.richText)) {
    return record.richText
      .map((part) => {
        if (part && typeof part === "object" && "text" in part) {
          const text = (part as { text?: unknown }).text;
          return typeof text === "string" ? text : "";
        }
        return "";
      })
      .join("");
  }

  // Error cells (also reached through a formula's `result`) expose their code as a string.
  if ("error" in record && typeof record.error === "string") {
    return record.error;
  }

  // Unknown object shape: an empty cell is safer than the literal "[object Object]".
  return "";
}
/**
 * Convert a worksheet into row objects keyed by the trimmed first-row headers.
 *
 * Cells are read from row 1 through `rowCount` and column 1 through each row's
 * `cellCount`, normalised to strings. Data rows that are entirely blank after
 * trimming are skipped; cells missing from a short row become `""`.
 *
 * @param worksheet Minimal worksheet shape to read from, or `undefined` when the sheet is absent.
 * @returns One object per non-blank data row; `[]` when the sheet or its header row is missing.
 */
function worksheetToRowObjects(
  worksheet: {
    rowCount: number;
    getRow: (rowNumber: number) => {
      cellCount: number;
      getCell: (columnNumber: number) => { value: unknown };
    };
  } | undefined,
): Record<string, string>[] {
  if (!worksheet) {
    return [];
  }

  // Pass 1: materialise the sheet as a matrix of normalised strings.
  const matrix: string[][] = [];
  let rowIndex = 1;
  while (rowIndex <= worksheet.rowCount) {
    const sheetRow = worksheet.getRow(rowIndex);
    const values: string[] = [];
    let columnIndex = 1;
    while (columnIndex <= sheetRow.cellCount) {
      values.push(normalizeCellString(sheetRow.getCell(columnIndex).value));
      columnIndex += 1;
    }
    matrix.push(values);
    rowIndex += 1;
  }

  // Pass 2: key every non-blank data row by the header names.
  const [headerRow = [], ...dataRows] = matrix;
  const columnNames = headerRow.map((name) => name.trim());
  if (columnNames.length === 0) {
    return [];
  }

  const objects: Record<string, string>[] = [];
  for (const cells of dataRows) {
    if (cells.every((cell) => cell.trim() === "")) {
      continue;
    }
    const entry: Record<string, string> = {};
    columnNames.forEach((name, position) => {
      entry[name] = cells[position] ?? "";
    });
    objects.push(entry);
  }
  return objects;
}
export interface ParsedEmployeeInfo {
@@ -91,24 +178,13 @@ function parseSkillSheet(rows: Record<string, string>[], mainSkillSet: Set<strin
* Returns ParsedSkillMatrix with employeeInfo and merged skills array.
*/
export async function parseSkillMatrixWorkbook(data: ArrayBuffer): Promise<ParsedSkillMatrix> {
const XLSX = await getXLSX();
const workbook = XLSX.read(new Uint8Array(data), { type: "array" });
const ExcelJS = await getExcelJS();
const workbook = new ExcelJS.Workbook();
await workbook.xlsx.load(data);
const employeeSheet = workbook.Sheets["Employee Information"];
const softwareSheet = workbook.Sheets["Software Skills"];
const technicalSheet = workbook.Sheets["Technical Skillset"];
const employeeRows = employeeSheet
? XLSX.utils.sheet_to_json<Record<string, string>>(employeeSheet, { raw: false, defval: "" })
: [];
const softwareRows = softwareSheet
? XLSX.utils.sheet_to_json<Record<string, string>>(softwareSheet, { raw: false, defval: "" })
: [];
const technicalRows = technicalSheet
? XLSX.utils.sheet_to_json<Record<string, string>>(technicalSheet, { raw: false, defval: "" })
: [];
const employeeRows = worksheetToRowObjects(workbook.getWorksheet("Employee Information"));
const softwareRows = worksheetToRowObjects(workbook.getWorksheet("Software Skills"));
const technicalRows = worksheetToRowObjects(workbook.getWorksheet("Technical Skillset"));
const employeeInfo = parseEmployeeInfo(employeeRows);