feat(import): harden untrusted spreadsheet boundaries
This commit is contained in:
@@ -30,6 +30,7 @@
|
|||||||
"@trpc/server": "^11.0.0",
|
"@trpc/server": "^11.0.0",
|
||||||
"clsx": "^2.1.1",
|
"clsx": "^2.1.1",
|
||||||
"dompurify": "^3.3.3",
|
"dompurify": "^3.3.3",
|
||||||
|
"exceljs": "^4.4.0",
|
||||||
"framer-motion": "^12.38.0",
|
"framer-motion": "^12.38.0",
|
||||||
"next": "^15.1.7",
|
"next": "^15.1.7",
|
||||||
"next-auth": "^5.0.0-beta.25",
|
"next-auth": "^5.0.0-beta.25",
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
import { useState, useRef } from "react";
|
import { useState, useRef } from "react";
|
||||||
import { trpc } from "~/lib/trpc/client.js";
|
import { trpc } from "~/lib/trpc/client.js";
|
||||||
import { parseSkillMatrixWorkbook, matchRoleName } from "~/lib/skillMatrixParser.js";
|
import { parseSkillMatrixWorkbook, matchRoleName } from "~/lib/skillMatrixParser.js";
|
||||||
|
import { assertSpreadsheetFile } from "~/lib/excel.js";
|
||||||
import type { SkillEntry } from "@capakraken/shared";
|
import type { SkillEntry } from "@capakraken/shared";
|
||||||
|
|
||||||
interface ParsedEntry {
|
interface ParsedEntry {
|
||||||
@@ -54,6 +55,7 @@ export function BatchSkillImport() {
|
|||||||
);
|
);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
assertSpreadsheetFile(file, { allowCsv: false, contextLabel: "skill matrix import" });
|
||||||
const buffer = await file.arrayBuffer();
|
const buffer = await file.arrayBuffer();
|
||||||
const result = await parseSkillMatrixWorkbook(buffer);
|
const result = await parseSkillMatrixWorkbook(buffer);
|
||||||
|
|
||||||
@@ -152,7 +154,7 @@ export function BatchSkillImport() {
|
|||||||
</svg>
|
</svg>
|
||||||
<p className="text-sm font-medium text-gray-700 dark:text-gray-300">Click to select multiple .xlsx files</p>
|
<p className="text-sm font-medium text-gray-700 dark:text-gray-300">Click to select multiple .xlsx files</p>
|
||||||
<p className="text-xs text-gray-400 dark:text-gray-500 mt-1">Name files after resource EID or display name for automatic matching</p>
|
<p className="text-xs text-gray-400 dark:text-gray-500 mt-1">Name files after resource EID or display name for automatic matching</p>
|
||||||
<input ref={fileRef} type="file" accept=".xlsx,.xls" multiple className="hidden" onChange={handleFiles} />
|
<input ref={fileRef} type="file" accept=".xlsx" multiple className="hidden" onChange={handleFiles} />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* Summary */}
|
{/* Summary */}
|
||||||
|
|||||||
@@ -269,7 +269,7 @@ export function EstimateWizard({ onClose }: { onClose: () => void }) {
|
|||||||
event.target.value = "";
|
event.target.value = "";
|
||||||
|
|
||||||
if (!isSpreadsheetFile(file)) {
|
if (!isSpreadsheetFile(file)) {
|
||||||
setScopeImportWarnings(["Unsupported file type. Please upload .xlsx, .xls, or .csv."]);
|
setScopeImportWarnings(["Unsupported file type. Please upload .xlsx or .csv."]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -586,7 +586,7 @@ export function EstimateWizard({ onClose }: { onClose: () => void }) {
|
|||||||
<div className="flex gap-2">
|
<div className="flex gap-2">
|
||||||
<label className="cursor-pointer rounded-xl border border-gray-200 px-3 py-2 text-sm text-gray-600 transition hover:border-gray-300 hover:text-gray-900">
|
<label className="cursor-pointer rounded-xl border border-gray-200 px-3 py-2 text-sm text-gray-600 transition hover:border-gray-300 hover:text-gray-900">
|
||||||
Import XLSX
|
Import XLSX
|
||||||
<input type="file" accept=".xlsx,.xls,.csv" onChange={handleScopeImport} className="hidden" />
|
<input type="file" accept=".xlsx,.csv" onChange={handleScopeImport} className="hidden" />
|
||||||
</label>
|
</label>
|
||||||
<button type="button" onClick={() => setScopeItems((current) => [...current, makeScope(current.length + 1)])} className="rounded-xl border border-gray-200 px-3 py-2 text-sm text-gray-600 transition hover:border-gray-300 hover:text-gray-900">
|
<button type="button" onClick={() => setScopeItems((current) => [...current, makeScope(current.length + 1)])} className="rounded-xl border border-gray-200 px-3 py-2 text-sm text-gray-600 transition hover:border-gray-300 hover:text-gray-900">
|
||||||
Add scope row
|
Add scope row
|
||||||
|
|||||||
@@ -67,8 +67,8 @@ export function ScopeItemEditor({
|
|||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
<div className="flex flex-wrap items-center gap-3">
|
<div className="flex flex-wrap items-center gap-3">
|
||||||
<label className="cursor-pointer rounded-2xl border border-gray-300 bg-white px-4 py-2 text-sm font-medium text-gray-700 transition hover:bg-gray-50">
|
<label className="cursor-pointer rounded-2xl border border-gray-300 bg-white px-4 py-2 text-sm font-medium text-gray-700 transition hover:bg-gray-50">
|
||||||
Import scope from XLSX
|
Import scope from spreadsheet
|
||||||
<input type="file" accept=".xlsx,.xls,.csv" className="hidden" onChange={(event) => void handleScopeImport(event)} />
|
<input type="file" accept=".xlsx,.csv" className="hidden" onChange={(event) => void handleScopeImport(event)} />
|
||||||
</label>
|
</label>
|
||||||
{scopeImportWarnings.length > 0 && (
|
{scopeImportWarnings.length > 0 && (
|
||||||
<div className="text-xs text-amber-700">
|
<div className="text-xs text-amber-700">
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import { useState, useRef } from "react";
|
import { useState, useRef } from "react";
|
||||||
import { trpc } from "~/lib/trpc/client.js";
|
import { trpc } from "~/lib/trpc/client.js";
|
||||||
import { parseSpreadsheet, isSpreadsheetFile } from "~/lib/excel.js";
|
import { assertSpreadsheetFile, parseSpreadsheet, isSpreadsheetFile } from "~/lib/excel.js";
|
||||||
|
|
||||||
type ImportStage = "idle" | "preview" | "importing" | "done";
|
type ImportStage = "idle" | "preview" | "importing" | "done";
|
||||||
|
|
||||||
@@ -48,13 +48,14 @@ export function ImportModal({ onClose }: Props) {
|
|||||||
setResult(null);
|
setResult(null);
|
||||||
|
|
||||||
if (!isSpreadsheetFile(file)) {
|
if (!isSpreadsheetFile(file)) {
|
||||||
setFileError("Unsupported file type. Please upload an Excel (.xlsx, .xls) or CSV file.");
|
setFileError("Unsupported file type. Please upload a .xlsx or .csv file.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
setFileName(file.name);
|
setFileName(file.name);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
assertSpreadsheetFile(file, { contextLabel: "resource import" });
|
||||||
const parsed = await parseSpreadsheet(file);
|
const parsed = await parseSpreadsheet(file);
|
||||||
setRows(parsed);
|
setRows(parsed);
|
||||||
setStage("preview");
|
setStage("preview");
|
||||||
@@ -111,7 +112,7 @@ export function ImportModal({ onClose }: Props) {
|
|||||||
{stage === "idle" && (
|
{stage === "idle" && (
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
<p className="text-sm text-gray-600">
|
<p className="text-sm text-gray-600">
|
||||||
Upload an Excel or CSV file to import resources. The first row must contain column headers
|
Upload a `.xlsx` or CSV file to import resources. The first row must contain column headers
|
||||||
matching the resource fields (e.g.{" "}
|
matching the resource fields (e.g.{" "}
|
||||||
<code className="px-1 py-0.5 bg-gray-100 rounded text-xs font-mono">
|
<code className="px-1 py-0.5 bg-gray-100 rounded text-xs font-mono">
|
||||||
eid, displayName, email, chapter, lcrCents
|
eid, displayName, email, chapter, lcrCents
|
||||||
@@ -127,13 +128,13 @@ export function ImportModal({ onClose }: Props) {
|
|||||||
<svg className="w-10 h-10 text-gray-400 mb-2" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
|
<svg className="w-10 h-10 text-gray-400 mb-2" fill="none" viewBox="0 0 24 24" stroke="currentColor" strokeWidth={1.5}>
|
||||||
<path strokeLinecap="round" strokeLinejoin="round" d="M9 13h6m-3-3v6m5 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
|
<path strokeLinecap="round" strokeLinejoin="round" d="M9 13h6m-3-3v6m5 5H7a2 2 0 01-2-2V5a2 2 0 012-2h5.586a1 1 0 01.707.293l5.414 5.414a1 1 0 01.293.707V19a2 2 0 01-2 2z" />
|
||||||
</svg>
|
</svg>
|
||||||
<p className="text-sm text-gray-500">Click to select Excel or CSV</p>
|
<p className="text-sm text-gray-500">Click to select `.xlsx` or CSV</p>
|
||||||
<p className="text-xs text-gray-400 mt-1">.xlsx, .xls, .csv supported</p>
|
<p className="text-xs text-gray-400 mt-1">.xlsx, .csv supported</p>
|
||||||
</div>
|
</div>
|
||||||
<input
|
<input
|
||||||
ref={fileInputRef}
|
ref={fileInputRef}
|
||||||
type="file"
|
type="file"
|
||||||
accept=".xlsx,.xls,.csv"
|
accept=".xlsx,.csv"
|
||||||
className="hidden"
|
className="hidden"
|
||||||
onChange={handleFileChange}
|
onChange={handleFileChange}
|
||||||
/>
|
/>
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
import { useState, useRef } from "react";
|
import { useState, useRef } from "react";
|
||||||
import { trpc } from "~/lib/trpc/client.js";
|
import { trpc } from "~/lib/trpc/client.js";
|
||||||
import { parseSkillMatrixWorkbook, matchRoleName } from "~/lib/skillMatrixParser.js";
|
import { parseSkillMatrixWorkbook, matchRoleName } from "~/lib/skillMatrixParser.js";
|
||||||
|
import { assertSpreadsheetFile } from "~/lib/excel.js";
|
||||||
import type { SkillEntry } from "@capakraken/shared";
|
import type { SkillEntry } from "@capakraken/shared";
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
@@ -46,6 +47,7 @@ export function SkillMatrixUpload({ resourceId, isOwner, onClose, onSuccess }: P
|
|||||||
setPreview(null);
|
setPreview(null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
assertSpreadsheetFile(file, { allowCsv: false, contextLabel: "skill matrix import" });
|
||||||
const buffer = await file.arrayBuffer();
|
const buffer = await file.arrayBuffer();
|
||||||
const parsed = await parseSkillMatrixWorkbook(buffer);
|
const parsed = await parseSkillMatrixWorkbook(buffer);
|
||||||
|
|
||||||
@@ -127,7 +129,7 @@ export function SkillMatrixUpload({ resourceId, isOwner, onClose, onSuccess }: P
|
|||||||
<input
|
<input
|
||||||
ref={fileRef}
|
ref={fileRef}
|
||||||
type="file"
|
type="file"
|
||||||
accept=".xlsx,.xls"
|
accept=".xlsx"
|
||||||
className="hidden"
|
className="hidden"
|
||||||
onChange={handleFile}
|
onChange={handleFile}
|
||||||
/>
|
/>
|
||||||
|
|||||||
+212
-28
@@ -1,41 +1,225 @@
|
|||||||
let _xlsx: typeof import("xlsx") | null = null;
|
const XLSX_EXTENSION = ".xlsx";
|
||||||
|
const CSV_EXTENSION = ".csv";
|
||||||
|
const XLS_EXTENSION = ".xls";
|
||||||
|
|
||||||
async function getXLSX() {
|
export const MAX_BROWSER_SPREADSHEET_BYTES = 10 * 1024 * 1024;
|
||||||
if (!_xlsx) {
|
|
||||||
_xlsx = await import("xlsx");
|
type ExcelJsModule = typeof import("exceljs");
|
||||||
|
let _excelJs: ExcelJsModule | null = null;
|
||||||
|
|
||||||
|
/**
 * Return the lower-cased extension of a file name, including the leading dot.
 *
 * @param fileName - Bare file name (e.g. `File.name`).
 * @returns Everything from the last `.` onwards, lower-cased, or `""` when the name has no dot.
 */
function getFileExtension(fileName: string): string {
  const lastDot = fileName.lastIndexOf(".");
  return lastDot < 0 ? "" : fileName.slice(lastDot).toLowerCase();
}
|
||||||
|
|
||||||
|
/**
 * Tell whether an (already lower-cased, dot-prefixed) extension is one the browser import accepts.
 *
 * @param extension - Extension as produced by `getFileExtension`.
 * @returns `true` for `.xlsx` and `.csv`, `false` for everything else.
 */
function isSupportedSpreadsheetExtension(extension: string): boolean {
  switch (extension) {
    case XLSX_EXTENSION:
    case CSV_EXTENSION:
      return true;
    default:
      return false;
  }
}
|
||||||
|
|
||||||
|
/**
 * Convert a raw worksheet cell value into the plain string used by the import pipeline.
 *
 * Besides primitives and dates, the wrapper shapes recognised here are: formula
 * results (`result`), hyperlinks (`text` / `hyperlink`), rich text (`richText`) and
 * error values (`error`).
 *
 * @param value - Raw cell value of unknown shape.
 * @returns The textual representation; `""` for empty cells, invalid dates and
 *          objects that match none of the known shapes.
 */
function normalizeCellString(value: unknown): string {
  if (value === undefined || value === null) {
    return "";
  }

  if (value instanceof Date) {
    // toISOString() throws a RangeError for an invalid Date; treat that as an empty cell.
    return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  }

  if (typeof value !== "object") {
    // string / number / boolean (and any other primitive) stringify safely.
    return String(value);
  }

  const record = value as Record<string, unknown>;

  if ("result" in record) {
    // Formula cell: use the cached result, which may itself be a wrapper (e.g. an error).
    return normalizeCellString(record.result);
  }

  if ("text" in record) {
    if (typeof record.text === "string") {
      return record.text;
    }

    // Hyperlink whose display text is itself a wrapper (e.g. rich text): prefer the
    // displayed text over the URL, falling back to the URL only when it is empty.
    if (typeof record.text === "object" && record.text !== null) {
      const displayed = normalizeCellString(record.text);
      if (displayed !== "") {
        return displayed;
      }
    }
  }

  if ("hyperlink" in record && typeof record.hyperlink === "string") {
    return record.hyperlink;
  }

  if ("richText" in record && Array.isArray(record.richText)) {
    return record.richText
      .map((part: unknown) => {
        if (part && typeof part === "object" && "text" in part) {
          const text = (part as { text?: unknown }).text;
          return typeof text === "string" ? text : "";
        }
        return "";
      })
      .join("");
  }

  if ("error" in record && typeof record.error === "string") {
    return record.error;
  }

  // Unknown object shape (e.g. a formula without a cached result): String(value)
  // would leak "[object Object]" into the imported data, so report an empty cell.
  return "";
}
|
||||||
|
|
||||||
|
/**
 * Split comma-separated text into a matrix of cell strings.
 *
 * A leading byte-order mark is dropped. Double quotes toggle quoted mode, and a doubled
 * quote inside a quoted section yields one literal quote. Outside quotes, `,` ends a cell
 * and `\n`, `\r` or `\r\n` ends a row. A final row without a trailing line break is still
 * emitted.
 *
 * @param input - Raw CSV text.
 * @returns One `string[]` per row, in input order.
 */
function parseCsvMatrix(input: string): string[][] {
  const source = input.replace(/^\uFEFF/u, "");
  const matrix: string[][] = [];
  let record: string[] = [];
  let field = "";
  let quoted = false;

  const closeField = (): void => {
    record.push(field);
    field = "";
  };
  const closeRecord = (): void => {
    closeField();
    matrix.push(record);
    record = [];
  };

  let cursor = 0;
  while (cursor < source.length) {
    const current = source[cursor];
    const upcoming = source[cursor + 1];
    cursor += 1;

    if (current === "\"") {
      if (quoted && upcoming === "\"") {
        // Escaped quote inside a quoted section: emit one quote, consume both characters.
        field += "\"";
        cursor += 1;
      } else {
        quoted = !quoted;
      }
    } else if (quoted) {
      // Separators and line breaks are plain content while inside quotes.
      field += current;
    } else if (current === ",") {
      closeField();
    } else if (current === "\n") {
      closeRecord();
    } else if (current === "\r") {
      if (upcoming === "\n") {
        // Treat CRLF as a single line break.
        cursor += 1;
      }
      closeRecord();
    } else {
      field += current;
    }
  }

  // Flush the last row when the text does not end with a line break.
  if (field.length > 0 || record.length > 0) {
    closeRecord();
  }

  return matrix;
}
|
||||||
|
|
||||||
|
/**
 * Turn a cell matrix into row objects keyed by the (trimmed) first-row headers.
 *
 * Rows whose cells are all blank are skipped; cells missing from a short row become `""`.
 * When two columns share a header, the later column wins.
 *
 * @param rows - Matrix whose first row holds the column headers.
 * @returns One record per non-blank data row; `[]` when there is no header row.
 */
function matrixToObjects(rows: string[][]): Record<string, string>[] {
  const [headerRow = [], ...dataRows] = rows;
  const headers = headerRow.map((header) => header.trim());
  if (headers.length === 0) {
    return [];
  }

  return dataRows
    .filter((row) => row.some((value) => value.trim() !== ""))
    .map((row) =>
      // Headers come from untrusted files. Object.fromEntries defines own data
      // properties, so a header such as "__proto__" is kept as an ordinary column
      // instead of being swallowed by the Object.prototype.__proto__ setter.
      Object.fromEntries(
        headers.map((header, index): [string, string] => [header, row[index] ?? ""]),
      ),
    );
}
|
||||||
|
|
||||||
|
/**
 * Load the `exceljs` module on first use and reuse it afterwards.
 *
 * The module is cached in the module-level `_excelJs` variable so the dynamic
 * import is only awaited while the cache is still empty.
 *
 * @returns The loaded `exceljs` module.
 */
async function getExcelJS() {
  const cached = _excelJs;
  if (cached) {
    return cached;
  }

  const loaded = await import("exceljs");
  _excelJs = loaded;
  return loaded;
}
|
||||||
|
|
||||||
|
/**
 * Validate a user-selected file before it is handed to a spreadsheet parser.
 *
 * Checks run in this order: non-empty, within `MAX_BROWSER_SPREADSHEET_BYTES`,
 * not a legacy `.xls`, then an accepted extension (`.xlsx`, plus `.csv` unless disabled).
 *
 * @param file - The file picked by the user.
 * @param options - `allowCsv` (default `true`) controls whether `.csv` is accepted;
 *                  `contextLabel` (default `"spreadsheet import"`) is interpolated into
 *                  the size-related error messages.
 * @throws Error when any of the checks fails; the message is suitable for display.
 */
export function assertSpreadsheetFile(
  file: File,
  options?: { allowCsv?: boolean; contextLabel?: string },
): void {
  const extension = getFileExtension(file.name);
  const csvPermitted = options?.allowCsv ?? true;
  const label = options?.contextLabel ?? "spreadsheet import";
  const { size } = file;

  if (size <= 0) {
    throw new Error(`The selected file is empty and cannot be used for ${label}.`);
  }

  if (size > MAX_BROWSER_SPREADSHEET_BYTES) {
    throw new Error(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_BYTES} byte limit for ${label}.`,
    );
  }

  switch (extension) {
    case XLS_EXTENSION:
      throw new Error(
        "Legacy .xls files are not supported. Please resave the workbook as .xlsx or export it as .csv.",
      );
    case XLSX_EXTENSION:
      return;
    case CSV_EXTENSION:
      if (csvPermitted) {
        return;
      }
      break;
    default:
      break;
  }

  // Anything reaching this point is unsupported; the hint lists only what the caller accepts.
  throw new Error(
    csvPermitted
      ? "Unsupported file type. Please upload a .xlsx or .csv file."
      : "Unsupported file type. Please upload a .xlsx file.",
  );
}
|
||||||
|
|
||||||
|
/**
 * Read the first worksheet of an `.xlsx` file into header-keyed row objects.
 *
 * Every cell from row 1 up to `rowCount` and column 1 up to each row's `cellCount` is
 * normalised to a string, and the resulting matrix is converted with `matrixToObjects`.
 *
 * @param file - The `.xlsx` file to parse.
 * @returns The parsed rows, or `[]` when the workbook has no worksheet.
 */
async function parseXlsxSpreadsheet(file: File): Promise<Record<string, string>[]> {
  const ExcelJS = await getExcelJS();
  const workbook = new ExcelJS.Workbook();
  const contents = await file.arrayBuffer();
  await workbook.xlsx.load(contents);

  const firstSheet = workbook.worksheets[0];
  if (!firstSheet) {
    return [];
  }

  // Rows and columns are 1-based in the worksheet API, hence the "+ 1" offsets.
  const matrix = Array.from({ length: firstSheet.rowCount }, (_row, rowIndex) => {
    const sheetRow = firstSheet.getRow(rowIndex + 1);
    return Array.from({ length: sheetRow.cellCount }, (_cell, columnIndex) =>
      normalizeCellString(sheetRow.getCell(columnIndex + 1).value),
    );
  });

  return matrixToObjects(matrix);
}
|
||||||
|
|
||||||
/**
 * Parse a spreadsheet import file into an array of row objects.
 *
 * The file is validated with `assertSpreadsheetFile` first. `.csv` files are read as
 * text and parsed locally; every other accepted file goes through the `.xlsx` parser.
 * Object keys come from the first row (headers).
 *
 * @param file - The file selected for import.
 * @returns One record per data row.
 * @throws Error when the file fails validation.
 */
export async function parseSpreadsheet(file: File): Promise<Record<string, string>[]> {
  assertSpreadsheetFile(file);

  const isCsv = getFileExtension(file.name) === CSV_EXTENSION;
  if (!isCsv) {
    return parseXlsxSpreadsheet(file);
  }

  const csvText = await file.text();
  return matrixToObjects(parseCsvMatrix(csvText));
}
|
||||||
|
|
||||||
/**
 * Tell whether a file looks like a supported spreadsheet, judging by its extension only.
 *
 * @param file - The file to inspect.
 * @returns `true` when the file name's extension is one of the supported spreadsheet extensions.
 */
export function isSpreadsheetFile(file: File): boolean {
  const extension = getFileExtension(file.name);
  return isSupportedSpreadsheetExtension(extension);
}
|
||||||
|
|||||||
@@ -1,12 +1,99 @@
|
|||||||
import type { SkillEntry } from "@capakraken/shared";
|
import type { SkillEntry } from "@capakraken/shared";
|
||||||
|
|
||||||
let _xlsx: typeof import("xlsx") | null = null;
|
type ExcelJsModule = typeof import("exceljs");
|
||||||
|
|
||||||
async function getXLSX() {
|
let _excelJs: ExcelJsModule | null = null;
|
||||||
if (!_xlsx) {
|
|
||||||
_xlsx = await import("xlsx");
|
/**
 * Load the `exceljs` module on first use and reuse it afterwards.
 *
 * The module is cached in the module-level `_excelJs` variable so the dynamic
 * import is only awaited while the cache is still empty.
 *
 * @returns The loaded `exceljs` module.
 */
async function getExcelJS() {
  const cached = _excelJs;
  if (cached) {
    return cached;
  }

  const loaded = await import("exceljs");
  _excelJs = loaded;
  return loaded;
}
|
||||||
|
|
||||||
|
/**
 * Convert a raw worksheet cell value into the plain string used by the skill-matrix parser.
 *
 * Besides primitives and dates, the wrapper shapes recognised here are: formula
 * results (`result`), hyperlinks (`text` / `hyperlink`), rich text (`richText`) and
 * error values (`error`).
 *
 * @param value - Raw cell value of unknown shape.
 * @returns The textual representation; `""` for empty cells, invalid dates and
 *          objects that match none of the known shapes.
 */
function normalizeCellString(value: unknown): string {
  if (value === undefined || value === null) {
    return "";
  }

  if (value instanceof Date) {
    // toISOString() throws a RangeError for an invalid Date; treat that as an empty cell.
    return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  }

  if (typeof value !== "object") {
    // string / number / boolean (and any other primitive) stringify safely.
    return String(value);
  }

  const record = value as Record<string, unknown>;

  if ("result" in record) {
    // Formula cell: use the cached result, which may itself be a wrapper (e.g. an error).
    return normalizeCellString(record.result);
  }

  if ("text" in record) {
    if (typeof record.text === "string") {
      return record.text;
    }

    // Hyperlink whose display text is itself a wrapper (e.g. rich text): prefer the
    // displayed text over the URL, falling back to the URL only when it is empty.
    if (typeof record.text === "object" && record.text !== null) {
      const displayed = normalizeCellString(record.text);
      if (displayed !== "") {
        return displayed;
      }
    }
  }

  if ("hyperlink" in record && typeof record.hyperlink === "string") {
    return record.hyperlink;
  }

  if ("richText" in record && Array.isArray(record.richText)) {
    return record.richText
      .map((part: unknown) => {
        if (part && typeof part === "object" && "text" in part) {
          const text = (part as { text?: unknown }).text;
          return typeof text === "string" ? text : "";
        }
        return "";
      })
      .join("");
  }

  // Error cells carry their code (e.g. "#N/A") in `error`; surface the code itself
  // rather than letting the object fall through to the generic fallback.
  if ("error" in record && typeof record.error === "string") {
    return record.error;
  }

  // Unknown object shape (e.g. a formula without a cached result): String(value)
  // would leak "[object Object]" into the imported data, so report an empty cell.
  return "";
}
|
||||||
|
|
||||||
|
/**
 * Convert a worksheet into row objects keyed by the (trimmed) first-row headers.
 *
 * Every cell from row 1 up to `rowCount` and column 1 up to each row's `cellCount` is
 * normalised to a string. Data rows whose cells are all blank are skipped; cells missing
 * from a short row become `""`. When two columns share a header, the later column wins.
 *
 * @param worksheet - Minimal worksheet shape (row count plus 1-based row/cell accessors),
 *                    or `undefined` when the sheet does not exist.
 * @returns One record per non-blank data row; `[]` for a missing sheet or missing header row.
 */
function worksheetToRowObjects(
  worksheet: {
    rowCount: number;
    getRow: (rowNumber: number) => {
      cellCount: number;
      getCell: (columnNumber: number) => { value: unknown };
    };
  } | undefined,
): Record<string, string>[] {
  if (!worksheet) {
    return [];
  }

  const rows: string[][] = [];
  for (let rowNumber = 1; rowNumber <= worksheet.rowCount; rowNumber += 1) {
    const row = worksheet.getRow(rowNumber);
    const cells: string[] = [];

    for (let columnNumber = 1; columnNumber <= row.cellCount; columnNumber += 1) {
      cells.push(normalizeCellString(row.getCell(columnNumber).value));
    }

    rows.push(cells);
  }

  const [headerRow = [], ...dataRows] = rows;
  const headers = headerRow.map((header) => header.trim());
  if (headers.length === 0) {
    return [];
  }

  return dataRows
    .filter((row) => row.some((value) => value.trim() !== ""))
    .map((row) =>
      // Headers come from untrusted workbooks. Object.fromEntries defines own data
      // properties, so a header such as "__proto__" is kept as an ordinary column
      // instead of being swallowed by the Object.prototype.__proto__ setter.
      Object.fromEntries(
        headers.map((header, index): [string, string] => [header, row[index] ?? ""]),
      ),
    );
}
|
||||||
|
|
||||||
export interface ParsedEmployeeInfo {
|
export interface ParsedEmployeeInfo {
|
||||||
@@ -91,24 +178,13 @@ function parseSkillSheet(rows: Record<string, string>[], mainSkillSet: Set<strin
|
|||||||
* Returns ParsedSkillMatrix with employeeInfo and merged skills array.
|
* Returns ParsedSkillMatrix with employeeInfo and merged skills array.
|
||||||
*/
|
*/
|
||||||
export async function parseSkillMatrixWorkbook(data: ArrayBuffer): Promise<ParsedSkillMatrix> {
|
export async function parseSkillMatrixWorkbook(data: ArrayBuffer): Promise<ParsedSkillMatrix> {
|
||||||
const XLSX = await getXLSX();
|
const ExcelJS = await getExcelJS();
|
||||||
const workbook = XLSX.read(new Uint8Array(data), { type: "array" });
|
const workbook = new ExcelJS.Workbook();
|
||||||
|
await workbook.xlsx.load(data);
|
||||||
|
|
||||||
const employeeSheet = workbook.Sheets["Employee Information"];
|
const employeeRows = worksheetToRowObjects(workbook.getWorksheet("Employee Information"));
|
||||||
const softwareSheet = workbook.Sheets["Software Skills"];
|
const softwareRows = worksheetToRowObjects(workbook.getWorksheet("Software Skills"));
|
||||||
const technicalSheet = workbook.Sheets["Technical Skillset"];
|
const technicalRows = worksheetToRowObjects(workbook.getWorksheet("Technical Skillset"));
|
||||||
|
|
||||||
const employeeRows = employeeSheet
|
|
||||||
? XLSX.utils.sheet_to_json<Record<string, string>>(employeeSheet, { raw: false, defval: "" })
|
|
||||||
: [];
|
|
||||||
|
|
||||||
const softwareRows = softwareSheet
|
|
||||||
? XLSX.utils.sheet_to_json<Record<string, string>>(softwareSheet, { raw: false, defval: "" })
|
|
||||||
: [];
|
|
||||||
|
|
||||||
const technicalRows = technicalSheet
|
|
||||||
? XLSX.utils.sheet_to_json<Record<string, string>>(technicalSheet, { raw: false, defval: "" })
|
|
||||||
: [];
|
|
||||||
|
|
||||||
const employeeInfo = parseEmployeeInfo(employeeRows);
|
const employeeInfo = parseEmployeeInfo(employeeRows);
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,46 @@
|
|||||||
|
# Import Hardening
|
||||||
|
|
||||||
|
**Date:** 2026-03-30
|
||||||
|
**Purpose:** Define the safe parser boundary for untrusted spreadsheet imports.
|
||||||
|
|
||||||
|
## Decision
|
||||||
|
|
||||||
|
- Untrusted workbook imports no longer accept legacy `.xls`.
|
||||||
|
- Server-side dispo imports accept only `.xlsx` files.
|
||||||
|
- Browser-side ad hoc imports accept `.xlsx` and `.csv`.
|
||||||
|
- Trusted export generation may still use `xlsx` until the export paths are migrated separately.
|
||||||
|
|
||||||
|
## Server Boundary
|
||||||
|
|
||||||
|
The dispo-import reader in [read-workbook.ts](/packages/application/src/use-cases/dispo-import/read-workbook.ts) now enforces:
|
||||||
|
|
||||||
|
- normalized filesystem paths before reading
|
||||||
|
- regular-file checks
|
||||||
|
- non-empty file checks
|
||||||
|
- a hard size limit of `15 MiB`
|
||||||
|
- `.xlsx`-only parsing behind a hardened server-side parser boundary
|
||||||
|
|
||||||
|
The API entry points in [dispo.ts](/packages/api/src/router/dispo.ts) reject non-`.xlsx` workbook paths before staging or validation begins.
|
||||||
|
|
||||||
|
## Browser Boundary
|
||||||
|
|
||||||
|
The browser import helpers in [excel.ts](/apps/web/src/lib/excel.ts) and [skillMatrixParser.ts](/apps/web/src/lib/skillMatrixParser.ts) now enforce:
|
||||||
|
|
||||||
|
- a hard client-side file size limit of `10 MiB`
|
||||||
|
- explicit rejection of legacy `.xls`
|
||||||
|
- `.xlsx` parsing through `exceljs`
|
||||||
|
- `.csv` parsing through a local parser for simple tabular imports
|
||||||
|
|
||||||
|
Affected upload flows:
|
||||||
|
|
||||||
|
- resource CSV/XLSX import
|
||||||
|
- estimate scope spreadsheet import
|
||||||
|
- single skill-matrix import
|
||||||
|
- batch skill-matrix import
|
||||||
|
|
||||||
|
## Rationale
|
||||||
|
|
||||||
|
- `.xls` support keeps the old binary workbook format in the untrusted path without enough payoff.
|
||||||
|
- The server path keeps compatibility-first `.xlsx` parsing for the current dispo workbooks, but only behind explicit file validation and size limits.
|
||||||
|
- The browser path moves away from blanket `xlsx` import usage to a narrower parser boundary (`exceljs` for `.xlsx`, a local parser for `.csv`).
|
||||||
|
- CSV remains useful for lightweight business imports and is small enough to parse with a narrow local parser.
|
||||||
@@ -23,6 +23,13 @@ const paginationSchema = z.object({
|
|||||||
const importBatchStatusSchema = z.nativeEnum(ImportBatchStatus);
|
const importBatchStatusSchema = z.nativeEnum(ImportBatchStatus);
|
||||||
const stagedRecordStatusSchema = z.nativeEnum(StagedRecordStatus);
|
const stagedRecordStatusSchema = z.nativeEnum(StagedRecordStatus);
|
||||||
const stagedRecordTypeSchema = z.nativeEnum(DispoStagedRecordType);
|
const stagedRecordTypeSchema = z.nativeEnum(DispoStagedRecordType);
|
||||||
|
/**
 * Input schema for workbook path fields: a trimmed, non-empty string whose
 * extension is `.xlsx` (compared case-insensitively).
 */
const workbookPathSchema = z
  .string()
  .trim()
  .min(1, "Workbook path is required.")
  .refine(
    (workbookPath) => {
      const lowered = workbookPath.toLowerCase();
      return lowered.endsWith(".xlsx");
    },
    { message: "Only .xlsx workbook paths are supported." },
  );
|
||||||
|
|
||||||
// ─── Router ──────────────────────────────────────────────────────────────────
|
// ─── Router ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -32,12 +39,12 @@ export const dispoRouter = createTRPCRouter({
|
|||||||
stageImportBatch: adminProcedure
|
stageImportBatch: adminProcedure
|
||||||
.input(
|
.input(
|
||||||
z.object({
|
z.object({
|
||||||
chargeabilityWorkbookPath: z.string(),
|
chargeabilityWorkbookPath: workbookPathSchema,
|
||||||
costWorkbookPath: z.string().optional(),
|
costWorkbookPath: workbookPathSchema.optional(),
|
||||||
notes: z.string().nullish(),
|
notes: z.string().nullish(),
|
||||||
planningWorkbookPath: z.string(),
|
planningWorkbookPath: workbookPathSchema,
|
||||||
referenceWorkbookPath: z.string(),
|
referenceWorkbookPath: workbookPathSchema,
|
||||||
rosterWorkbookPath: z.string().optional(),
|
rosterWorkbookPath: workbookPathSchema.optional(),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
.mutation(async ({ ctx, input }) => {
|
.mutation(async ({ ctx, input }) => {
|
||||||
@@ -56,13 +63,13 @@ export const dispoRouter = createTRPCRouter({
|
|||||||
validateImportBatch: adminProcedure
|
validateImportBatch: adminProcedure
|
||||||
.input(
|
.input(
|
||||||
z.object({
|
z.object({
|
||||||
chargeabilityWorkbookPath: z.string(),
|
chargeabilityWorkbookPath: workbookPathSchema,
|
||||||
costWorkbookPath: z.string().optional(),
|
costWorkbookPath: workbookPathSchema.optional(),
|
||||||
importBatchId: z.string().optional(),
|
importBatchId: z.string().optional(),
|
||||||
notes: z.string().nullish(),
|
notes: z.string().nullish(),
|
||||||
planningWorkbookPath: z.string(),
|
planningWorkbookPath: workbookPathSchema,
|
||||||
referenceWorkbookPath: z.string(),
|
referenceWorkbookPath: workbookPathSchema,
|
||||||
rosterWorkbookPath: z.string().optional(),
|
rosterWorkbookPath: workbookPathSchema.optional(),
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
.query(async ({ input }) => {
|
.query(async ({ input }) => {
|
||||||
|
|||||||
@@ -0,0 +1,58 @@
|
|||||||
|
import { cp, mkdtemp, rm, writeFile } from "node:fs/promises";
|
||||||
|
import os from "node:os";
|
||||||
|
import path from "node:path";
|
||||||
|
import { fileURLToPath } from "node:url";
|
||||||
|
import { afterEach, describe, expect, it } from "vitest";
|
||||||
|
import {
|
||||||
|
MAX_DISPO_WORKBOOK_BYTES,
|
||||||
|
readWorksheetMatrix,
|
||||||
|
} from "../use-cases/dispo-import/read-workbook.js";
|
||||||
|
|
||||||
|
const referenceWorkbookPath = fileURLToPath(
|
||||||
|
new URL("../../../../samples/Dispov2/MandatoryDispoCategories_V3.xlsx", import.meta.url),
|
||||||
|
);
|
||||||
|
|
||||||
|
const tempDirectories: string[] = [];
|
||||||
|
|
||||||
|
// Remove every temporary directory registered during the test that just ran.
// splice(0) empties the shared list so the next test starts with a clean registry.
afterEach(async () => {
  const pending = tempDirectories.splice(0);
  const removals = pending.map((directory) => rm(directory, { recursive: true, force: true }));
  await Promise.all(removals);
});
|
||||||
|
|
||||||
|
/**
 * Create a fresh temporary directory and register it for removal after the test.
 *
 * @returns The absolute path of the new directory.
 */
async function makeTempDirectory(): Promise<string> {
  const prefix = path.join(os.tmpdir(), "capakraken-read-workbook-");
  const created = await mkdtemp(prefix);
  tempDirectories.push(created);
  return created;
}
|
||||||
|
|
||||||
|
// Exercises the dispo workbook reader: the happy path on a sample workbook plus the
// extension and size guards.
describe("readWorksheetMatrix", () => {
  it("reads trusted xlsx worksheets through the hardened reader", async () => {
    const matrix = await readWorksheetMatrix(referenceWorkbookPath, "EID-Attr");

    expect(matrix.length).toBeGreaterThan(0);
    expect(matrix.some((cells) => cells.length > 0)).toBe(true);
  });

  it("rejects legacy .xls workbook paths", async () => {
    // A valid workbook copied under a .xls name: only the extension differs.
    const workDirectory = await makeTempDirectory();
    const renamedWorkbook = path.join(workDirectory, "legacy-input.xls");
    await cp(referenceWorkbookPath, renamedWorkbook);

    const attempt = readWorksheetMatrix(renamedWorkbook, "EID-Attr");

    await expect(attempt).rejects.toThrow("Only .xlsx workbooks are supported for dispo imports");
  });

  it("rejects oversized workbook files before parsing", async () => {
    // One byte over the limit, zero-filled: the content is not a real workbook.
    const workDirectory = await makeTempDirectory();
    const tooLargeWorkbook = path.join(workDirectory, "oversized.xlsx");
    const payload = Buffer.alloc(MAX_DISPO_WORKBOOK_BYTES + 1, 0);
    await writeFile(tooLargeWorkbook, payload);

    const attempt = readWorksheetMatrix(tooLargeWorkbook, "Sheet1");

    await expect(attempt).rejects.toThrow("Workbook file exceeds the");
  });
});
|
||||||
@@ -1,8 +1,76 @@
|
|||||||
import * as XLSX from "xlsx";
|
import { stat } from "node:fs/promises";
|
||||||
|
import { createRequire } from "node:module";
|
||||||
|
import path from "node:path";
|
||||||
|
|
||||||
export type WorksheetCellValue = boolean | Date | number | string | null;
|
export type WorksheetCellValue = boolean | Date | number | string | null;
|
||||||
export type WorksheetMatrix = WorksheetCellValue[][];
|
export type WorksheetMatrix = WorksheetCellValue[][];
|
||||||
|
|
||||||
|
/** Options this module passes to `sheet_to_json`; every field is pinned to the one literal value used here. */
type SheetToJsonOptions = {
  defval: null;
  header: 1;
  raw: true;
};

/** The only part of a loaded workbook this module touches: worksheets keyed by sheet name. */
type XlsxWorkbook = {
  Sheets: Record<string, unknown>;
};

/**
 * Hand-written typing for the slice of the `xlsx` module that this file calls.
 * The module is loaded through `require` and cast to this shape, so only the
 * members listed here are type-checked.
 */
type XlsxRuntime = {
  utils: {
    sheet_to_json<T>(worksheet: unknown, options: SheetToJsonOptions): T[];
  };
  readFile(filePath: string, options: { cellDates: true; dense: true }): XlsxWorkbook;
};
|
||||||
|
|
||||||
|
const require = createRequire(import.meta.url);
|
||||||
|
const XLSX = require("xlsx") as XlsxRuntime;
|
||||||
|
|
||||||
|
/** The single file extension accepted for dispo workbooks (compared in lower case). */
const DISPO_WORKBOOK_EXTENSION = ".xlsx";

/** Largest workbook file size, in bytes, accepted for import (15 MiB). */
export const MAX_DISPO_WORKBOOK_BYTES = 15 * 1024 ** 2;
|
||||||
|
|
||||||
|
/**
 * Returns a copy of `row` with the run of `null` cells at its end removed.
 * `null` cells that are followed by a non-null cell are kept, and the input
 * array is not modified.
 *
 * @param row - Cell values of one worksheet row.
 * @returns A new array ending at the last non-null cell (empty when every cell is `null`).
 */
function trimTrailingNulls(row: WorksheetCellValue[]): WorksheetCellValue[] {
  // Scan from the right for the last cell that carries a value.
  for (let lastIndex = row.length - 1; lastIndex >= 0; lastIndex -= 1) {
    if (row[lastIndex] !== null) {
      return row.slice(0, lastIndex + 1);
    }
  }
  return [];
}
|
||||||
|
|
||||||
|
/**
 * Returns a copy of `rows` with the run of zero-length rows at its end removed.
 * Empty rows that are followed by a non-empty row are kept, and the input
 * matrix is not modified.
 *
 * @param rows - Worksheet rows, each an array of cell values.
 * @returns A new array ending at the last row that has at least one cell.
 */
function trimTrailingEmptyRows(rows: WorksheetMatrix): WorksheetMatrix {
  let keep = rows.length;
  for (; keep > 0; keep -= 1) {
    // Stop at the first row from the end that is not an empty array.
    if (rows[keep - 1]?.length !== 0) {
      break;
    }
  }
  return rows.slice(0, keep);
}
|
||||||
|
|
||||||
|
/**
 * Checks that `workbookPath` is acceptable for a dispo import before it is
 * handed to the workbook parser.
 *
 * The path is made absolute with `path.resolve`, must carry the `.xlsx`
 * extension (compared case-insensitively), and must refer to a regular,
 * non-empty file no larger than `MAX_DISPO_WORKBOOK_BYTES`.
 *
 * @param workbookPath - Path of the workbook to validate.
 * @returns The resolved absolute path.
 * @throws Error when the extension, file-type, or size check fails. Errors
 *   raised by `stat` itself are not caught here and propagate unchanged.
 */
async function validateWorkbookPath(workbookPath: string): Promise<string> {
  const absolutePath = path.resolve(workbookPath);
  const extension = path.extname(absolutePath).toLowerCase();

  // The extension gate runs first, so an unsupported name is rejected without any filesystem access.
  if (extension !== DISPO_WORKBOOK_EXTENSION) {
    throw new Error(
      `Only ${DISPO_WORKBOOK_EXTENSION} workbooks are supported for dispo imports: "${absolutePath}"`,
    );
  }

  const info = await stat(absolutePath);
  if (!info.isFile()) {
    throw new Error(`Workbook path must point to a readable file: "${absolutePath}"`);
  }

  const { size } = info;
  if (size <= 0) {
    throw new Error(`Workbook file is empty: "${absolutePath}"`);
  }
  if (size > MAX_DISPO_WORKBOOK_BYTES) {
    throw new Error(
      `Workbook file exceeds the ${MAX_DISPO_WORKBOOK_BYTES} byte import limit: "${absolutePath}"`,
    );
  }

  return absolutePath;
}
|
||||||
|
|
||||||
function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
|
function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
|
||||||
if (value === undefined || value === null) {
|
if (value === undefined || value === null) {
|
||||||
return null;
|
return null;
|
||||||
@@ -16,6 +84,38 @@ function normalizeWorksheetCellValue(value: unknown): WorksheetCellValue {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (typeof value === "object") {
|
||||||
|
const record = value as Record<string, unknown>;
|
||||||
|
|
||||||
|
if ("result" in record) {
|
||||||
|
return normalizeWorksheetCellValue(record.result);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ("text" in record && typeof record.text === "string") {
|
||||||
|
return record.text;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ("hyperlink" in record && typeof record.hyperlink === "string") {
|
||||||
|
return record.hyperlink;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ("richText" in record && Array.isArray(record.richText)) {
|
||||||
|
return record.richText
|
||||||
|
.map((part) => {
|
||||||
|
if (part && typeof part === "object" && "text" in part) {
|
||||||
|
const text = (part as { text?: unknown }).text;
|
||||||
|
return typeof text === "string" ? text : "";
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
})
|
||||||
|
.join("");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ("error" in record && typeof record.error === "string") {
|
||||||
|
return record.error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return String(value);
|
return String(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -23,13 +123,14 @@ export async function readWorksheetMatrix(
|
|||||||
workbookPath: string,
|
workbookPath: string,
|
||||||
sheetName: string,
|
sheetName: string,
|
||||||
): Promise<WorksheetMatrix> {
|
): Promise<WorksheetMatrix> {
|
||||||
const workbook = XLSX.readFile(workbookPath, {
|
const resolvedPath = await validateWorkbookPath(workbookPath);
|
||||||
|
const workbook = XLSX.readFile(resolvedPath, {
|
||||||
cellDates: true,
|
cellDates: true,
|
||||||
dense: true,
|
dense: true,
|
||||||
});
|
});
|
||||||
const worksheet = workbook.Sheets[sheetName];
|
const worksheet = workbook.Sheets[sheetName];
|
||||||
if (!worksheet) {
|
if (!worksheet) {
|
||||||
throw new Error(`Worksheet "${sheetName}" not found in workbook "${workbookPath}"`);
|
throw new Error(`Worksheet "${sheetName}" not found in workbook "${resolvedPath}"`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const rows = XLSX.utils.sheet_to_json<(WorksheetCellValue | null)[]>(worksheet, {
|
const rows = XLSX.utils.sheet_to_json<(WorksheetCellValue | null)[]>(worksheet, {
|
||||||
@@ -38,7 +139,11 @@ export async function readWorksheetMatrix(
|
|||||||
defval: null,
|
defval: null,
|
||||||
});
|
});
|
||||||
|
|
||||||
return rows.map((row) => row.map((value) => normalizeWorksheetCellValue(value)));
|
return trimTrailingEmptyRows(
|
||||||
|
rows.map((row: (WorksheetCellValue | null)[]) =>
|
||||||
|
trimTrailingNulls(row.map((value: WorksheetCellValue | null) => normalizeWorksheetCellValue(value))),
|
||||||
|
),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
export function getCellString(
|
export function getCellString(
|
||||||
|
|||||||
Generated
+3
@@ -87,6 +87,9 @@ importers:
|
|||||||
dompurify:
|
dompurify:
|
||||||
specifier: ^3.3.3
|
specifier: ^3.3.3
|
||||||
version: 3.3.3
|
version: 3.3.3
|
||||||
|
exceljs:
|
||||||
|
specifier: ^4.4.0
|
||||||
|
version: 4.4.0
|
||||||
framer-motion:
|
framer-motion:
|
||||||
specifier: ^12.38.0
|
specifier: ^12.38.0
|
||||||
version: 12.38.0(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
|
version: 12.38.0(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
|
||||||
|
|||||||
Reference in New Issue
Block a user