feat(import): harden workbook parser boundaries
This commit is contained in:
@@ -7,7 +7,8 @@
|
||||
"build": "next build",
|
||||
"start": "next start -p 3100",
|
||||
"lint": "next lint",
|
||||
"typecheck": "tsc --noEmit",
|
||||
"typecheck": "tsc --project tsconfig.typecheck.json --noEmit",
|
||||
"test:unit": "vitest run",
|
||||
"test:e2e": "playwright test"
|
||||
},
|
||||
"dependencies": {
|
||||
@@ -43,12 +44,12 @@
|
||||
"recharts": "^3.7.0",
|
||||
"tailwind-merge": "^2.6.0",
|
||||
"three": "^0.183.2",
|
||||
"xlsx": "^0.18.5",
|
||||
"zod": "^3.23.8"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@capakraken/tsconfig": "workspace:*",
|
||||
"@playwright/test": "^1.49.1",
|
||||
"@vitest/coverage-v8": "^2.1.9",
|
||||
"@types/dompurify": "^3.2.0",
|
||||
"@types/node": "^22.10.2",
|
||||
"@types/react": "^19.0.6",
|
||||
@@ -58,6 +59,7 @@
|
||||
"autoprefixer": "^10.4.20",
|
||||
"postcss": "^8.4.49",
|
||||
"tailwindcss": "^3.4.17",
|
||||
"typescript": "^5.6.3"
|
||||
"typescript": "^5.6.3",
|
||||
"vitest": "^2.1.9"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
MAX_BROWSER_SPREADSHEET_BYTES,
|
||||
assertSpreadsheetFile,
|
||||
parseSpreadsheet,
|
||||
} from "./excel.js";
|
||||
|
||||
/**
 * Test fixture: writes the given rows into a single-sheet workbook ("Sheet1")
 * with ExcelJS and wraps the serialized bytes in a `File` tagged with the
 * XLSX MIME type.
 *
 * @param rows - Cell values, one inner array per worksheet row.
 * @param fileName - Name given to the resulting `File`.
 * @returns A `File` containing the serialized workbook.
 */
async function createWorkbookFile(
  rows: unknown[][],
  fileName = "spreadsheet.xlsx",
): Promise<File> {
  // Loaded lazily so the module is only pulled in when a workbook is built.
  const exceljs = await import("exceljs");
  const book = new exceljs.Workbook();
  const sheet = book.addWorksheet("Sheet1");

  rows.forEach((cells) => {
    sheet.addRow(cells);
  });

  const serialized = await book.xlsx.writeBuffer();
  const options = {
    type: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  };
  return new File([serialized], fileName, options);
}
|
||||
|
||||
// Behavioural coverage for the spreadsheet import helpers: CSV and XLSX
// parsing plus the file guards that run before any parsing happens.
describe("excel import helpers", () => {
  const xlsxMimeType = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";

  it("parses csv files with quoted values and skips blank rows", async () => {
    // The quoted comma must stay inside one cell; the empty line must not
    // produce a row of its own.
    const csvText = 'name,role\n"Alice, A.",Engineer\n\nBob,Producer\n';
    const csvFile = new File([csvText], "people.csv", { type: "text/csv" });

    const parsedRows = await parseSpreadsheet(csvFile);

    expect(parsedRows).toEqual([
      { name: "Alice, A.", role: "Engineer" },
      { name: "Bob", role: "Producer" },
    ]);
  });

  it("parses xlsx files and normalizes date cells to ISO strings", async () => {
    const headerRow = ["name", "startDate", "active"];
    const dataRow = ["Alice", new Date("2026-03-30T09:15:00.000Z"), true];
    const xlsxFile = await createWorkbookFile([headerRow, dataRow]);

    const parsedRows = await parseSpreadsheet(xlsxFile);

    // Date and boolean cells are both expected back as strings.
    expect(parsedRows).toEqual([
      {
        name: "Alice",
        startDate: "2026-03-30T09:15:00.000Z",
        active: "true",
      },
    ]);
  });

  it("rejects duplicate headers in xlsx imports", async () => {
    // Headers that differ only by case count as duplicates.
    const xlsxFile = await createWorkbookFile([
      ["Name", "name"],
      ["Alice", "Producer"],
    ]);

    await expect(parseSpreadsheet(xlsxFile)).rejects.toThrow('duplicate header "name"');
  });

  it("rejects legacy .xls uploads before parsing", () => {
    const legacyFile = new File(["legacy"], "legacy.xls", {
      type: "application/vnd.ms-excel",
    });
    const validate = () => assertSpreadsheetFile(legacyFile);

    expect(validate).toThrow("Legacy .xls files are not supported.");
  });

  it("rejects oversized spreadsheet uploads before parsing", () => {
    // One byte past the limit is enough to trip the guard.
    const payload = Buffer.alloc(MAX_BROWSER_SPREADSHEET_BYTES + 1);
    const oversizedFile = new File([payload], "oversized.xlsx", { type: xlsxMimeType });
    const validate = () => assertSpreadsheetFile(oversizedFile);

    expect(validate).toThrow(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_BYTES} byte limit`,
    );
  });
});
|
||||
@@ -3,6 +3,8 @@ const CSV_EXTENSION = ".csv";
|
||||
const XLS_EXTENSION = ".xls";
|
||||
|
||||
export const MAX_BROWSER_SPREADSHEET_BYTES = 10 * 1024 * 1024;
|
||||
export const MAX_BROWSER_SPREADSHEET_ROWS = 5000;
|
||||
export const MAX_BROWSER_SPREADSHEET_COLUMNS = 200;
|
||||
|
||||
type ExcelJsModule = typeof import("exceljs");
|
||||
let _excelJs: ExcelJsModule | null = null;
|
||||
@@ -117,8 +119,47 @@ function parseCsvMatrix(input: string): string[][] {
|
||||
return rows;
|
||||
}
|
||||
|
||||
function matrixToObjects(rows: string[][]): Record<string, string>[] {
|
||||
/**
 * Rejects a parsed cell matrix that is larger than the browser import limits.
 *
 * @param rows - Parsed cell matrix, one inner array per row.
 * @param contextLabel - Human-readable name of the import, appended to the error text.
 * @throws Error when `rows` holds more than `MAX_BROWSER_SPREADSHEET_ROWS + 1`
 *   entries, or when any row holds more than `MAX_BROWSER_SPREADSHEET_COLUMNS` cells.
 */
export function assertTabularMatrixWithinLimits(rows: string[][], contextLabel: string): void {
  // One row beyond the limit is tolerated — presumably the header row, which
  // the visible callers read from rows[0].
  const rowBudget = MAX_BROWSER_SPREADSHEET_ROWS + 1;
  if (rows.length > rowBudget) {
    throw new Error(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_ROWS} row limit for ${contextLabel}.`,
    );
  }

  // The row check runs first, so an oversized matrix always reports the row limit.
  const hasOverwideRow = rows.some((cells) => cells.length > MAX_BROWSER_SPREADSHEET_COLUMNS);
  if (hasOverwideRow) {
    throw new Error(
      `The selected file exceeds the ${MAX_BROWSER_SPREADSHEET_COLUMNS} column limit for ${contextLabel}.`,
    );
  }
}
|
||||
|
||||
/**
 * Validates the header row of an imported table.
 *
 * Headers are compared after trimming surrounding whitespace, so the check
 * gives the same answer whether or not the caller trimmed the row first:
 * a whitespace-only cell counts as empty, and `"name"` / `" Name "` count as
 * duplicates. Duplicate detection is case-insensitive.
 *
 * An empty `headers` array is accepted without error; callers decide how to
 * treat a table that has no header row at all.
 *
 * @param headers - Header cells in column order.
 * @param contextLabel - Human-readable name of the import, appended to the error text.
 * @throws Error when a header cell is empty (reported with its 1-based column
 *   number) or when two headers are equal ignoring case.
 */
export function assertHeaderRow(headers: string[], contextLabel: string): void {
  if (headers.length === 0) {
    return;
  }

  const trimmedHeaders = headers.map((header) => header.trim());

  // Empty cells are reported before duplicates, whatever their position.
  const blankHeaderIndex = trimmedHeaders.findIndex((header) => header.length === 0);
  if (blankHeaderIndex >= 0) {
    throw new Error(
      `The selected file contains an empty header cell in column ${blankHeaderIndex + 1} and cannot be used for ${contextLabel}.`,
    );
  }

  const seen = new Set<string>();
  for (const header of trimmedHeaders) {
    const normalized = header.toLowerCase();
    if (seen.has(normalized)) {
      // The message quotes the repeated occurrence, not the first one.
      throw new Error(`The selected file contains duplicate header "${header}" and cannot be used for ${contextLabel}.`);
    }
    seen.add(normalized);
  }
}
|
||||
|
||||
function matrixToObjects(rows: string[][], contextLabel: string): Record<string, string>[] {
|
||||
assertTabularMatrixWithinLimits(rows, contextLabel);
|
||||
const headers = (rows[0] ?? []).map((header) => header.trim());
|
||||
assertHeaderRow(headers, contextLabel);
|
||||
if (headers.length === 0) {
|
||||
return [];
|
||||
}
|
||||
@@ -203,7 +244,7 @@ async function parseXlsxSpreadsheet(file: File): Promise<Record<string, string>[
|
||||
rows.push(cells);
|
||||
}
|
||||
|
||||
return matrixToObjects(rows);
|
||||
return matrixToObjects(rows, "spreadsheet import");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -214,7 +255,7 @@ export async function parseSpreadsheet(file: File): Promise<Record<string, strin
|
||||
assertSpreadsheetFile(file);
|
||||
|
||||
if (getFileExtension(file.name) === CSV_EXTENSION) {
|
||||
return matrixToObjects(parseCsvMatrix(await file.text()));
|
||||
return matrixToObjects(parseCsvMatrix(await file.text()), "spreadsheet import");
|
||||
}
|
||||
|
||||
return parseXlsxSpreadsheet(file);
|
||||
|
||||
@@ -0,0 +1,106 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { matchRoleName, parseSkillMatrixWorkbook } from "./skillMatrixParser.js";
|
||||
|
||||
/**
 * Test fixture: builds a workbook with one worksheet per entry in `sheets`
 * and returns the serialized bytes as a standalone `ArrayBuffer`.
 *
 * @param sheets - Worksheet names paired with their rows of cell values.
 * @returns The serialized workbook, copied out of whatever buffer ExcelJS returned.
 */
async function createWorkbookBuffer(
  sheets: Array<{ name: string; rows: unknown[][] }>,
): Promise<ArrayBuffer> {
  const exceljs = await import("exceljs");
  const book = new exceljs.Workbook();

  sheets.forEach(({ name, rows }) => {
    const sheet = book.addWorksheet(name);
    rows.forEach((cells) => {
      sheet.addRow(cells);
    });
  });

  const written = await book.xlsx.writeBuffer();
  const view = written instanceof Uint8Array ? written : new Uint8Array(written);

  // Slice by offset and length so only this view's bytes are returned, even if
  // the view sits inside a larger backing buffer.
  const start = view.byteOffset;
  const end = start + view.byteLength;
  return view.buffer.slice(start, end);
}
|
||||
|
||||
// Behavioural coverage for the skill matrix workbook parser and the role-name matcher.
describe("skill matrix parser", () => {
  it("extracts employee info and merges skills by highest proficiency", async () => {
    const employeeSheet = {
      name: "Employee Information",
      rows: [
        ["item", "property"],
        ["Full Name", "Alex Artist"],
        ["Area of Expertise", "Compositing"],
        ["Years of Experience", "7.4"],
        ["Portfolio URL", "https://portfolio.example/alex"],
      ],
    };
    // "Nuke" is listed on both skill sheets with different ratings.
    const softwareSheet = {
      name: "Software Skills",
      rows: [
        ["category", "item", "property", "main skillset"],
        ["Software", "Nuke", "2", "1"],
        ["Software", "Photoshop", "0", ""],
      ],
    };
    const technicalSheet = {
      name: "Technical Skillset",
      rows: [
        ["category", "item", "property", "main skillset"],
        ["Pipeline", "Nuke", "4", ""],
        ["Pipeline", "Python", "3", "2"],
      ],
    };
    const workbook = await createWorkbookBuffer([employeeSheet, softwareSheet, technicalSheet]);

    const expectedSkills = [
      {
        skill: "Nuke",
        category: "Pipeline",
        proficiency: 5,
      },
      {
        skill: "Python",
        category: "Pipeline",
        proficiency: 4,
        isMainSkill: true,
      },
    ];

    await expect(parseSkillMatrixWorkbook(workbook)).resolves.toEqual({
      employeeInfo: {
        displayName: "Alex Artist",
        areaOfExpertise: "Compositing",
        yearsOfExperience: 7,
        portfolioUrl: "https://portfolio.example/alex",
      },
      // Only these entries are required; other entries may also be present.
      skills: expect.arrayContaining(expectedSkills),
    });
  });

  it("rejects duplicate headers in skill sheets", async () => {
    const workbook = await createWorkbookBuffer([
      {
        name: "Employee Information",
        rows: [
          ["item", "property"],
          ["Full Name", "Alex Artist"],
        ],
      },
      {
        // The repeated "item" column is the defect under test.
        name: "Software Skills",
        rows: [
          ["item", "item", "property"],
          ["Nuke", "Duplicate", "2"],
        ],
      },
      {
        name: "Technical Skillset",
        rows: [["category", "item", "property"]],
      },
    ]);

    await expect(parseSkillMatrixWorkbook(workbook)).rejects.toThrow('duplicate header "item"');
  });

  it("matches role names by exact and partial matches", () => {
    const exactMatch = matchRoleName("Compositing", ["Producer", "Compositing"]);
    const partialMatch = matchRoleName("Senior Producer", ["Producer", "Lighting"]);
    const noMatch = matchRoleName("Rigging", ["Producer", "Lighting"]);

    expect(exactMatch).toBe("Compositing");
    expect(partialMatch).toBe("Producer");
    expect(noMatch).toBeNull();
  });
});
|
||||
@@ -1,4 +1,5 @@
|
||||
import type { SkillEntry } from "@capakraken/shared";
|
||||
import { assertHeaderRow, assertTabularMatrixWithinLimits } from "./excel.js";
|
||||
|
||||
type ExcelJsModule = typeof import("exceljs");
|
||||
|
||||
@@ -80,7 +81,9 @@ function worksheetToRowObjects(
|
||||
rows.push(cells);
|
||||
}
|
||||
|
||||
assertTabularMatrixWithinLimits(rows, "skill matrix import");
|
||||
const headers = (rows[0] ?? []).map((header) => header.trim());
|
||||
assertHeaderRow(headers, "skill matrix import");
|
||||
if (headers.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user