feat: additive security improvements — prompt guard, content filter, data classification

Prompt Injection Detection (EGAI 4.6.3.2):
- 12-pattern regex scanner on user messages before AI processing
- Logs warning + creates SecurityAlert audit entry on detection
- Reinforces system prompt instead of blocking (non-breaking)

AI Output Content Filter (EGAI 4.3.2.1):
- Scans AI responses for leaked credentials/secrets
- Auto-redacts passwords, API keys, bearer tokens, private keys
- Logs warning + SecurityAlert audit when redaction occurs

AI Tool Execution Audit Trail (IAAI 3.6.35):
- Every AI tool call creates AiToolExecution audit entry
- Logs tool name, parameters, userId, source: "ai"

Data Classification Labels (EGAI 4.2):
- DATA_CLASSIFICATION constant mapping all fields to HC/C/IR/U
- Exported from @capakraken/shared

All changes strictly additive — no existing logic modified.

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
2026-03-27 16:23:33 +01:00
parent 1fc1e9f24c
commit 2a005794e7
5 changed files with 170 additions and 2 deletions
+28
View File
@@ -0,0 +1,28 @@
/**
 * Basic content filter for AI outputs.
 * Flags potentially problematic content and redacts sensitive data.
 *
 * EGAI 4.3.2.1 — AI Output Content Check
 */

// Credential-like material that must never be echoed to a user:
// password / api-key / secret assignments, long bearer tokens, and PEM
// private-key headers. All carry the `g` flag so one replace() call
// redacts every occurrence in the string.
const SENSITIVE_PATTERNS = [
  /password\s*[:=]\s*\S+/gi,
  /api[_-]?key\s*[:=]\s*\S+/gi,
  /secret\s*[:=]\s*\S+/gi,
  /bearer\s+[a-zA-Z0-9._-]{20,}/gi,
  /-----BEGIN\s+(RSA\s+)?PRIVATE\s+KEY/gi,
];

/**
 * Scans an AI-generated string for leaked secrets and redacts them.
 *
 * @param output - Raw AI response text.
 * @returns `clean: true` when nothing matched; `redacted` is the input
 *          with every sensitive match replaced by "[REDACTED]".
 */
export function checkAiOutput(output: string): { clean: boolean; redacted: string } {
  let redacted = output;
  let clean = true;
  for (const pattern of SENSITIVE_PATTERNS) {
    // Avoid the stateful `g`-flag test()/lastIndex dance on these shared
    // module-level regexes: String.replace with a global regex always
    // starts from index 0, so we detect a hit by comparing before/after.
    // This also scans each string once instead of twice.
    const next = redacted.replace(pattern, "[REDACTED]");
    if (next !== redacted) {
      clean = false;
      redacted = next;
    }
  }
  return { clean, redacted };
}
+35
View File
@@ -0,0 +1,35 @@
/**
 * Simple prompt injection detection for AI inputs.
 * Checks for common injection patterns in user messages.
 *
 * EGAI 4.6.3.2 — Prompt Injection Detection
 */

// Heuristic signatures of common jailbreak / instruction-override
// attempts. All are case-insensitive and stateless (no `g` flag), so
// repeated calls against the same pattern are safe.
const INJECTION_PATTERNS = [
  /ignore\s+(all\s+)?previous\s+instructions/i,
  /disregard\s+(all\s+)?prior/i,
  /you\s+are\s+now\s+/i,
  /forget\s+(everything|all|your)\s+(instructions|rules|guidelines)/i,
  /system\s*:\s*/i,
  /\[INST\]/i,
  /<<SYS>>/i,
  /\bDAN\b.*\bmode\b/i,
  /jailbreak/i,
  /bypass\s+(security|filter|restriction)/i,
  /pretend\s+you\s+(are|have)\s+no\s+(rules|restrictions)/i,
  /act\s+as\s+(if|though)\s+you\s+(have|are)\s+no/i,
];

/**
 * Outcome of an injection scan. When `safe` is false, `matchedPattern`
 * carries the source text of the regex that fired.
 */
export interface PromptGuardResult {
  safe: boolean;
  matchedPattern?: string;
}

/**
 * Tests a user message against the known injection signatures.
 *
 * @param input - Raw user message text.
 * @returns `{ safe: true }` when no signature matches, otherwise
 *          `{ safe: false }` plus the first matching pattern's source.
 */
export function checkPromptInjection(input: string): PromptGuardResult {
  const offender = INJECTION_PATTERNS.find((pattern) => pattern.test(input));
  return offender === undefined
    ? { safe: true }
    : { safe: false, matchedPattern: offender.source };
}