security: RBAC cache cross-instance invalidation + force re-login on role/perm change (#57)

- shrink roleDefaults cache TTL from 60s to 10s (safety-net staleness bound)
- publish/subscribe on capakraken:rbac-invalidate so peer instances drop
  their local role-defaults cache on mutation (ioredis pub/sub; lazy init
  so idle test files don't open connections)
- after updateUserRole/setUserPermissions/resetUserPermissions: delete
  all ActiveSession rows for that user so the next request re-auths via
  tRPC's jti check, and invalidate the role-defaults cache
- tests: peer-instance invalidation via FakeRedis pub/sub fan-out; mutation
  side-effects assert session deletion + cache invalidation on each path

Without this, demoted admins kept their JWT valid until expiry and peer
instances kept serving stale role defaults for up to the TTL window.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-04-17 13:01:15 +02:00
parent 23c6e0e04b
commit e2dddd30df
5 changed files with 440 additions and 4 deletions
@@ -0,0 +1,131 @@
import { EventEmitter } from "node:events";
import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
/**
* Ticket #57 — verify that:
*
* 1. Publishing on RBAC_INVALIDATE_CHANNEL from node A causes node B to
* drop its local `_roleDefaultsCache`, so its next `loadRoleDefaults()`
* call re-reads from the DB (acceptance criterion:
* "2nd node sees update within 1 s" — we verify the mechanism, not the
* Redis latency).
*
* 2. `invalidateRoleDefaultsCache()` on the current node publishes on the
* same channel so peer instances receive the event.
*
* Strategy: stub `ioredis` with an EventEmitter-based fake before loading
* trpc.ts. The fake captures `publish()` calls and lets the test emit
* synthetic "message" events.
*/
// Fake Redis with two separate instances so the test mirrors the multi-node
// shape: one as subscriber, one as publisher. Both share the same module-
// level event router keyed by channel.
const channelSubscribers = new Map<string, Set<FakeRedis>>();
const publishCalls: Array<{ channel: string; message: string }> = [];
class FakeRedis extends EventEmitter {
constructor(_url: string, _opts: unknown) {
super();
}
// eslint-disable-next-line @typescript-eslint/require-await
async subscribe(channel: string): Promise<number> {
let set = channelSubscribers.get(channel);
if (!set) {
set = new Set();
channelSubscribers.set(channel, set);
}
set.add(this);
return set.size;
}
// eslint-disable-next-line @typescript-eslint/require-await
async publish(channel: string, message: string): Promise<number> {
publishCalls.push({ channel, message });
const subs = channelSubscribers.get(channel);
if (!subs) return 0;
// Fan out synchronously so the subscriber handler runs before the test
// assertion reads the cache — matches real ioredis "message" semantics
// from the subscriber's point of view.
for (const sub of subs) sub.emit("message", channel, message);
return subs.size;
}
}
vi.mock("ioredis", () => ({ Redis: FakeRedis, default: FakeRedis }));
vi.mock("../lib/logger.js", () => ({
logger: { warn: vi.fn(), error: vi.fn(), info: vi.fn(), debug: vi.fn() },
}));
// Prisma client mock — loadRoleDefaults pulls from systemRoleConfig.findMany.
const findManyCalls: number[] = [];
vi.mock("@capakraken/db", async () => {
const actual = await vi.importActual<Record<string, unknown>>("@capakraken/db");
return {
...actual,
prisma: {
systemRoleConfig: {
findMany: vi.fn().mockImplementation(async () => {
findManyCalls.push(Date.now());
return [{ role: "ADMIN", defaultPermissions: ["MANAGE_USERS"] }];
}),
},
},
};
});
// REDIS_URL is needed so trpc.ts decides to instantiate the fake Redis.
// `trpc.ts` now reads it lazily on first RBAC call, so setting it in
// beforeAll is enough; we always restore in afterAll to avoid leaking into
// other test files in the same worker.
const originalRedisUrl = process.env["REDIS_URL"];
describe("RBAC cache Redis pub/sub (#57)", () => {
beforeAll(() => {
process.env["REDIS_URL"] = "redis://fake:6379";
});
afterAll(() => {
if (originalRedisUrl === undefined) delete process.env["REDIS_URL"];
else process.env["REDIS_URL"] = originalRedisUrl;
});
beforeEach(() => {
findManyCalls.length = 0;
});
it("peer-instance invalidation: receiving a message clears the local cache", async () => {
const { loadRoleDefaults } = await import("../trpc.js");
// Warm the cache.
await loadRoleDefaults();
const hitsAfterWarm = findManyCalls.length;
expect(hitsAfterWarm).toBe(1);
// Second call within TTL should be cached — no additional findMany.
await loadRoleDefaults();
expect(findManyCalls.length).toBe(hitsAfterWarm);
// Simulate a peer instance publishing an invalidation: grab any
// subscriber on the channel and fire the event as if Redis delivered it.
const subs = channelSubscribers.get("capakraken:rbac-invalidate");
expect(subs).toBeDefined();
expect(subs!.size).toBeGreaterThanOrEqual(1);
for (const sub of subs!) sub.emit("message", "capakraken:rbac-invalidate", "1");
// Next load must hit the DB again.
await loadRoleDefaults();
expect(findManyCalls.length).toBe(hitsAfterWarm + 1);
});
it("local invalidation publishes on the RBAC channel", async () => {
const { invalidateRoleDefaultsCache } = await import("../trpc.js");
const countBefore = publishCalls.length;
invalidateRoleDefaultsCache();
// Give the microtask queue one tick (publish returns a promise).
await Promise.resolve();
const newPublishes = publishCalls.slice(countBefore);
expect(newPublishes.length).toBe(1);
expect(newPublishes[0]!.channel).toBe("capakraken:rbac-invalidate");
});
});
@@ -0,0 +1,180 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { SystemRole } from "@capakraken/shared";
vi.mock("../lib/audit.js", () => ({ createAuditEntry: vi.fn() }));
vi.mock("../lib/audit-helpers.js", () => ({
makeAuditLogger: () => vi.fn(),
}));
const invalidateRoleDefaultsCache = vi.hoisted(() => vi.fn());
vi.mock("../trpc.js", () => ({
invalidateRoleDefaultsCache,
}));
import {
resetUserPermissions,
setUserPermissions,
updateUserRole,
} from "../router/user-procedure-support.js";
/**
* Ticket #57 — when a privileged-state mutation happens we MUST:
* 1. delete every ActiveSession for the affected user (forces next-request
* re-auth, because the tRPC route validates `jti` against ActiveSession),
* 2. call `invalidateRoleDefaultsCache()` so peer instances drop their
* 10 s cache entries via the Redis pub/sub fan-out.
*
* Without (1), a demoted admin keeps their JWT valid until it expires, so
* permissions resolved server-side still reflect the old role. Without (2),
* peer instances keep serving the old role defaults for up to the TTL.
*/
describe("RBAC mutation side effects (#57)", () => {
beforeEach(() => {
vi.clearAllMocks();
});
function makeCtx(dbOverrides: Record<string, unknown> = {}) {
const defaultDb = {
user: {
findUnique: vi.fn(),
update: vi.fn(),
},
activeSession: {
deleteMany: vi.fn().mockResolvedValue({ count: 3 }),
},
...dbOverrides,
};
return {
ctx: {
db: defaultDb as never,
dbUser: {
id: "admin_1",
systemRole: SystemRole.ADMIN,
permissionOverrides: null,
},
session: {
user: { email: "admin@example.com", name: "Admin", image: null },
expires: "2099-01-01T00:00:00.000Z",
},
},
db: defaultDb,
};
}
describe("updateUserRole", () => {
it("deletes active sessions and invalidates cache when role changes", async () => {
const { ctx, db } = makeCtx({
user: {
findUnique: vi.fn().mockResolvedValue({
id: "user_victim",
name: "Victim",
email: "victim@example.com",
systemRole: SystemRole.ADMIN,
}),
update: vi.fn().mockResolvedValue({
id: "user_victim",
name: "Victim",
email: "victim@example.com",
systemRole: SystemRole.USER,
}),
},
});
await updateUserRole(ctx as never, {
id: "user_victim",
systemRole: SystemRole.USER,
});
expect(db.activeSession.deleteMany).toHaveBeenCalledWith({
where: { userId: "user_victim" },
});
expect(invalidateRoleDefaultsCache).toHaveBeenCalledTimes(1);
});
it("does NOT delete sessions or invalidate when role is unchanged", async () => {
const { ctx, db } = makeCtx({
user: {
findUnique: vi.fn().mockResolvedValue({
id: "user_1",
name: "Alice",
email: "alice@example.com",
systemRole: SystemRole.MANAGER,
}),
update: vi.fn().mockResolvedValue({
id: "user_1",
name: "Alice",
email: "alice@example.com",
systemRole: SystemRole.MANAGER,
}),
},
});
await updateUserRole(ctx as never, {
id: "user_1",
systemRole: SystemRole.MANAGER,
});
expect(db.activeSession.deleteMany).not.toHaveBeenCalled();
expect(invalidateRoleDefaultsCache).not.toHaveBeenCalled();
});
});
describe("setUserPermissions", () => {
it("deletes active sessions and invalidates cache on every call", async () => {
const { ctx, db } = makeCtx({
user: {
findUnique: vi.fn().mockResolvedValue({
id: "user_1",
name: "Alice",
email: "alice@example.com",
permissionOverrides: null,
}),
update: vi.fn().mockResolvedValue({
id: "user_1",
name: "Alice",
email: "alice@example.com",
permissionOverrides: { granted: ["x"], denied: [] },
}),
},
});
await setUserPermissions(ctx as never, {
userId: "user_1",
overrides: { granted: ["x"], denied: [] },
});
expect(db.activeSession.deleteMany).toHaveBeenCalledWith({
where: { userId: "user_1" },
});
expect(invalidateRoleDefaultsCache).toHaveBeenCalledTimes(1);
});
});
describe("resetUserPermissions", () => {
it("deletes active sessions and invalidates cache", async () => {
const { ctx, db } = makeCtx({
user: {
findUnique: vi.fn().mockResolvedValue({
id: "user_1",
name: "Alice",
email: "alice@example.com",
permissionOverrides: { granted: ["x"], denied: [] },
}),
update: vi.fn().mockResolvedValue({
id: "user_1",
name: "Alice",
email: "alice@example.com",
permissionOverrides: null,
}),
},
});
await resetUserPermissions(ctx as never, { userId: "user_1" });
expect(db.activeSession.deleteMany).toHaveBeenCalledWith({
where: { userId: "user_1" },
});
expect(invalidateRoleDefaultsCache).toHaveBeenCalledTimes(1);
});
});
});
@@ -49,12 +49,20 @@ vi.mock("otpauth", () => {
const createCaller = createCallerFactory(userRouter);
function createAdminCaller(db: Record<string, unknown>) {
// Provide a no-op activeSession stub by default — some mutation paths
// (setPermissions / resetPermissions / updateRole, see ticket #57) now
// invalidate active sessions to force a re-login on privilege changes.
// Individual tests can override by passing their own `activeSession` key.
const dbWithDefaults = {
activeSession: { deleteMany: vi.fn().mockResolvedValue({ count: 0 }) },
...db,
};
return createCaller({
session: {
user: { email: "admin@example.com", name: "Admin", image: null },
expires: "2099-01-01T00:00:00.000Z",
},
db: db as never,
db: dbWithDefaults as never,
dbUser: {
id: "user_admin",
systemRole: SystemRole.ADMIN,
@@ -5,6 +5,7 @@ import { z } from "zod";
import { findUniqueOrThrow } from "../db/helpers.js";
import { makeAuditLogger } from "../lib/audit-helpers.js";
import type { TRPCContext } from "../trpc.js";
import { invalidateRoleDefaultsCache } from "../trpc.js";
export const CreateUserInputSchema = z.object({
email: z.string().email(),
@@ -205,6 +206,16 @@ export async function updateUserRole(
select: { id: true, name: true, email: true, systemRole: true },
});
// Force re-login: a role change (especially a demotion) must revoke
// currently-issued JWTs. Our JWT middleware checks the jti against
// ActiveSession on every tRPC call, so wiping these rows invalidates
// every outstanding session for this user on the next request.
if (before.systemRole !== updated.systemRole) {
await ctx.db.activeSession.deleteMany({ where: { userId: updated.id } });
// Also nuke the per-instance role-defaults cache (cross-node via pub/sub).
invalidateRoleDefaultsCache();
}
audit({
entityType: "User",
entityId: updated.id,
@@ -385,6 +396,12 @@ export async function setUserPermissions(
select: { id: true, name: true, email: true, permissionOverrides: true },
});
// Permission overrides can remove access — force affected sessions to
// re-authenticate so the new override set is applied immediately rather
// than waiting for the TTL. Cross-node cache invalidation via pub/sub.
await ctx.db.activeSession.deleteMany({ where: { userId: input.userId } });
invalidateRoleDefaultsCache();
audit({
entityType: "User",
entityId: input.userId,
@@ -422,6 +439,11 @@ export async function resetUserPermissions(
select: { id: true, name: true, email: true, permissionOverrides: true },
});
// Reset may remove privileges that were `granted` via override — force
// re-login so the regression applies on the next request.
await ctx.db.activeSession.deleteMany({ where: { userId: input.userId } });
invalidateRoleDefaultsCache();
audit({
entityType: "User",
entityId: input.userId,
+98 -3
View File
@@ -1,7 +1,9 @@
import { prisma, Prisma } from "@capakraken/db";
import { resolvePermissions, PermissionKey, SystemRole } from "@capakraken/shared";
import { initTRPC, TRPCError } from "@trpc/server";
import { Redis } from "ioredis";
import { ZodError } from "zod";
import { logger } from "./lib/logger.js";
import { assertNoDevBypassInProduction, isE2eBypassActive } from "./lib/runtime-security.js";
import { loggingMiddleware } from "./middleware/logging.js";
import { apiRateLimiter } from "./middleware/rate-limit.js";
@@ -24,12 +26,87 @@ export interface TRPCContext {
clientIp: string | null;
}
// Cache role defaults for 60 seconds to avoid DB hit on every request
// Cache role defaults for 10 seconds. Short TTL is the fail-safe in case the
// Redis pub/sub invalidation below is down — even without cross-node
// invalidation the staleness window is bounded to 10 s for any revocation.
let _roleDefaultsCache: Record<string, PermissionKey[]> | null = null;
let _roleDefaultsCacheTime = 0;
const ROLE_DEFAULTS_TTL = 60_000;
const ROLE_DEFAULTS_TTL = 10_000;
// ─── Cross-instance cache invalidation via Redis pub/sub ──────────────────────
// Without this, `invalidateRoleDefaultsCache()` only clears the in-memory cache
// on the node that invoked it. Other nodes keep serving stale permissions for
// up to ROLE_DEFAULTS_TTL after a revocation, which is a real RBAC risk in
// multi-instance deployments (admin demotion, permission-override removal).
//
// We publish a single invalidate message per change; every node subscribes and
// clears its local cache on receipt. Failure to publish/subscribe is logged
// but never thrown — the TTL above is the fall-back.
const RBAC_INVALIDATE_CHANNEL = "capakraken:rbac-invalidate";
let _rbacPublisher: Redis | null = null;
let _rbacSubscriber: Redis | null = null;
let _rbacSubscriberInitialized = false;
function rbacRedisUrl(): string | null {
return process.env["REDIS_URL"] ?? null;
}
function getRbacPublisher(): Redis | null {
const url = rbacRedisUrl();
if (!url) return null;
if (!_rbacPublisher) {
try {
_rbacPublisher = new Redis(url, { lazyConnect: false, enableReadyCheck: false });
_rbacPublisher.on("error", (err: unknown) => {
logger.warn({ err, channel: RBAC_INVALIDATE_CHANNEL }, "RBAC Redis publisher error");
});
} catch (err) {
logger.warn(
{ err },
"RBAC Redis publisher init failed; cache invalidation will be local-only",
);
_rbacPublisher = null;
}
}
return _rbacPublisher;
}
function ensureRbacSubscriber(): void {
if (_rbacSubscriberInitialized) return;
const url = rbacRedisUrl();
if (!url) return;
_rbacSubscriberInitialized = true;
try {
_rbacSubscriber = new Redis(url, { lazyConnect: false, enableReadyCheck: false });
_rbacSubscriber.on("error", (err: unknown) => {
logger.warn({ err, channel: RBAC_INVALIDATE_CHANNEL }, "RBAC Redis subscriber error");
});
void _rbacSubscriber.subscribe(RBAC_INVALIDATE_CHANNEL).catch((err: unknown) => {
logger.warn({ err, channel: RBAC_INVALIDATE_CHANNEL }, "RBAC Redis subscribe failed");
});
_rbacSubscriber.on("message", (_channel: string, _message: string) => {
// Any message on this channel means "someone mutated role/permission
// state — drop our local view now". Body is ignored; the next request
// re-reads from DB.
_roleDefaultsCache = null;
_roleDefaultsCacheTime = 0;
});
} catch (err) {
logger.warn(
{ err },
"RBAC Redis subscriber init failed; cache invalidation will be local-only",
);
}
}
export async function loadRoleDefaults(): Promise<Record<string, PermissionKey[]>> {
// Lazy-init the peer-invalidation subscriber on first use. Doing this at
// first call (not module load) means test files that never touch RBAC never
// open a Redis connection, and env changes set up by specific tests are
// observed rather than snapshotted at import time.
ensureRbacSubscriber();
const now = Date.now();
if (_roleDefaultsCache && now - _roleDefaultsCacheTime < ROLE_DEFAULTS_TTL) {
return _roleDefaultsCache;
@@ -46,10 +123,28 @@ export async function loadRoleDefaults(): Promise<Record<string, PermissionKey[]
return map;
}
/** Invalidate the role defaults cache (call after updating SystemRoleConfig) */
/**
* Invalidate the role defaults cache on every running instance.
*
* Clears the local cache immediately and publishes a Redis message so peer
* instances clear theirs too. If Redis is unavailable, only the local cache
* is cleared — the 10 s TTL caps staleness on other nodes.
*
* Call this after mutating SystemRoleConfig, User.systemRole, or
* User.permissionOverrides.
*/
export function invalidateRoleDefaultsCache(): void {
_roleDefaultsCache = null;
_roleDefaultsCacheTime = 0;
const pub = getRbacPublisher();
if (!pub) return;
void pub.publish(RBAC_INVALIDATE_CHANNEL, "1").catch((err: unknown) => {
logger.warn(
{ err, channel: RBAC_INVALIDATE_CHANNEL },
"RBAC invalidation publish rejected — peer instances will rely on TTL",
);
});
}
export function createTRPCContext(opts: {