From 7bcc831b5c60d5828581bdbe79593a6970df1429 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hartmut=20N=C3=B6renberg?= <hn@hartmut-noerenberg.com>
Date: Mon, 30 Mar 2026 23:35:29 +0200
Subject: [PATCH] refactor(ops): standardize image-based production delivery

---
 .github/workflows/ci.yml                    |  23 +-
 .github/workflows/deploy-prod.yml           |   2 +-
 .github/workflows/deploy-staging.yml        |   2 +-
 .github/workflows/release-image.yml         |  11 +
 docker-compose.cicd.yml                     |  77 ----
 docker-compose.prod.yml                     |  42 +-
 docs/README.md                              |   2 +-
 docs/ai-excellence-due-diligence-roadmap.md |   9 +-
 docs/architecture-hardening-backlog.md      |   4 +-
 docs/ci-cd-manual.md                        | 404 +++++++-------------
 docs/cicd-target-architecture.md            | 217 ++++-------
 package.json                                |   1 +
 scripts/check-architecture-guardrails.mjs   | 155 ++++++++
 tooling/deploy/.env.production.example      |   3 +-
 tooling/deploy/README.md                    |  25 +-
 tooling/deploy/deploy-compose.sh            |   3 +-
 tooling/deploy/deploy.env.example           |   5 +
 17 files changed, 447 insertions(+), 538 deletions(-)
 delete mode 100644 docker-compose.cicd.yml
 create mode 100644 scripts/check-architecture-guardrails.mjs
 create mode 100644 tooling/deploy/deploy.env.example

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 5a4dcea..20d4b7a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,6 +15,27 @@ env:
   PNPM_VERSION: "9.14.2"
 
 jobs:
+  guardrails:
+    name: Architecture Guardrails
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: pnpm/action-setup@v4
+        with:
+          version: ${{ env.PNPM_VERSION }}
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: pnpm
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Check architecture guardrails
+        run: pnpm check:architecture
+
   # ──────────────────────────────────────────────
   # Typecheck — ~40s, no services needed
   # ──────────────────────────────────────────────
@@ -147,7 +168,7 @@ jobs:
   # ──────────────────────────────────────────────
   build:
     name: Build
-    needs: [typecheck]
+    needs: [guardrails, typecheck]
     runs-on: ubuntu-latest
     env:
       DATABASE_URL: postgresql://placeholder:placeholder@localhost:5432/placeholder
diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml
index 5985603..f0d70cd 100644
--- a/.github/workflows/deploy-prod.yml
+++ b/.github/workflows/deploy-prod.yml
@@ -44,7 +44,7 @@ jobs:
           ssh-keyscan -p "${SSH_PORT:-22}" -H "${SSH_HOST}" >> ~/.ssh/known_hosts
 
       - name: Bundle deploy assets
-        run: tar czf deploy-bundle.tgz docker-compose.cicd.yml tooling/deploy
+        run: tar czf deploy-bundle.tgz docker-compose.prod.yml tooling/deploy
 
       - name: Copy deploy assets to production
         env:
diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml
index 46a33a3..2500314 100644
--- a/.github/workflows/deploy-staging.yml
+++ b/.github/workflows/deploy-staging.yml
@@ -44,7 +44,7 @@ jobs:
           ssh-keyscan -p "${SSH_PORT:-22}" -H "${SSH_HOST}" >> ~/.ssh/known_hosts
 
       - name: Bundle deploy assets
-        run: tar czf deploy-bundle.tgz docker-compose.cicd.yml tooling/deploy
+        run: tar czf deploy-bundle.tgz docker-compose.prod.yml tooling/deploy
 
       - name: Copy deploy assets to staging
         env:
diff --git a/.github/workflows/release-image.yml b/.github/workflows/release-image.yml
index 78f76e7..930349d 100644
--- a/.github/workflows/release-image.yml
+++ b/.github/workflows/release-image.yml
@@ -1,6 +1,8 @@
 name: Release Image
 
 on:
+  push:
+    branches: [main]
   workflow_dispatch:
     inputs:
       image_tag:
@@ -61,3 +63,12 @@ jobs:
           tags: ${{ steps.vars.outputs.migrator_image }}
           cache-from: type=gha,scope=migrator-image
           cache-to: type=gha,mode=max,scope=migrator-image
+
+      - name: Publish release summary
+        run: |
+          {
+            echo "## Image release"
+            echo
+            echo "- App image: \`${{ steps.vars.outputs.app_image }}\`"
+            echo "- Migrator image: \`${{ steps.vars.outputs.migrator_image }}\`"
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/docker-compose.cicd.yml b/docker-compose.cicd.yml
deleted file mode 100644
index 17ff2ab..0000000
--- a/docker-compose.cicd.yml
+++ /dev/null
@@ -1,77 +0,0 @@
-name: capakraken-cicd
-
-services:
-  postgres:
-    image: postgres:16-alpine
-    restart: unless-stopped
-    ports:
-      - "${POSTGRES_PORT:-5432}:5432"
-    environment:
-      POSTGRES_DB: capakraken
-      POSTGRES_USER: capakraken
-      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}
-    volumes:
-      - capakraken_prod_pgdata:/var/lib/postgresql/data
-    healthcheck:
-      test: ["CMD-SHELL", "pg_isready -U capakraken -d capakraken"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-      start_period: 10s
-
-  redis:
-    image: redis:7-alpine
-    restart: unless-stopped
-    ports:
-      - "${REDIS_PORT:-6379}:6379"
-    command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
-    volumes:
-      - capakraken_prod_redis:/data
-    healthcheck:
-      test: ["CMD", "redis-cli", "ping"]
-      interval: 10s
-      timeout: 5s
-      retries: 5
-      start_period: 5s
-
-  migrator:
-    image: ${MIGRATOR_IMAGE:?set MIGRATOR_IMAGE}
-    restart: "no"
-    env_file:
-      - .env.production
-    environment:
-      DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}@postgres:5432/capakraken
-      REDIS_URL: redis://redis:6379
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_healthy
-
-  app:
-    image: ${APP_IMAGE:?set APP_IMAGE}
-    restart: unless-stopped
-    ports:
-      - "${APP_HOST_PORT:-3000}:3000"
-    env_file:
-      - .env.production
-    environment:
-      DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}@postgres:5432/capakraken
-      REDIS_URL: redis://redis:6379
-    depends_on:
-      postgres:
-        condition: service_healthy
-      redis:
-        condition: service_healthy
-    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:3000/api/ready"]
-      interval: 30s
-      timeout: 5s
-      retries: 3
-      start_period: 30s
-
-volumes:
-  capakraken_prod_pgdata:
-    name: capakraken_prod_pgdata
-  capakraken_prod_redis:
-    name: capakraken_prod_redis
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
index a17ee4d..7553086 100644
--- a/docker-compose.prod.yml
+++ b/docker-compose.prod.yml
@@ -5,11 +5,11 @@ services:
     image: postgres:16-alpine
     restart: unless-stopped
     ports:
-      - "5432:5432"
+      - "${POSTGRES_PORT:-5432}:5432"
     environment:
       POSTGRES_DB: capakraken
       POSTGRES_USER: capakraken
-      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-changeme}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}
     command: >
       postgres
       -c log_connections=on
@@ -31,7 +31,7 @@ services:
     image: redis:7-alpine
     restart: unless-stopped
     ports:
-      - "6379:6379"
+      - "${REDIS_PORT:-6379}:6379"
     command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru
     volumes:
       - capakraken_prod_redis:/data
@@ -42,29 +42,45 @@ services:
       retries: 5
       start_period: 5s
 
-  app:
-    build:
-      context: .
-      dockerfile: Dockerfile.prod
-    restart: unless-stopped
-    ports:
-      - "3000:3000"
+  migrator:
+    image: ${MIGRATOR_IMAGE:?set MIGRATOR_IMAGE}
+    pull_policy: always
+    restart: "no"
     env_file:
       - .env.production
     environment:
-      DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:-changeme}@postgres:5432/capakraken
+      DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}@postgres:5432/capakraken
       REDIS_URL: redis://redis:6379
+      RATE_LIMIT_BACKEND: ${RATE_LIMIT_BACKEND:-redis}
+    depends_on:
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+
+  app:
+    image: ${APP_IMAGE:?set APP_IMAGE}
+    pull_policy: always
+    restart: unless-stopped
+    ports:
+      - "${APP_HOST_PORT:-3000}:3000"
+    env_file:
+      - .env.production
+    environment:
+      DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}@postgres:5432/capakraken
+      REDIS_URL: redis://redis:6379
+      RATE_LIMIT_BACKEND: ${RATE_LIMIT_BACKEND:-redis}
     depends_on:
       postgres:
         condition: service_healthy
       redis:
         condition: service_healthy
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:3000/api/health"]
+      test: ["CMD", "curl", "-f", "http://localhost:3000/api/ready"]
       interval: 30s
       timeout: 5s
       retries: 3
-      start_period: 20s
+      start_period: 30s
 
 volumes:
   capakraken_prod_pgdata:
diff --git a/docs/README.md b/docs/README.md
index 4de8694..560a442 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -8,7 +8,7 @@
 | Topic | File | Use |
 |---|---|---|
 | AI excellence due diligence | [ai-excellence-due-diligence-roadmap.md](/home/hartmut/Documents/Copilot/capakraken/docs/ai-excellence-due-diligence-roadmap.md) | Frank quality assessment and cleanup roadmap toward a showcase AI-built project |
-| Target CI/CD architecture | [cicd-target-architecture.md](/home/hartmut/Documents/Copilot/capakraken/docs/cicd-target-architecture.md) | Proposed image-based build, deploy, and rollback flow |
+| Target CI/CD architecture | [cicd-target-architecture.md](/home/hartmut/Documents/Copilot/capakraken/docs/cicd-target-architecture.md) | Canonical image-based build, deploy, and rollback flow |
 | Active roadmap and open gaps | [product-roadmap.md](/home/hartmut/Documents/Copilot/capakraken/docs/product-roadmap.md) | Primary backlog and current delivery order |
 | Estimating system design | [estimating-extension-design.md](/home/hartmut/Documents/Copilot/capakraken/docs/estimating-extension-design.md) | Workbook analysis, field mapping, and implementation plan |
 | Dispo import implementation | [dispo-import-implementation.md](/home/hartmut/Documents/Copilot/capakraken/docs/dispo-import-implementation.md) | Clean-slate Dispo v2 import design, mapping rules, staging flow, and commit policy |
diff --git a/docs/ai-excellence-due-diligence-roadmap.md b/docs/ai-excellence-due-diligence-roadmap.md
index 70419a2..1652c0f 100644
--- a/docs/ai-excellence-due-diligence-roadmap.md
+++ b/docs/ai-excellence-due-diligence-roadmap.md
@@ -66,9 +66,9 @@ The previously critical SSE and browser parser coverage issues were addressed du
    Evidence: the current performance review identifies repeated in-memory filtering, broad invalidation, and heavyweight timeline/report derivations in [performance-optimization-review-2026-03-18.md](/home/hartmut/Documents/Copilot/capakraken/docs/performance-optimization-review-2026-03-18.md).
    Risk: user experience and infrastructure cost will degrade as data volume grows.
 
-3. Production delivery is still in transition.
-   Evidence: the current repo now has a target CI/CD path, but the old manual production path still coexists with the new image-based deploy model in [cicd-target-architecture.md](/home/hartmut/Documents/Copilot/capakraken/docs/cicd-target-architecture.md).
-   Risk: the operational source of truth is not yet singular.
+3. Rollback and incident drills still need to be exercised, even though the deployment path is now standardized.
+   Evidence: the canonical production path now runs through [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml), [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml), [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml), and the single host compose file [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml).
+   Risk: a clean architecture path still needs operator rehearsal before it becomes operationally boring under pressure.
 
 ## Overall Rating
 
@@ -92,7 +92,7 @@ The architecture is promising, but file size, router density, and compatibility
 
 ### Operational Maturity
 
-`7/10`
+`7.5/10`
 
 Good CI and improving deploy discipline are in place, but production standardization still needs one more step.
 
@@ -191,7 +191,6 @@ Target window: 1 to 2 weeks
 
 Goals:
 
-- complete the move to image-based deploys as the canonical path
 - document staging and production bootstrap as code, not tribal knowledge
 - ensure staging and production run the Redis-backed rate-limit path intentionally and monitor fallback usage
 - define rollback drills and incident response playbooks
diff --git a/docs/architecture-hardening-backlog.md b/docs/architecture-hardening-backlog.md
index 6436711..613f7ab 100644
--- a/docs/architecture-hardening-backlog.md
+++ b/docs/architecture-hardening-backlog.md
@@ -48,6 +48,7 @@
 - the country listing and country detail assistant helpers now live in their own domain module, keeping the remaining geo/readmodel lookups out of the monolithic assistant router without changing the assistant contract
 - the remaining vacation workflow and entitlement assistant helpers now live in their own domain module, leaving `packages/api/src/router/assistant-tools.ts` as an aggregator/composition layer instead of the last mixed monolithic execution block
 - API and auth rate limiting now prefer shared Redis-backed counters when `REDIS_URL` is configured, while retaining an in-memory fallback for local/degraded operation with focused regression coverage
+- production delivery is now consolidated on a single image-based compose path with automatic image publication on `main`, deploy-time readiness gating, and architecture guardrails that prevent host-side app builds from creeping back in
 
 ## Next Up
 
@@ -62,8 +63,7 @@ The remaining work is now structural rather than another quick batch:
 
 1. secrets and runtime configuration policy
 2. oversized router decomposition
-3. canonical image-based production delivery
-4. performance hotspot reduction
+3. performance hotspot reduction
 
 ## Working Rule
 
diff --git a/docs/ci-cd-manual.md b/docs/ci-cd-manual.md
index 25d27fd..32277f0 100644
--- a/docs/ci-cd-manual.md
+++ b/docs/ci-cd-manual.md
@@ -2,333 +2,193 @@
 
 ## Overview
 
-CapaKraken uses GitHub Actions for continuous integration and Docker for deployment. This document covers the full pipeline from code push to production.
+This is the operational runbook for the canonical CapaKraken delivery path:
 
----
+1. CI validates every PR.
+2. Every push to `main` publishes immutable release images.
+3. Staging deploys one `sha-<commit>` tag.
+4. Production promotes the same tag.
+5. The host never builds application code from Git.
 
-## 1. CI Pipeline (Automatic on every PR)
+## 1. CI Gate
 
-### What triggers it
+The merge gate is [ci.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/ci.yml).
 
-| Event | Trigger |
-|-------|---------|
-| Pull request to `main` | All CI jobs run |
-| Push to `main` | All CI jobs run |
+It covers:
 
-### Jobs and their purpose
+- architecture guardrails
+- typecheck
+- lint
+- unit tests
+- build
+- E2E
 
-```
-PR opened / pushed
-    │
-    ├──→ typecheck    (tsc --noEmit, ~40s)
-    ├──→ lint         (ESLint via Turborepo, ~20s)
-    ├──→ test         (Vitest unit tests, ~60s, needs PostgreSQL + Redis)
-    │
-    └──→ build        (next build, ~90s, runs after typecheck)
-            │
-            └──→ e2e  (Playwright, ~3-5min, runs after build)
-```
+Before merging, all required checks must pass.
 
-**typecheck, lint, and test run in parallel** for speed. Build waits for typecheck. E2E waits for build.
-
-### What each job checks
-
-| Job | Command | What it catches |
-|-----|---------|----------------|
-| **typecheck** | `pnpm --filter @capakraken/web exec tsc --noEmit` | Type errors across the full web app |
-| **lint** | `pnpm lint` | Code style violations, unused imports, etc. |
-| **test** | `pnpm test:unit` | Unit test failures in engine, staffing, API, shared |
-| **build** | `pnpm --filter @capakraken/web exec next build` | SSR errors, dynamic import issues, bundle problems |
-| **e2e** | `pnpm test:e2e` | End-to-end user flow regressions |
-
-### Required status checks
-
-Before merging a PR, **all 5 jobs must pass**. Configure this in GitHub Settings > Branches > Branch protection rules > Require status checks.
-
-### Caching
-
-The pipeline caches these artifacts to speed up subsequent runs:
-
-| Cache | Key | Saves |
-|-------|-----|-------|
-| pnpm store | `pnpm-lock.yaml` hash | ~30s install time |
-| Turborepo | `.turbo` directory | ~60s on unchanged packages |
-| Playwright browsers | Playwright version | ~45s browser download |
-
----
-
-## 2. Local Development Quality Gates
-
-Run these before pushing to catch issues early:
+Useful local commands:
 
 ```bash
-# Quick check (< 2 min)
-pnpm --filter @capakraken/web exec tsc --noEmit && pnpm lint
-
-# Full check (< 3 min)
+pnpm --filter @capakraken/web exec tsc --project tsconfig.typecheck.json --noEmit
+pnpm lint
 pnpm test:unit
-
-# Full check including build (< 5 min)
 pnpm --filter @capakraken/web exec next build
 ```
 
-### Pre-commit hook (optional)
+## 2. Image Release
 
-You can add a Git pre-commit hook to run the quick check automatically:
+[release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) runs automatically on every push to `main`.
 
-```bash
-# .husky/pre-commit
-pnpm --filter @capakraken/web exec tsc --noEmit
-pnpm lint
+It publishes:
+
+- `ghcr.io/<owner>/<repo>-app:sha-<commit>`
+- `ghcr.io/<owner>/<repo>-migrator:sha-<commit>`
+
+The workflow can also be started manually via `workflow_dispatch` if a rebuild or tag override is needed.
+
+## 3. Host Bootstrap
+
+Each deploy target should have a dedicated directory such as `/opt/capakraken` containing:
+
+```text
+docker-compose.prod.yml
+.env.production
+deploy.env
+tooling/deploy/deploy-compose.sh
 ```
 
----
+Use these examples from the repo:
 
-## 3. Health Check Endpoints
+- [tooling/deploy/.env.production.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/.env.production.example)
+- [tooling/deploy/deploy.env.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy.env.example)
 
-Two endpoints are available for monitoring:
+Important host-side rules:
 
-### GET `/api/health` — Liveness Probe
+- keep `RATE_LIMIT_BACKEND=redis`
+- keep runtime secrets in `.env.production` or the platform secret layer
+- do not rotate runtime secrets through admin settings
+- ensure the host can pull from `ghcr.io`
 
-Returns 200 if the Node.js process is running. No external dependencies checked.
-
-```json
-{ "status": "ok", "timestamp": "2026-03-19T10:00:00.000Z" }
-```
-
-**Use for:** Kubernetes/Docker liveness probe, uptime monitoring.
-
-### GET `/api/ready` — Readiness Probe
-
-Checks PostgreSQL and Redis connectivity. Returns 200 if all services are reachable, 503 if not.
-
-```json
-// Healthy
-{ "status": "ready", "postgres": "ok", "redis": "ok" }
-
-// Unhealthy
-{ "status": "not_ready", "postgres": "ok", "redis": "error" }
-```
-
-**Use for:** Kubernetes/Docker readiness probe, load balancer health checks, nginx upstream checks.
-
----
-
-## 4. Production Docker Build
-
-### Building the production image
-
-```bash
-# Build the image
-docker build -f Dockerfile.prod -t capakraken:latest .
-
-# Test it locally
-docker compose -f docker-compose.prod.yml up -d
-```
-
-### Image details
-
-| Property | Value |
-|----------|-------|
-| Base | `node:20-bookworm-slim` |
-| Size | ~150-200 MB (vs ~1.5 GB dev image) |
-| Output | Next.js standalone mode |
-| Healthcheck | `curl -f http://localhost:3000/api/health` |
-| Port | 3000 (internal), mapped to 3100 externally |
-
-### Environment variables
-
-The production image requires these environment variables:
-
-```env
-# Required
-DATABASE_URL=postgresql://user:pass@host:5432/capakraken
-REDIS_URL=redis://host:6379
-NEXTAUTH_URL=https://capakraken.your-domain.com
-NEXTAUTH_SECRET=<random-32-char-string>
-
-# Optional
-SENTRY_DSN=https://xxx@sentry.io/xxx
-SMTP_HOST=smtp.example.com
-SMTP_PORT=587
-SMTP_USER=notifications@example.com
-SMTP_PASSWORD=<password>
-SMTP_FROM=CapaKraken <notifications@example.com>
-OPENAI_API_KEY=<optional-if-openai-used>
-AZURE_OPENAI_API_KEY=<optional-if-azure-chat-used>
-AZURE_DALLE_API_KEY=<optional-if-azure-image-gen-used>
-GEMINI_API_KEY=<optional-if-gemini-used>
-ANONYMIZATION_SEED=<required-if-deterministic-anonymization-enabled>
-```
-
-Generate a secure `NEXTAUTH_SECRET`:
+Generate a secure `NEXTAUTH_SECRET` with:
 
 ```bash
 openssl rand -base64 32
 ```
 
-Runtime secret policy:
+## 4. Staging Deployment
 
-- production secrets are injected through the deployment environment or host secret store
-- admin settings must not be used to enter or rotate AI, SMTP, or anonymization secrets
-- the admin UI is only for status checks and cleanup of legacy database-stored secret values
+Standard path:
 
----
+1. merge to `main`
+2. wait for [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) to publish `sha-<commit>`
+3. run [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) with that tag
 
-## 5. Deployment
+The workflow uploads:
 
-### docker-compose (simplest)
+- [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml)
+- [tooling/deploy](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/README.md)
+- a short-lived `deploy.env`
+
+On the host, [deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh):
+
+1. validates the rendered compose file
+2. pulls `APP_IMAGE` and `MIGRATOR_IMAGE`
+3. starts PostgreSQL and Redis
+4. runs Prisma migrations with the `migrator` image
+5. starts the app
+6. waits for `GET /api/ready`
+
+## 5. Production Promotion
+
+After staging is accepted:
+
+1. run [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml)
+2. use the exact same `sha-<commit>` tag
+3. verify `GET /api/ready`
+
+Production must promote the already-tested image, not rebuild from source.
+
+## 6. Manual Host Dry Run
+
+If you need to verify the host outside GitHub Actions:
 
 ```bash
-# On your server, after updating the host-side env/secret source
-git pull
-docker compose -f docker-compose.prod.yml up -d --build
+cp tooling/deploy/.env.production.example .env.production
+cp tooling/deploy/deploy.env.example deploy.env
+# fill in real secrets and image refs first
 
-# Run database migrations
-docker compose -f docker-compose.prod.yml exec app \
-  pnpm --filter @capakraken/db db:migrate:deploy
-
-# Seed initial data (first deployment only)
-docker compose -f docker-compose.prod.yml exec app \
-  pnpm db:seed
+set -a
+. ./deploy.env
+set +a
+bash tooling/deploy/deploy-compose.sh staging
 ```
 
-### Manual deployment (current setup)
+## 7. Health Endpoints
 
-Since `capakraken.hartmut-noerenberg.com` runs behind nginx:
+### GET `/api/health`
+
+Process liveness only. Use it for coarse uptime checks.
+
+### GET `/api/ready`
+
+Checks PostgreSQL and Redis connectivity. Use it for deploy readiness and traffic admission.
+
+For deploys, `/api/ready` is the source of truth.
+
+## 8. Rollback
+
+Rollback is image-based:
+
+1. choose the previous healthy `sha-<commit>`
+2. rerun the staging or production deploy workflow with that tag
+3. confirm `GET /api/ready`
+
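+Redeploying an older image does not revert the database schema, so schema changes still need expand-and-contract discipline: the previous image must keep working against the newer schema.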
+
+## 9. Troubleshooting
+
+### CI failure
+
+Run the failing command locally:
 
 ```bash
-# On the server
-cd /home/hartmut/Documents/Copilot/capakraken
-git pull origin main
-pnpm install
-pnpm db:generate
-pnpm db:validate
-pnpm --filter @capakraken/db db:migrate:deploy
-pnpm --filter @capakraken/web exec next build
-rm -rf apps/web/.next/cache  # clear stale cache
-
-# Restart the app (systemd, pm2, or manual)
-fuser -k 3100/tcp 2>/dev/null
-PORT=3100 pnpm --filter @capakraken/web start &
-```
-
-Use the repo-level `pnpm db:*` commands for Prisma/database operations. They load `.env`, `.env.local`, `.env.$NODE_ENV`, and `.env.$NODE_ENV.local` automatically before invoking Prisma.
-
-If you rotate runtime secrets during a manual deploy, update the host-side environment source first, then restart the app so the new process reads the updated values. Do not patch those values through admin settings.
-
-### nginx configuration
-
-The existing nginx reverse proxy should forward to port 3100:
-
-```nginx
-server {
-    server_name capakraken.hartmut-noerenberg.com;
-
-    location / {
-        proxy_pass http://127.0.0.1:3100;
-        proxy_http_version 1.1;
-        proxy_set_header Upgrade $http_upgrade;
-        proxy_set_header Connection "upgrade";
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-
-        # SSE support (keep connection open)
-        proxy_read_timeout 86400s;
-        proxy_buffering off;
-    }
-}
-```
-
----
-
-## 6. Monitoring Setup
-
-### Sentry (error tracking)
-
-After creating a Sentry project, add the DSN to `.env.production`:
-
-```env
-SENTRY_DSN=https://xxx@sentry.io/xxx
-```
-
-Errors are automatically captured by the Sentry integration in Next.js.
-
-### Uptime monitoring
-
-Point an external monitor (UptimeRobot, Better Stack, etc.) at:
-
-```
-https://capakraken.hartmut-noerenberg.com/api/health
-```
-
-Alert if status code != 200 for more than 2 consecutive checks.
-
----
-
-## 7. Troubleshooting
-
-### CI job fails: "tsc --noEmit"
-
-TypeScript error in the web app. Run locally:
-```bash
-pnpm --filter @capakraken/web exec tsc --noEmit
-```
-
-### CI job fails: "test:unit"
-
-Unit test failure. Run locally:
-```bash
+pnpm --filter @capakraken/web exec tsc --project tsconfig.typecheck.json --noEmit
+pnpm lint
 pnpm test:unit
-```
-
-### CI job fails: "next build"
-
-Build error (often `ssr: false` in Server Components, missing exports). Run locally:
-```bash
 pnpm --filter @capakraken/web exec next build
 ```
 
-### CI job fails: "e2e"
+### Deploy fails before container start
 
-Playwright test failure. Check the HTML report artifact in the GitHub Actions run.
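+Check the rendered compose configuration on the host. Load the variables from `deploy.env` and `.env.production` into the shell first, because the compose file aborts when `APP_IMAGE`, `MIGRATOR_IMAGE`, or `POSTGRES_PASSWORD` is unset: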
 
-### Production: 502 Bad Gateway
-
-The Next.js process isn't running. Check:
 ```bash
-ss -tlnp | grep 3100  # Is anything listening?
-tail -50 /tmp/capakraken-dev.log  # Check app logs
+docker compose -f docker-compose.prod.yml config -q
 ```
 
-Restart:
+Then verify `.env.production` and `deploy.env`.
+
+### App never becomes ready
+
+Check:
+
 ```bash
-fuser -k 3100/tcp 2>/dev/null
-pnpm dev &  # or pnpm start for production mode
+docker compose -f docker-compose.prod.yml ps
+docker compose -f docker-compose.prod.yml logs --tail 200 app
+curl -s http://127.0.0.1:${APP_HOST_PORT:-3000}/api/ready
 ```
 
-### Production: 500 Internal Server Error
+### Database migration failure
+
+Re-run the migrator in the foreground to see its output:
 
-Usually a stale Prisma client after schema changes:
 ```bash
-pnpm db:generate
-pnpm db:validate
-rm -rf apps/web/.next
-pnpm --filter @capakraken/web exec next build
-# Restart the server
+docker compose -f docker-compose.prod.yml run --rm migrator
 ```
 
-### Database connection issues
+### Registry pull failure
 
-Check the `/api/ready` endpoint:
-```bash
-curl -s https://capakraken.hartmut-noerenberg.com/api/ready | jq .
-```
+Verify `GHCR_USERNAME` and `GHCR_TOKEN`, then test:
 
-If `postgres: "error"`, verify:
 ```bash
-docker ps | grep postgres  # Is container running?
-psql -h localhost -p 5433 -U capakraken -d capakraken  # Can you connect?
+printf '%s\n' "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USERNAME" --password-stdin
 ```
diff --git a/docs/cicd-target-architecture.md b/docs/cicd-target-architecture.md
index ccbec44..2d69056 100644
--- a/docs/cicd-target-architecture.md
+++ b/docs/cicd-target-architecture.md
@@ -2,83 +2,67 @@
 
 ## Goal
 
-This document captures the intended delivery model for CapaKraken without replacing the currently working manual production setup immediately.
+This document describes the canonical release path for CapaKraken.
 
-The target state is:
+The release model is now:
 
-1. CI validates every PR.
-2. GitHub Actions builds immutable Docker images.
-3. Staging and production pull those exact images from a registry.
-4. Database migrations run as an explicit deploy step.
-5. Traffic is considered safe only after the app answers `GET /api/ready`.
+1. PRs are validated by CI before merge.
+2. Every push to `main` publishes immutable `app` and `migrator` images.
+3. Staging and production promote the exact same `sha-<commit>` tag.
+4. The host deploys only from images and runtime env files.
+5. A deployment is successful only after `GET /api/ready` passes.
 
-## Core Idea
-
-The production host should stop building application code from a Git checkout. Instead, it should only:
-
-- pull a versioned `app` image
-- pull a matching `migrator` image
-- run Prisma deploy migrations
-- start the application container
-- wait for readiness
-
-That removes "works on the server but not in CI" drift and makes rollbacks much simpler.
-
-## Delivery Flow
+## Canonical Flow
 
 ### 1. Pull Request Validation
 
-The existing `CI` workflow continues to validate:
+The main [ci.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/ci.yml) workflow remains the merge gate for:
 
-- architecture guardrails for SSE audience scoping
+- architecture guardrails
 - typecheck
 - lint
 - unit tests
 - build
 - E2E
 
-This remains the quality gate before merge.
+### 2. Automatic Image Release
 
-The guardrail step currently enforces three invariants:
+[release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) now runs automatically on every push to `main` and can still be started manually for rebuilds or tag overrides.
 
-- no role-based SSE audience fan-out in [event-bus.ts](/home/hartmut/Documents/Copilot/capakraken/packages/api/src/sse/event-bus.ts)
-- no role-derived subscription audiences in [subscription-policy.ts](/home/hartmut/Documents/Copilot/capakraken/packages/api/src/sse/subscription-policy.ts)
-- no client-provided audience parsing in [route.ts](/home/hartmut/Documents/Copilot/capakraken/apps/web/src/app/api/sse/timeline/route.ts)
+It publishes two images from [Dockerfile.prod](/home/hartmut/Documents/Copilot/capakraken/Dockerfile.prod):
 
-### 2. Image Build
+- `ghcr.io/<owner>/<repo>-app:sha-<commit>`
+- `ghcr.io/<owner>/<repo>-migrator:sha-<commit>`
 
-The new manual workflow [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) builds two images from [Dockerfile.prod](/home/hartmut/Documents/Copilot/capakraken/Dockerfile.prod):
+### 3. Staging Promotion
 
-- `runner` target as the production app image
-- `migrator` target as the Prisma migration image
+[deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) copies the canonical deploy bundle to the staging host:
 
-Recommended tag format:
+- [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml)
+- [tooling/deploy/deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh)
+- the rest of [tooling/deploy](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/README.md)
 
-- `sha-<git-commit>`
+GitHub Actions also writes a short-lived `deploy.env` containing `APP_IMAGE`, `MIGRATOR_IMAGE`, and the host port.
 
-Example:
+### 4. Host-Side Deployment
 
-```text
-ghcr.io/<owner>/capakraken-app:sha-abc123
-ghcr.io/<owner>/capakraken-migrator:sha-abc123
-```
+On the target host, [deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh):
 
-### 3. Staging Deploy
+1. loads `.env.production` and `deploy.env`
+2. validates the rendered compose file
+3. pulls the immutable `app` and `migrator` images
+4. starts PostgreSQL and Redis
+5. runs Prisma migrations through the dedicated `migrator` image
+6. starts the new `app` container
+7. waits for `GET /api/ready`
 
-The staging workflow [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) is intended to:
+The host does not build application code from Git anymore.
 
-1. connect to the staging host over SSH
-2. copy the deploy assets
-3. export `APP_IMAGE` and `MIGRATOR_IMAGE`
-4. run [deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh)
+### 5. Production Promotion
 
-The compose file used for this target flow is [docker-compose.cicd.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.cicd.yml).
+[deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml) repeats the exact staging flow with the same image tag after staging acceptance.
 
-### 4. Production Promotion
-
-The production workflow [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml) follows the same logic as staging, but the image tag is promoted manually.
-
-That means production uses an image that was already built and can already have been exercised in staging.
+That keeps staging and production on the same artifact instead of rebuilding.
 
 ## Required Infrastructure
 
@@ -86,139 +70,66 @@ That means production uses an image that was already built and can already have
 
 - GitHub repository with Actions enabled
 - GHCR or another container registry
-- 1 Linux host with Docker and Docker Compose
+- one Linux host with Docker Engine and Docker Compose v2
 - PostgreSQL
 - Redis
-- reverse proxy such as nginx
 - SSH access from GitHub Actions to the host
+- reverse proxy or load balancer in front of the app
 
 ### Recommended
 
 - separate staging and production hosts
 - GitHub Environments for `staging` and `production`
-- required reviewer approval for `production`
-- backup strategy for PostgreSQL volumes
-- uptime monitoring and error tracking
+- required approval for the `production` environment
+- monitoring on `/api/health` and `/api/ready`
+- PostgreSQL backup and restore drills
 
-## Secrets
+## Runtime Configuration
 
-### GitHub Environment Secrets
-
-For `staging`:
-
-- `STAGING_SSH_HOST`
-- `STAGING_SSH_PORT`
-- `STAGING_SSH_USER`
-- `STAGING_SSH_KEY`
-- `STAGING_DEPLOY_PATH`
-- `STAGING_APP_HOST_PORT`
-- `STAGING_GHCR_USERNAME`
-- `STAGING_GHCR_TOKEN`
-
-For `production`:
-
-- `PROD_SSH_HOST`
-- `PROD_SSH_PORT`
-- `PROD_SSH_USER`
-- `PROD_SSH_KEY`
-- `PROD_DEPLOY_PATH`
-- `PROD_APP_HOST_PORT`
-- `PROD_GHCR_USERNAME`
-- `PROD_GHCR_TOKEN`
-
-### Host-side Files
-
-Each target host should already have:
+The canonical host-side inputs are:
 
+- [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml)
 - `.env.production`
-- Docker installed
-- network access to the container registry
+- `deploy.env`
 
-The repository now also contains a small host example at [tooling/deploy/.env.production.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/.env.production.example) and an operator note at [tooling/deploy/README.md](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/README.md).
+`.env.production` holds long-lived runtime configuration and secrets. The example file is [tooling/deploy/.env.production.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/.env.production.example).
 
-### Minimum Host Bootstrap
+`deploy.env` is short-lived deployment metadata. The example file is [tooling/deploy/deploy.env.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy.env.example).
 
-For each target host, create a dedicated deploy directory such as `/opt/capakraken` and place these files there:
+Important invariants:
 
-```text
-docker-compose.cicd.yml
-.env.production
-tooling/deploy/deploy-compose.sh
-```
-
-`.env.production` should hold the long-lived runtime settings, including:
-
-```env
-POSTGRES_PASSWORD=<long-random-password>
-NEXTAUTH_URL=https://capakraken.example.com
-NEXTAUTH_SECRET=<long-random-secret>
-```
-
-GitHub Actions only injects the short-lived image references through `deploy.env`. The deploy script then loads both files before calling Docker Compose, so compose interpolation and container runtime env use the same source of truth.
-
-### Runtime Secret Provisioning Policy
-
-Production and staging secrets should be provisioned at the host or platform-secret layer, not through admin mutations and not through application database writes.
-
-That includes at least:
-
-```env
-OPENAI_API_KEY=<optional-if-openai-used>
-AZURE_OPENAI_API_KEY=<optional-if-azure-chat-used>
-AZURE_DALLE_API_KEY=<optional-if-azure-image-gen-used>
-GEMINI_API_KEY=<optional-if-gemini-used>
-SMTP_PASSWORD=<required-if-smtp-auth-used>
-ANONYMIZATION_SEED=<required-if-deterministic-anonymization-enabled>
-```
-
-Operational rule:
-
-- keep these values in `.env.production` only for smaller self-managed hosts, or preferably in the host's secret manager / encrypted environment facility
-- do not rotate or patch these values through `SystemSettings`
-- use the admin settings page only to verify runtime source/status and to clear leftover legacy database copies
-- after migration, legacy database secret fields should be empty in both staging and production
+- `RATE_LIMIT_BACKEND=redis` should stay explicit in release environments
+- runtime AI, SMTP, and anonymization secrets belong in the host or platform secret layer
+- admin settings are for verification and legacy-secret cleanup, not for secret rotation
 
 ## Database Policy
 
-For release environments, use:
+Release environments must run migrations through the `migrator` image, which executes:
 
 ```bash
 pnpm --filter @capakraken/db db:migrate:deploy
 ```
 
-Do not use `db:push` as the main production deployment mechanism. `db:push` is convenient for local development, but it does not give the release traceability that a migration-based deploy requires.
+`db:push` remains a local-development tool, not a production rollout mechanism.
 
 ## Rollback Model
 
-Rollback should be image-based:
+Rollback is image-based:
 
-1. choose the previous good `sha-...` tag
-2. run the production deploy workflow again with that tag
-3. confirm readiness
+1. choose the previous healthy `sha-<commit>` tag
+2. redeploy staging or production with that tag
+3. confirm `GET /api/ready`
 
-This is only safe when schema changes follow backwards-compatible expand and contract rules.
+This assumes schema changes follow backwards-compatible expand-and-contract rollout rules.
 
-## How A Production Update Works
+## Production Update Summary
 
-The intended production update path is:
+The standard production update is:
 
-1. merge to `main` after the existing CI workflow is green
-2. run [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) to build immutable `app` and `migrator` images tagged as `sha-<commit>`
-3. run [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) with that exact image tag
-4. GitHub Actions uploads the deploy bundle to the staging host and writes a temporary `deploy.env`
-5. [deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh) pulls images, starts PostgreSQL and Redis, runs Prisma deploy migrations, starts the new app container, and waits for `GET /api/ready`
-6. after staging is accepted, run [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml) with the same tag
-7. production repeats the same image-based flow, so the running artifact matches staging
+1. merge to `main` after CI is green
+2. let [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) publish `sha-<commit>` images
+3. deploy that tag to staging through [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml)
+4. validate staging
+5. promote the same tag through [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml)
 
-That means the production host no longer builds from Git. It only receives a versioned image and starts it after migrations complete.
-
-The same principle applies to secrets: the running container reads them from the deployment environment at start time, so an update only needs a new image tag unless secret material itself is being rotated.
-
-## Current Status
-
-The repository now contains the CI/CD scaffolding, but the existing manual production setup remains untouched:
-
-- current manual compose flow: [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml)
-- current manual runbook: [ci-cd-manual.md](/home/hartmut/Documents/Copilot/capakraken/docs/ci-cd-manual.md)
-
-This allows the team to introduce the new path gradually instead of switching production in one step.
+The important property is artifact identity: staging and production run the same image, not two separate builds.
diff --git a/package.json b/package.json
index 83fb155..359b856 100644
--- a/package.json
+++ b/package.json
@@ -9,6 +9,7 @@
     "test": "turbo run test:unit",
     "test:unit": "turbo test:unit",
     "test:e2e": "turbo test:e2e",
+    "check:architecture": "node ./scripts/check-architecture-guardrails.mjs",
     "db:doctor": "node ./scripts/db-doctor.mjs capakraken",
     "db:prisma": "node ./scripts/prisma-with-env.mjs",
     "db:push": "node ./scripts/with-env.mjs pnpm --filter @capakraken/db db:push",
diff --git a/scripts/check-architecture-guardrails.mjs b/scripts/check-architecture-guardrails.mjs
new file mode 100644
index 0000000..cc01075
--- /dev/null
+++ b/scripts/check-architecture-guardrails.mjs
@@ -0,0 +1,180 @@
+/**
+ * Architecture guardrail check.
+ *
+ * Reads a fixed list of repository files, resolved against the current working directory,
+ * and tests each one against regular expressions that must match (`required`) or must not
+ * match (`forbidden`). Every failed expectation is collected; if any exist they are printed
+ * to stderr and the process exits with status 1.
+ *
+ * Run it from the repository root, e.g. through the `check:architecture` package script.
+ */
+
+import { readFile } from "node:fs/promises";
+import path from "node:path";
+import process from "node:process";
+
+// All rule paths below are resolved relative to the directory the script is started from.
+const rootDir = process.cwd();
+
+// Each rule names one file plus the patterns that must (`required`) or must not
+// (`forbidden`) match its full text; `message` is reported when that expectation fails.
+const rules = [
+  {
+    file: "packages/api/src/sse/event-bus.ts",
+    required: [],
+    forbidden: [
+      { pattern: /\bRoleSseAudience\b/, message: "role-based SSE audience types must not reappear" },
+      { pattern: /\broleAudience\s*\(/, message: "role-derived SSE audiences must not be emitted" },
+      { pattern: /\bBROADCAST_SENT\b/, message: "broadcast SSE event resurrection needs explicit architecture review" },
+    ],
+  },
+  {
+    file: "packages/api/src/sse/subscription-policy.ts",
+    required: [
+      {
+        pattern: /\bderiveUserSseSubscription\b/,
+        message: "subscription derivation must stay centralized in deriveUserSseSubscription",
+      },
+    ],
+    forbidden: [
+      { pattern: /\broleAudience\s*\(/, message: "subscription policy must not derive role audiences" },
+    ],
+  },
+  {
+    file: "apps/web/src/app/api/sse/timeline/route.ts",
+    required: [
+      {
+        pattern: /\bderiveUserSseSubscription\s*\(/,
+        message: "timeline SSE route must derive audiences server-side from the authenticated user",
+      },
+    ],
+    forbidden: [
+      { pattern: /\bsearchParams\b/, message: "timeline SSE route must not accept client-provided audience scoping" },
+      { pattern: /\baudience\b/, message: "timeline SSE route must not parse raw audience values from the client" },
+    ],
+  },
+  {
+    file: "docker-compose.prod.yml",
+    required: [
+      {
+        pattern: /image:\s+\$\{APP_IMAGE:\?set APP_IMAGE\}/,
+        message: "production compose must deploy the immutable app image",
+      },
+      {
+        pattern: /image:\s+\$\{MIGRATOR_IMAGE:\?set MIGRATOR_IMAGE\}/,
+        message: "production compose must deploy the immutable migrator image",
+      },
+      {
+        pattern: /http:\/\/localhost:3000\/api\/ready/,
+        message: "production compose must gate app health on the readiness endpoint",
+      },
+      {
+        pattern: /RATE_LIMIT_BACKEND:\s+\$\{RATE_LIMIT_BACKEND:-redis\}/,
+        message: "production compose must intentionally pin the Redis-backed rate-limit path",
+      },
+    ],
+    forbidden: [
+      { pattern: /\bbuild:/, message: "production compose must not build application images on the host" },
+    ],
+  },
+  {
+    file: ".github/workflows/release-image.yml",
+    required: [
+      {
+        pattern: /push:\s*\n\s*branches:\s*\[main\]/,
+        message: "image releases must build automatically on pushes to main",
+      },
+      {
+        pattern: /workflow_dispatch:/,
+        message: "image release must remain manually callable for rebuilds and tag overrides",
+      },
+      {
+        pattern: /target:\s+runner/,
+        message: "release workflow must keep publishing the runner image",
+      },
+      {
+        pattern: /target:\s+migrator/,
+        message: "release workflow must keep publishing the migrator image",
+      },
+    ],
+    forbidden: [],
+  },
+  {
+    file: ".github/workflows/deploy-staging.yml",
+    required: [
+      {
+        pattern: /docker-compose\.prod\.yml tooling\/deploy/,
+        message: "staging deploy must ship the canonical production compose bundle",
+      },
+    ],
+    forbidden: [],
+  },
+  {
+    file: ".github/workflows/deploy-prod.yml",
+    required: [
+      {
+        pattern: /docker-compose\.prod\.yml tooling\/deploy/,
+        message: "production deploy must ship the canonical production compose bundle",
+      },
+    ],
+    forbidden: [],
+  },
+  {
+    file: "tooling/deploy/deploy-compose.sh",
+    required: [
+      {
+        pattern: /COMPOSE_FILE="\$\{COMPOSE_FILE:-docker-compose\.prod\.yml\}"/,
+        message: "deploy script must default to the canonical production compose file",
+      },
+      {
+        pattern: /READY_URL="\$\{READY_URL:-http:\/\/127\.0\.0\.1:\$\{APP_HOST_PORT:-3000\}\/api\/ready\}"/,
+        message: "deploy script must wait on the readiness endpoint",
+      },
+      {
+        pattern: /docker compose -f "\$\{COMPOSE_FILE\}" config -q/,
+        message: "deploy script must validate the rendered compose file before pulling images",
+      },
+    ],
+    forbidden: [],
+  },
+];
+
+// Human-readable failure lines, collected across all rules before anything is reported.
+const violations = [];
+
+for (const rule of rules) {
+  const absolutePath = path.join(rootDir, rule.file);
+
+  let source;
+  try {
+    source = await readFile(absolutePath, "utf8");
+  } catch (error) {
+    // A guarded file that is missing or unreadable is itself a violation. Record it and keep
+    // going so one run reports every problem instead of aborting with a raw stack trace.
+    const reason = error instanceof Error ? error.message : String(error);
+    violations.push(`${rule.file}: unable to read guarded file: ${reason}`);
+    continue;
+  }
+
+  for (const requirement of rule.required) {
+    if (!requirement.pattern.test(source)) {
+      violations.push(`${rule.file}: missing guardrail anchor: ${requirement.message}`);
+    }
+  }
+
+  for (const forbidden of rule.forbidden) {
+    if (forbidden.pattern.test(source)) {
+      violations.push(`${rule.file}: forbidden pattern matched: ${forbidden.message}`);
+    }
+  }
+}
+
+if (violations.length > 0) {
+  console.error("Architecture guardrail check failed:");
+  for (const violation of violations) {
+    console.error(`- ${violation}`);
+  }
+  process.exit(1);
+}
+
+console.log("Architecture guardrails passed.");
diff --git a/tooling/deploy/.env.production.example b/tooling/deploy/.env.production.example
index 515ec37..85310d2 100644
--- a/tooling/deploy/.env.production.example
+++ b/tooling/deploy/.env.production.example
@@ -1,8 +1,9 @@
-# Runtime settings consumed by the app and by docker-compose.cicd.yml on the target host.
+# Runtime settings consumed by the app and by docker-compose.prod.yml on the target host.
 
 POSTGRES_PASSWORD=replace-with-a-long-random-password
 NEXTAUTH_URL=https://capakraken.example.com
 NEXTAUTH_SECRET=replace-with-a-long-random-secret
+RATE_LIMIT_BACKEND=redis
 
 # Optional but commonly needed application settings.
 SENTRY_DSN=
diff --git a/tooling/deploy/README.md b/tooling/deploy/README.md
index 5f14611..8f36f73 100644
--- a/tooling/deploy/README.md
+++ b/tooling/deploy/README.md
@@ -1,11 +1,12 @@
 # Deploy Tooling
 
-This directory contains the additive deployment scaffold for the image-based CI/CD target path.
+This directory contains the canonical host-side tooling for the image-based staging and production path.
 
 ## Files
 
-- `deploy-compose.sh`: pulls images, runs migrations, starts the app, and waits for readiness
+- `deploy-compose.sh`: validates compose input, pulls images, runs migrations, starts the app, and waits for readiness
 - `.env.production.example`: example host-side runtime configuration
+- `deploy.env.example`: example short-lived deployment manifest written by GitHub Actions
 
 ## Host Layout
 
@@ -13,7 +14,7 @@ On the target host, the deploy directory should contain:
 
 ```text
 <deploy-path>/
-  docker-compose.cicd.yml
+  docker-compose.prod.yml
   deploy.env
   .env.production
   tooling/deploy/deploy-compose.sh
@@ -25,16 +26,20 @@ On the target host, the deploy directory should contain:
 
 1. Copy `tooling/deploy/.env.production.example` to the target host as `.env.production`.
 2. Fill in the required secrets and URLs.
-3. Provision runtime AI/SMTP/anonymization secrets on the host through `.env.production` or the platform's secret facility.
-4. Keep admin settings for status/verification only; do not use them to enter or rotate operational secrets.
-5. After migration, use the admin cleanup action to remove any legacy database-stored runtime secrets.
-6. Ensure Docker Engine and Docker Compose v2 are installed.
-7. Ensure the target host can pull from `ghcr.io`.
-8. Run the image release workflow, then the staging or production deploy workflow with the same image tag.
+3. Keep `RATE_LIMIT_BACKEND=redis` so production uses the shared counter path intentionally.
+4. Copy `tooling/deploy/deploy.env.example` to the host only if you want to run the deploy script manually.
+5. Replace the placeholder images with a real `sha-<commit>` tag and save the file as `deploy.env`; note that a manual run performs a real deployment, including migrations.
+6. Provision runtime AI/SMTP/anonymization secrets on the host through `.env.production` or the platform's secret facility.
+7. Keep admin settings for status/verification only; do not use them to enter or rotate operational secrets.
+8. After migration, use the admin cleanup action to remove any legacy database-stored runtime secrets.
+9. Ensure Docker Engine and Docker Compose v2 are installed.
+10. Ensure the target host can pull from `ghcr.io`.
+11. A normal release no longer needs a Git checkout on the host. The host only needs the deploy bundle plus the two env files.
+12. Merge to `main`, let `release-image.yml` publish the immutable images, then run the staging or production deploy workflow with the same image tag.
 
 ## Manual Host Test
 
-After the files are present on the host, the flow can be tested manually:
+After the files are present on the host, the canonical flow can be tested manually:
 
 ```bash
 set -a
diff --git a/tooling/deploy/deploy-compose.sh b/tooling/deploy/deploy-compose.sh
index f49dd8e..7ef1586 100755
--- a/tooling/deploy/deploy-compose.sh
+++ b/tooling/deploy/deploy-compose.sh
@@ -2,7 +2,7 @@
 set -euo pipefail
 
 DEPLOY_ENV="${1:-unknown}"
-COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.cicd.yml}"
+COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.prod.yml}"
 APP_ENV_FILE="${APP_ENV_FILE:-.env.production}"
 DEPLOY_ENV_FILE="${DEPLOY_ENV_FILE:-deploy.env}"
 READY_URL="${READY_URL:-http://127.0.0.1:${APP_HOST_PORT:-3000}/api/ready}"
@@ -36,6 +36,7 @@ if [ -n "${GHCR_USERNAME:-}" ] && [ -n "${GHCR_TOKEN:-}" ]; then
   printf '%s\n' "${GHCR_TOKEN}" | docker login ghcr.io -u "${GHCR_USERNAME}" --password-stdin
 fi
 
+docker compose -f "${COMPOSE_FILE}" config -q
 docker compose -f "${COMPOSE_FILE}" pull app migrator
 docker compose -f "${COMPOSE_FILE}" up -d postgres redis
 docker compose -f "${COMPOSE_FILE}" run --rm migrator
diff --git a/tooling/deploy/deploy.env.example b/tooling/deploy/deploy.env.example
new file mode 100644
index 0000000..2804f91
--- /dev/null
+++ b/tooling/deploy/deploy.env.example
@@ -0,0 +1,5 @@
+APP_IMAGE=ghcr.io/example/capakraken-app:sha-abc123
+MIGRATOR_IMAGE=ghcr.io/example/capakraken-migrator:sha-abc123
+APP_HOST_PORT=3000
+GHCR_USERNAME=
+GHCR_TOKEN=