From 7bcc831b5c60d5828581bdbe79593a6970df1429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hartmut=20N=C3=B6renberg?= Date: Mon, 30 Mar 2026 23:35:29 +0200 Subject: [PATCH] refactor(ops): standardize image-based production delivery --- .github/workflows/ci.yml | 23 +- .github/workflows/deploy-prod.yml | 2 +- .github/workflows/deploy-staging.yml | 2 +- .github/workflows/release-image.yml | 11 + docker-compose.cicd.yml | 77 ---- docker-compose.prod.yml | 42 +- docs/README.md | 2 +- docs/ai-excellence-due-diligence-roadmap.md | 9 +- docs/architecture-hardening-backlog.md | 4 +- docs/ci-cd-manual.md | 404 +++++++------------- docs/cicd-target-architecture.md | 217 ++++------- package.json | 1 + scripts/check-architecture-guardrails.mjs | 155 ++++++++ tooling/deploy/.env.production.example | 3 +- tooling/deploy/README.md | 25 +- tooling/deploy/deploy-compose.sh | 3 +- tooling/deploy/deploy.env.example | 5 + 17 files changed, 447 insertions(+), 538 deletions(-) delete mode 100644 docker-compose.cicd.yml create mode 100644 scripts/check-architecture-guardrails.mjs create mode 100644 tooling/deploy/deploy.env.example diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5a4dcea..20d4b7a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,6 +15,27 @@ env: PNPM_VERSION: "9.14.2" jobs: + guardrails: + name: Architecture Guardrails + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: pnpm/action-setup@v4 + with: + version: ${{ env.PNPM_VERSION }} + + - uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Check architecture guardrails + run: pnpm check:architecture + # ────────────────────────────────────────────── # Typecheck — ~40s, no services needed # ────────────────────────────────────────────── @@ -147,7 +168,7 @@ jobs: # ────────────────────────────────────────────── build: name: Build - 
needs: [typecheck] + needs: [guardrails, typecheck] runs-on: ubuntu-latest env: DATABASE_URL: postgresql://placeholder:placeholder@localhost:5432/placeholder diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 5985603..f0d70cd 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -44,7 +44,7 @@ jobs: ssh-keyscan -p "${SSH_PORT:-22}" -H "${SSH_HOST}" >> ~/.ssh/known_hosts - name: Bundle deploy assets - run: tar czf deploy-bundle.tgz docker-compose.cicd.yml tooling/deploy + run: tar czf deploy-bundle.tgz docker-compose.prod.yml tooling/deploy - name: Copy deploy assets to production env: diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml index 46a33a3..2500314 100644 --- a/.github/workflows/deploy-staging.yml +++ b/.github/workflows/deploy-staging.yml @@ -44,7 +44,7 @@ jobs: ssh-keyscan -p "${SSH_PORT:-22}" -H "${SSH_HOST}" >> ~/.ssh/known_hosts - name: Bundle deploy assets - run: tar czf deploy-bundle.tgz docker-compose.cicd.yml tooling/deploy + run: tar czf deploy-bundle.tgz docker-compose.prod.yml tooling/deploy - name: Copy deploy assets to staging env: diff --git a/.github/workflows/release-image.yml b/.github/workflows/release-image.yml index 78f76e7..930349d 100644 --- a/.github/workflows/release-image.yml +++ b/.github/workflows/release-image.yml @@ -1,6 +1,8 @@ name: Release Image on: + push: + branches: [main] workflow_dispatch: inputs: image_tag: @@ -61,3 +63,12 @@ jobs: tags: ${{ steps.vars.outputs.migrator_image }} cache-from: type=gha,scope=migrator-image cache-to: type=gha,mode=max,scope=migrator-image + + - name: Publish release summary + run: | + { + echo "## Image release" + echo + echo "- App image: \`${{ steps.vars.outputs.app_image }}\`" + echo "- Migrator image: \`${{ steps.vars.outputs.migrator_image }}\`" + } >> "$GITHUB_STEP_SUMMARY" diff --git a/docker-compose.cicd.yml b/docker-compose.cicd.yml deleted file mode 100644 index 
17ff2ab..0000000 --- a/docker-compose.cicd.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: capakraken-cicd - -services: - postgres: - image: postgres:16-alpine - restart: unless-stopped - ports: - - "${POSTGRES_PORT:-5432}:5432" - environment: - POSTGRES_DB: capakraken - POSTGRES_USER: capakraken - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD} - volumes: - - capakraken_prod_pgdata:/var/lib/postgresql/data - healthcheck: - test: ["CMD-SHELL", "pg_isready -U capakraken -d capakraken"] - interval: 10s - timeout: 5s - retries: 5 - start_period: 10s - - redis: - image: redis:7-alpine - restart: unless-stopped - ports: - - "${REDIS_PORT:-6379}:6379" - command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru - volumes: - - capakraken_prod_redis:/data - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 10s - timeout: 5s - retries: 5 - start_period: 5s - - migrator: - image: ${MIGRATOR_IMAGE:?set MIGRATOR_IMAGE} - restart: "no" - env_file: - - .env.production - environment: - DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}@postgres:5432/capakraken - REDIS_URL: redis://redis:6379 - depends_on: - postgres: - condition: service_healthy - redis: - condition: service_healthy - - app: - image: ${APP_IMAGE:?set APP_IMAGE} - restart: unless-stopped - ports: - - "${APP_HOST_PORT:-3000}:3000" - env_file: - - .env.production - environment: - DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}@postgres:5432/capakraken - REDIS_URL: redis://redis:6379 - depends_on: - postgres: - condition: service_healthy - redis: - condition: service_healthy - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:3000/api/ready"] - interval: 30s - timeout: 5s - retries: 3 - start_period: 30s - -volumes: - capakraken_prod_pgdata: - name: capakraken_prod_pgdata - capakraken_prod_redis: - name: capakraken_prod_redis diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index a17ee4d..7553086 
100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -5,11 +5,11 @@ services: image: postgres:16-alpine restart: unless-stopped ports: - - "5432:5432" + - "${POSTGRES_PORT:-5432}:5432" environment: POSTGRES_DB: capakraken POSTGRES_USER: capakraken - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-changeme} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD} command: > postgres -c log_connections=on @@ -31,7 +31,7 @@ services: image: redis:7-alpine restart: unless-stopped ports: - - "6379:6379" + - "${REDIS_PORT:-6379}:6379" command: redis-server --maxmemory 256mb --maxmemory-policy allkeys-lru volumes: - capakraken_prod_redis:/data @@ -42,29 +42,45 @@ services: retries: 5 start_period: 5s - app: - build: - context: . - dockerfile: Dockerfile.prod - restart: unless-stopped - ports: - - "3000:3000" + migrator: + image: ${MIGRATOR_IMAGE:?set MIGRATOR_IMAGE} + pull_policy: always + restart: "no" env_file: - .env.production environment: - DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:-changeme}@postgres:5432/capakraken + DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}@postgres:5432/capakraken REDIS_URL: redis://redis:6379 + RATE_LIMIT_BACKEND: ${RATE_LIMIT_BACKEND:-redis} + depends_on: + postgres: + condition: service_healthy + redis: + condition: service_healthy + + app: + image: ${APP_IMAGE:?set APP_IMAGE} + pull_policy: always + restart: unless-stopped + ports: + - "${APP_HOST_PORT:-3000}:3000" + env_file: + - .env.production + environment: + DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD}@postgres:5432/capakraken + REDIS_URL: redis://redis:6379 + RATE_LIMIT_BACKEND: ${RATE_LIMIT_BACKEND:-redis} depends_on: postgres: condition: service_healthy redis: condition: service_healthy healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:3000/api/health"] + test: ["CMD", "curl", "-f", "http://localhost:3000/api/ready"] interval: 30s timeout: 5s retries: 3 - 
start_period: 20s + start_period: 30s volumes: capakraken_prod_pgdata: diff --git a/docs/README.md b/docs/README.md index 4de8694..560a442 100644 --- a/docs/README.md +++ b/docs/README.md @@ -8,7 +8,7 @@ | Topic | File | Use | |---|---|---| | AI excellence due diligence | [ai-excellence-due-diligence-roadmap.md](/home/hartmut/Documents/Copilot/capakraken/docs/ai-excellence-due-diligence-roadmap.md) | Frank quality assessment and cleanup roadmap toward a showcase AI-built project | -| Target CI/CD architecture | [cicd-target-architecture.md](/home/hartmut/Documents/Copilot/capakraken/docs/cicd-target-architecture.md) | Proposed image-based build, deploy, and rollback flow | +| Target CI/CD architecture | [cicd-target-architecture.md](/home/hartmut/Documents/Copilot/capakraken/docs/cicd-target-architecture.md) | Canonical image-based build, deploy, and rollback flow | | Active roadmap and open gaps | [product-roadmap.md](/home/hartmut/Documents/Copilot/capakraken/docs/product-roadmap.md) | Primary backlog and current delivery order | | Estimating system design | [estimating-extension-design.md](/home/hartmut/Documents/Copilot/capakraken/docs/estimating-extension-design.md) | Workbook analysis, field mapping, and implementation plan | | Dispo import implementation | [dispo-import-implementation.md](/home/hartmut/Documents/Copilot/capakraken/docs/dispo-import-implementation.md) | Clean-slate Dispo v2 import design, mapping rules, staging flow, and commit policy | diff --git a/docs/ai-excellence-due-diligence-roadmap.md b/docs/ai-excellence-due-diligence-roadmap.md index 70419a2..1652c0f 100644 --- a/docs/ai-excellence-due-diligence-roadmap.md +++ b/docs/ai-excellence-due-diligence-roadmap.md @@ -66,9 +66,9 @@ The previously critical SSE and browser parser coverage issues were addressed du Evidence: the current performance review identifies repeated in-memory filtering, broad invalidation, and heavyweight timeline/report derivations in 
[performance-optimization-review-2026-03-18.md](/home/hartmut/Documents/Copilot/capakraken/docs/performance-optimization-review-2026-03-18.md). Risk: user experience and infrastructure cost will degrade as data volume grows. -3. Production delivery is still in transition. - Evidence: the current repo now has a target CI/CD path, but the old manual production path still coexists with the new image-based deploy model in [cicd-target-architecture.md](/home/hartmut/Documents/Copilot/capakraken/docs/cicd-target-architecture.md). - Risk: the operational source of truth is not yet singular. +3. Rollback and incident drills still need to be exercised, even though the deployment path is now standardized. + Evidence: the canonical production path now runs through [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml), [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml), [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml), and the single host compose file [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml). + Risk: a clean architecture path still needs operator rehearsal before it becomes operationally boring under pressure. ## Overall Rating @@ -92,7 +92,7 @@ The architecture is promising, but file size, router density, and compatibility ### Operational Maturity -`7/10` +`7.5/10` Good CI and improving deploy discipline are in place, but production standardization still needs one more step. 
@@ -191,7 +191,6 @@ Target window: 1 to 2 weeks Goals: -- complete the move to image-based deploys as the canonical path - document staging and production bootstrap as code, not tribal knowledge - ensure staging and production run the Redis-backed rate-limit path intentionally and monitor fallback usage - define rollback drills and incident response playbooks diff --git a/docs/architecture-hardening-backlog.md b/docs/architecture-hardening-backlog.md index 6436711..613f7ab 100644 --- a/docs/architecture-hardening-backlog.md +++ b/docs/architecture-hardening-backlog.md @@ -48,6 +48,7 @@ - the country listing and country detail assistant helpers now live in their own domain module, keeping the remaining geo/readmodel lookups out of the monolithic assistant router without changing the assistant contract - the remaining vacation workflow and entitlement assistant helpers now live in their own domain module, leaving `packages/api/src/router/assistant-tools.ts` as an aggregator/composition layer instead of the last mixed monolithic execution block - API and auth rate limiting now prefer shared Redis-backed counters when `REDIS_URL` is configured, while retaining an in-memory fallback for local/degraded operation with focused regression coverage +- production delivery is now consolidated on a single image-based compose path with automatic image publication on `main`, deploy-time readiness gating, and architecture guardrails that prevent host-side app builds from creeping back in ## Next Up @@ -62,8 +63,7 @@ The remaining work is now structural rather than another quick batch: 1. secrets and runtime configuration policy 2. oversized router decomposition -3. canonical image-based production delivery -4. performance hotspot reduction +3. 
performance hotspot reduction ## Working Rule diff --git a/docs/ci-cd-manual.md b/docs/ci-cd-manual.md index 25d27fd..32277f0 100644 --- a/docs/ci-cd-manual.md +++ b/docs/ci-cd-manual.md @@ -2,333 +2,193 @@ ## Overview -CapaKraken uses GitHub Actions for continuous integration and Docker for deployment. This document covers the full pipeline from code push to production. +This is the operational runbook for the canonical CapaKraken delivery path: ---- +1. CI validates every PR. +2. Every push to `main` publishes immutable release images. +3. Staging deploys one `sha-` tag. +4. Production promotes the same tag. +5. The host never builds application code from Git. -## 1. CI Pipeline (Automatic on every PR) +## 1. CI Gate -### What triggers it +The merge gate is [ci.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/ci.yml). -| Event | Trigger | -|-------|---------| -| Pull request to `main` | All CI jobs run | -| Push to `main` | All CI jobs run | +It covers: -### Jobs and their purpose +- architecture guardrails +- typecheck +- lint +- unit tests +- build +- E2E -``` -PR opened / pushed - │ - ├──→ typecheck (tsc --noEmit, ~40s) - ├──→ lint (ESLint via Turborepo, ~20s) - ├──→ test (Vitest unit tests, ~60s, needs PostgreSQL + Redis) - │ - └──→ build (next build, ~90s, runs after typecheck) - │ - └──→ e2e (Playwright, ~3-5min, runs after build) -``` +Before merging, all required checks must pass. -**typecheck, lint, and test run in parallel** for speed. Build waits for typecheck. E2E waits for build. - -### What each job checks - -| Job | Command | What it catches | -|-----|---------|----------------| -| **typecheck** | `pnpm --filter @capakraken/web exec tsc --noEmit` | Type errors across the full web app | -| **lint** | `pnpm lint` | Code style violations, unused imports, etc. 
| -| **test** | `pnpm test:unit` | Unit test failures in engine, staffing, API, shared | -| **build** | `pnpm --filter @capakraken/web exec next build` | SSR errors, dynamic import issues, bundle problems | -| **e2e** | `pnpm test:e2e` | End-to-end user flow regressions | - -### Required status checks - -Before merging a PR, **all 5 jobs must pass**. Configure this in GitHub Settings > Branches > Branch protection rules > Require status checks. - -### Caching - -The pipeline caches these artifacts to speed up subsequent runs: - -| Cache | Key | Saves | -|-------|-----|-------| -| pnpm store | `pnpm-lock.yaml` hash | ~30s install time | -| Turborepo | `.turbo` directory | ~60s on unchanged packages | -| Playwright browsers | Playwright version | ~45s browser download | - ---- - -## 2. Local Development Quality Gates - -Run these before pushing to catch issues early: +Useful local commands: ```bash -# Quick check (< 2 min) -pnpm --filter @capakraken/web exec tsc --noEmit && pnpm lint - -# Full check (< 3 min) +pnpm --filter @capakraken/web exec tsc --project tsconfig.typecheck.json --noEmit +pnpm lint pnpm test:unit - -# Full check including build (< 5 min) pnpm --filter @capakraken/web exec next build ``` -### Pre-commit hook (optional) +## 2. Image Release -You can add a Git pre-commit hook to run the quick check automatically: +[release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) runs automatically on every push to `main`. -```bash -# .husky/pre-commit -pnpm --filter @capakraken/web exec tsc --noEmit -pnpm lint +It publishes: + +- `ghcr.io//-app:sha-` +- `ghcr.io//-migrator:sha-` + +The workflow is also callable manually if a rebuild or tag override is needed. + +## 3. 
Host Bootstrap + +Each deploy target should have a dedicated directory such as `/opt/capakraken` containing: + +```text +docker-compose.prod.yml +.env.production +deploy.env +tooling/deploy/deploy-compose.sh ``` ---- +Use these examples from the repo: -## 3. Health Check Endpoints +- [tooling/deploy/.env.production.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/.env.production.example) +- [tooling/deploy/deploy.env.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy.env.example) -Two endpoints are available for monitoring: +Important host-side rules: -### GET `/api/health` — Liveness Probe +- keep `RATE_LIMIT_BACKEND=redis` +- keep runtime secrets in `.env.production` or the platform secret layer +- do not rotate runtime secrets through admin settings +- ensure the host can pull from `ghcr.io` -Returns 200 if the Node.js process is running. No external dependencies checked. - -```json -{ "status": "ok", "timestamp": "2026-03-19T10:00:00.000Z" } -``` - -**Use for:** Kubernetes/Docker liveness probe, uptime monitoring. - -### GET `/api/ready` — Readiness Probe - -Checks PostgreSQL and Redis connectivity. Returns 200 if all services are reachable, 503 if not. - -```json -// Healthy -{ "status": "ready", "postgres": "ok", "redis": "ok" } - -// Unhealthy -{ "status": "not_ready", "postgres": "ok", "redis": "error" } -``` - -**Use for:** Kubernetes/Docker readiness probe, load balancer health checks, nginx upstream checks. - ---- - -## 4. Production Docker Build - -### Building the production image - -```bash -# Build the image -docker build -f Dockerfile.prod -t capakraken:latest . 
- -# Test it locally -docker compose -f docker-compose.prod.yml up -d -``` - -### Image details - -| Property | Value | -|----------|-------| -| Base | `node:20-bookworm-slim` | -| Size | ~150-200 MB (vs ~1.5 GB dev image) | -| Output | Next.js standalone mode | -| Healthcheck | `curl -f http://localhost:3000/api/health` | -| Port | 3000 (internal), mapped to 3100 externally | - -### Environment variables - -The production image requires these environment variables: - -```env -# Required -DATABASE_URL=postgresql://user:pass@host:5432/capakraken -REDIS_URL=redis://host:6379 -NEXTAUTH_URL=https://capakraken.your-domain.com -NEXTAUTH_SECRET= - -# Optional -SENTRY_DSN=https://xxx@sentry.io/xxx -SMTP_HOST=smtp.example.com -SMTP_PORT=587 -SMTP_USER=notifications@example.com -SMTP_PASSWORD= -SMTP_FROM=CapaKraken -OPENAI_API_KEY= -AZURE_OPENAI_API_KEY= -AZURE_DALLE_API_KEY= -GEMINI_API_KEY= -ANONYMIZATION_SEED= -``` - -Generate a secure `NEXTAUTH_SECRET`: +Generate a secure `NEXTAUTH_SECRET` with: ```bash openssl rand -base64 32 ``` -Runtime secret policy: +## 4. Staging Deployment -- production secrets are injected through the deployment environment or host secret store -- admin settings must not be used to enter or rotate AI, SMTP, or anonymization secrets -- the admin UI is only for status checks and cleanup of legacy database-stored secret values +Standard path: ---- +1. merge to `main` +2. wait for [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) to publish `sha-` +3. run [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) with that tag -## 5. 
Deployment +The workflow uploads: -### docker-compose (simplest) +- [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml) +- [tooling/deploy](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/README.md) +- a short-lived `deploy.env` + +On the host, [deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh): + +1. validates the rendered compose file +2. pulls `APP_IMAGE` and `MIGRATOR_IMAGE` +3. starts PostgreSQL and Redis +4. runs Prisma migrations with the `migrator` image +5. starts the app +6. waits for `GET /api/ready` + +## 5. Production Promotion + +After staging is accepted: + +1. run [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml) +2. use the exact same `sha-` tag +3. verify `GET /api/ready` + +Production must promote the already-tested image, not rebuild from source. + +## 6. Manual Host Dry Run + +If you need to verify the host outside GitHub Actions: ```bash -# On your server, after updating the host-side env/secret source -git pull -docker compose -f docker-compose.prod.yml up -d --build +cp tooling/deploy/.env.production.example .env.production +cp tooling/deploy/deploy.env.example deploy.env +# fill in real secrets and image refs first -# Run database migrations -docker compose -f docker-compose.prod.yml exec app \ - pnpm --filter @capakraken/db db:migrate:deploy - -# Seed initial data (first deployment only) -docker compose -f docker-compose.prod.yml exec app \ - pnpm db:seed +set -a +. ./deploy.env +set +a +bash tooling/deploy/deploy-compose.sh staging ``` -### Manual deployment (current setup) +## 7. Health Endpoints -Since `capakraken.hartmut-noerenberg.com` runs behind nginx: +### GET `/api/health` + +Process liveness only. Use it for coarse uptime checks. + +### GET `/api/ready` + +Checks PostgreSQL and Redis connectivity. Use it for deploy readiness and traffic admission. 
+ +For deploys, `/api/ready` is the source of truth. + +## 8. Rollback + +Rollback is image-based: + +1. choose the previous healthy `sha-` +2. rerun the staging or production deploy workflow with that tag +3. confirm `GET /api/ready` + +Schema changes still need expand-and-contract discipline for rollback safety. + +## 9. Troubleshooting + +### CI failure + +Run the failing command locally: ```bash -# On the server -cd /home/hartmut/Documents/Copilot/capakraken -git pull origin main -pnpm install -pnpm db:generate -pnpm db:validate -pnpm --filter @capakraken/db db:migrate:deploy -pnpm --filter @capakraken/web exec next build -rm -rf apps/web/.next/cache # clear stale cache - -# Restart the app (systemd, pm2, or manual) -fuser -k 3100/tcp 2>/dev/null -PORT=3100 pnpm --filter @capakraken/web start & -``` - -Use the repo-level `pnpm db:*` commands for Prisma/database operations. They load `.env`, `.env.local`, `.env.$NODE_ENV`, and `.env.$NODE_ENV.local` automatically before invoking Prisma. - -If you rotate runtime secrets during a manual deploy, update the host-side environment source first, then restart the app so the new process reads the updated values. Do not patch those values through admin settings. - -### nginx configuration - -The existing nginx reverse proxy should forward to port 3100: - -```nginx -server { - server_name capakraken.hartmut-noerenberg.com; - - location / { - proxy_pass http://127.0.0.1:3100; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - - # SSE support (keep connection open) - proxy_read_timeout 86400s; - proxy_buffering off; - } -} -``` - ---- - -## 6. 
Monitoring Setup - -### Sentry (error tracking) - -After creating a Sentry project, add the DSN to `.env.production`: - -```env -SENTRY_DSN=https://xxx@sentry.io/xxx -``` - -Errors are automatically captured by the Sentry integration in Next.js. - -### Uptime monitoring - -Point an external monitor (UptimeRobot, Better Stack, etc.) at: - -``` -https://capakraken.hartmut-noerenberg.com/api/health -``` - -Alert if status code != 200 for more than 2 consecutive checks. - ---- - -## 7. Troubleshooting - -### CI job fails: "tsc --noEmit" - -TypeScript error in the web app. Run locally: -```bash -pnpm --filter @capakraken/web exec tsc --noEmit -``` - -### CI job fails: "test:unit" - -Unit test failure. Run locally: -```bash +pnpm --filter @capakraken/web exec tsc --project tsconfig.typecheck.json --noEmit +pnpm lint pnpm test:unit -``` - -### CI job fails: "next build" - -Build error (often `ssr: false` in Server Components, missing exports). Run locally: -```bash pnpm --filter @capakraken/web exec next build ``` -### CI job fails: "e2e" +### Deploy fails before container start -Playwright test failure. Check the HTML report artifact in the GitHub Actions run. +Check the rendered compose configuration on the host: -### Production: 502 Bad Gateway - -The Next.js process isn't running. Check: ```bash -ss -tlnp | grep 3100 # Is anything listening? -tail -50 /tmp/capakraken-dev.log # Check app logs +docker compose -f docker-compose.prod.yml config -q ``` -Restart: +Then verify `.env.production` and `deploy.env`. 
+ +### App never becomes ready + +Check: + ```bash -fuser -k 3100/tcp 2>/dev/null -pnpm dev & # or pnpm start for production mode +docker compose -f docker-compose.prod.yml ps +docker compose -f docker-compose.prod.yml logs --tail 200 app +curl -s http://127.0.0.1:${APP_HOST_PORT:-3000}/api/ready ``` -### Production: 500 Internal Server Error +### Database migration failure + +Inspect the migrator logs: -Usually a stale Prisma client after schema changes: ```bash -pnpm db:generate -pnpm db:validate -rm -rf apps/web/.next -pnpm --filter @capakraken/web exec next build -# Restart the server +docker compose -f docker-compose.prod.yml run --rm migrator ``` -### Database connection issues +### Registry pull failure -Check the `/api/ready` endpoint: -```bash -curl -s https://capakraken.hartmut-noerenberg.com/api/ready | jq . -``` +Verify `GHCR_USERNAME` and `GHCR_TOKEN`, then test: -If `postgres: "error"`, verify: ```bash -docker ps | grep postgres # Is container running? -psql -h localhost -p 5433 -U capakraken -d capakraken # Can you connect? +printf '%s\n' "$GHCR_TOKEN" | docker login ghcr.io -u "$GHCR_USERNAME" --password-stdin ``` diff --git a/docs/cicd-target-architecture.md b/docs/cicd-target-architecture.md index ccbec44..2d69056 100644 --- a/docs/cicd-target-architecture.md +++ b/docs/cicd-target-architecture.md @@ -2,83 +2,67 @@ ## Goal -This document captures the intended delivery model for CapaKraken without replacing the currently working manual production setup immediately. +This document describes the canonical release path for CapaKraken. -The target state is: +The release model is now: -1. CI validates every PR. -2. GitHub Actions builds immutable Docker images. -3. Staging and production pull those exact images from a registry. -4. Database migrations run as an explicit deploy step. -5. Traffic is considered safe only after the app answers `GET /api/ready`. +1. PRs are validated by CI before merge. +2. 
Every push to `main` publishes immutable `app` and `migrator` images. +3. Staging and production promote the exact same `sha-` tag. +4. The host deploys only from images and runtime env files. +5. A deployment is successful only after `GET /api/ready` passes. -## Core Idea - -The production host should stop building application code from a Git checkout. Instead, it should only: - -- pull a versioned `app` image -- pull a matching `migrator` image -- run Prisma deploy migrations -- start the application container -- wait for readiness - -That removes "works on the server but not in CI" drift and makes rollbacks much simpler. - -## Delivery Flow +## Canonical Flow ### 1. Pull Request Validation -The existing `CI` workflow continues to validate: +The main [ci.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/ci.yml) workflow remains the merge gate for: -- architecture guardrails for SSE audience scoping +- architecture guardrails - typecheck - lint - unit tests - build - E2E -This remains the quality gate before merge. +### 2. Automatic Image Release -The guardrail step currently enforces three invariants: +[release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) now runs automatically on every push to `main` and can still be started manually for rebuilds or tag overrides. -- no role-based SSE audience fan-out in [event-bus.ts](/home/hartmut/Documents/Copilot/capakraken/packages/api/src/sse/event-bus.ts) -- no role-derived subscription audiences in [subscription-policy.ts](/home/hartmut/Documents/Copilot/capakraken/packages/api/src/sse/subscription-policy.ts) -- no client-provided audience parsing in [route.ts](/home/hartmut/Documents/Copilot/capakraken/apps/web/src/app/api/sse/timeline/route.ts) +It publishes two images from [Dockerfile.prod](/home/hartmut/Documents/Copilot/capakraken/Dockerfile.prod): -### 2. 
Image Build +- `ghcr.io//-app:sha-` +- `ghcr.io//-migrator:sha-` -The new manual workflow [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) builds two images from [Dockerfile.prod](/home/hartmut/Documents/Copilot/capakraken/Dockerfile.prod): +### 3. Staging Promotion -- `runner` target as the production app image -- `migrator` target as the Prisma migration image +[deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) copies the canonical deploy bundle to the staging host: -Recommended tag format: +- [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml) +- [tooling/deploy/deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh) +- the rest of [tooling/deploy](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/README.md) -- `sha-` +GitHub Actions also writes a short-lived `deploy.env` containing `APP_IMAGE`, `MIGRATOR_IMAGE`, and the host port. -Example: +### 4. Host-Side Deployment -```text -ghcr.io//capakraken-app:sha-abc123 -ghcr.io//capakraken-migrator:sha-abc123 -``` +On the target host, [deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh): -### 3. Staging Deploy +1. loads `.env.production` and `deploy.env` +2. validates the rendered compose file +3. pulls the immutable `app` and `migrator` images +4. starts PostgreSQL and Redis +5. runs Prisma migrations through the dedicated `migrator` image +6. starts the new `app` container +7. waits for `GET /api/ready` -The staging workflow [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) is intended to: +The host does not build application code from Git anymore. -1. connect to the staging host over SSH -2. copy the deploy assets -3. export `APP_IMAGE` and `MIGRATOR_IMAGE` -4. 
run [deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh) +### 5. Production Promotion -The compose file used for this target flow is [docker-compose.cicd.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.cicd.yml). +[deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml) repeats the exact staging flow with the same image tag after staging acceptance. -### 4. Production Promotion - -The production workflow [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml) follows the same logic as staging, but the image tag is promoted manually. - -That means production uses an image that was already built and can already have been exercised in staging. +That keeps staging and production on the same artifact instead of rebuilding. ## Required Infrastructure @@ -86,139 +70,66 @@ That means production uses an image that was already built and can already have - GitHub repository with Actions enabled - GHCR or another container registry -- 1 Linux host with Docker and Docker Compose +- one Linux host with Docker Engine and Docker Compose v2 - PostgreSQL - Redis -- reverse proxy such as nginx - SSH access from GitHub Actions to the host +- reverse proxy or load balancer in front of the app ### Recommended - separate staging and production hosts - GitHub Environments for `staging` and `production` -- required reviewer approval for `production` -- backup strategy for PostgreSQL volumes -- uptime monitoring and error tracking +- required approval for the `production` environment +- monitoring on `/api/health` and `/api/ready` +- PostgreSQL backup and restore drills -## Secrets +## Runtime Configuration -### GitHub Environment Secrets - -For `staging`: - -- `STAGING_SSH_HOST` -- `STAGING_SSH_PORT` -- `STAGING_SSH_USER` -- `STAGING_SSH_KEY` -- `STAGING_DEPLOY_PATH` -- `STAGING_APP_HOST_PORT` -- `STAGING_GHCR_USERNAME` -- `STAGING_GHCR_TOKEN` - -For 
`production`: - -- `PROD_SSH_HOST` -- `PROD_SSH_PORT` -- `PROD_SSH_USER` -- `PROD_SSH_KEY` -- `PROD_DEPLOY_PATH` -- `PROD_APP_HOST_PORT` -- `PROD_GHCR_USERNAME` -- `PROD_GHCR_TOKEN` - -### Host-side Files - -Each target host should already have: +The canonical host-side inputs are: +- [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml) - `.env.production` -- Docker installed -- network access to the container registry +- `deploy.env` -The repository now also contains a small host example at [tooling/deploy/.env.production.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/.env.production.example) and an operator note at [tooling/deploy/README.md](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/README.md). +`.env.production` holds long-lived runtime configuration and secrets. The example file is [tooling/deploy/.env.production.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/.env.production.example). -### Minimum Host Bootstrap +`deploy.env` is short-lived deployment metadata. The example file is [tooling/deploy/deploy.env.example](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy.env.example). -For each target host, create a dedicated deploy directory such as `/opt/capakraken` and place these files there: +Important invariants: -```text -docker-compose.cicd.yml -.env.production -tooling/deploy/deploy-compose.sh -``` - -`.env.production` should hold the long-lived runtime settings, including: - -```env -POSTGRES_PASSWORD= -NEXTAUTH_URL=https://capakraken.example.com -NEXTAUTH_SECRET= -``` - -GitHub Actions only injects the short-lived image references through `deploy.env`. The deploy script then loads both files before calling Docker Compose, so compose interpolation and container runtime env use the same source of truth. 
- -### Runtime Secret Provisioning Policy - -Production and staging secrets should be provisioned at the host or platform-secret layer, not through admin mutations and not through application database writes. - -That includes at least: - -```env -OPENAI_API_KEY= -AZURE_OPENAI_API_KEY= -AZURE_DALLE_API_KEY= -GEMINI_API_KEY= -SMTP_PASSWORD= -ANONYMIZATION_SEED= -``` - -Operational rule: - -- keep these values in `.env.production` only for smaller self-managed hosts, or preferably in the host's secret manager / encrypted environment facility -- do not rotate or patch these values through `SystemSettings` -- use the admin settings page only to verify runtime source/status and to clear leftover legacy database copies -- after migration, legacy database secret fields should be empty in both staging and production +- `RATE_LIMIT_BACKEND=redis` should stay explicit in release environments +- runtime AI, SMTP, and anonymization secrets belong to the host or platform secret layer +- admin settings are for verification and legacy-secret cleanup, not for secret rotation ## Database Policy -For release environments, use: +Release environments must run migrations through the `migrator` image, which executes: ```bash pnpm --filter @capakraken/db db:migrate:deploy ``` -Do not use `db:push` as the main production deployment mechanism. `db:push` is convenient for local development, but it does not give the release traceability that a migration-based deploy requires. +`db:push` remains a local-development tool, not a production rollout mechanism. ## Rollback Model -Rollback should be image-based: +Rollback is image-based: -1. choose the previous good `sha-...` tag -2. run the production deploy workflow again with that tag -3. confirm readiness +1. choose the previous healthy `sha-` tag +2. redeploy staging or production with that tag +3. confirm `GET /api/ready` -This is only safe when schema changes follow backwards-compatible expand and contract rules. 
+This assumes schema changes follow backwards-compatible expand-and-contract rollout rules. -## How A Production Update Works +## Production Update Summary -The intended production update path is: +The standard production update is: -1. merge to `main` after the existing CI workflow is green -2. run [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) to build immutable `app` and `migrator` images tagged as `sha-` -3. run [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) with that exact image tag -4. GitHub Actions uploads the deploy bundle to the staging host and writes a temporary `deploy.env` -5. [deploy-compose.sh](/home/hartmut/Documents/Copilot/capakraken/tooling/deploy/deploy-compose.sh) pulls images, starts PostgreSQL and Redis, runs Prisma deploy migrations, starts the new app container, and waits for `GET /api/ready` -6. after staging is accepted, run [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml) with the same tag -7. production repeats the same image-based flow, so the running artifact matches staging +1. merge to `main` after CI is green +2. let [release-image.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/release-image.yml) publish `sha-<git-sha>` images +3. deploy that tag to staging through [deploy-staging.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-staging.yml) +4. validate staging +5. promote the same tag through [deploy-prod.yml](/home/hartmut/Documents/Copilot/capakraken/.github/workflows/deploy-prod.yml) -That means the production host no longer builds from Git. It only receives a versioned image and starts it after migrations complete. - -The same principle applies to secrets: the running container reads them from the deployment environment at start time, so an update only needs a new image tag unless secret material itself is being rotated.
- -## Current Status - -The repository now contains the CI/CD scaffolding, but the existing manual production setup remains untouched: - -- current manual compose flow: [docker-compose.prod.yml](/home/hartmut/Documents/Copilot/capakraken/docker-compose.prod.yml) -- current manual runbook: [ci-cd-manual.md](/home/hartmut/Documents/Copilot/capakraken/docs/ci-cd-manual.md) - -This allows the team to introduce the new path gradually instead of switching production in one step. +The important property is artifact identity: staging and production run the same image, not two separate builds. diff --git a/package.json b/package.json index 83fb155..359b856 100644 --- a/package.json +++ b/package.json @@ -9,6 +9,7 @@ "test": "turbo run test:unit", "test:unit": "turbo test:unit", "test:e2e": "turbo test:e2e", + "check:architecture": "node ./scripts/check-architecture-guardrails.mjs", "db:doctor": "node ./scripts/db-doctor.mjs capakraken", "db:prisma": "node ./scripts/prisma-with-env.mjs", "db:push": "node ./scripts/with-env.mjs pnpm --filter @capakraken/db db:push", diff --git a/scripts/check-architecture-guardrails.mjs b/scripts/check-architecture-guardrails.mjs new file mode 100644 index 0000000..cc01075 --- /dev/null +++ b/scripts/check-architecture-guardrails.mjs @@ -0,0 +1,155 @@ +import { readFile } from "node:fs/promises"; +import path from "node:path"; +import process from "node:process"; + +const rootDir = process.cwd(); + +const rules = [ + { + file: "packages/api/src/sse/event-bus.ts", + required: [], + forbidden: [ + { pattern: /\bRoleSseAudience\b/, message: "role-based SSE audience types must not reappear" }, + { pattern: /\broleAudience\s*\(/, message: "role-derived SSE audiences must not be emitted" }, + { pattern: /\bBROADCAST_SENT\b/, message: "broadcast SSE event resurrection needs explicit architecture review" }, + ], + }, + { + file: "packages/api/src/sse/subscription-policy.ts", + required: [ + { + pattern: /\bderiveUserSseSubscription\b/, + message: 
"subscription derivation must stay centralized in deriveUserSseSubscription", + }, + ], + forbidden: [ + { pattern: /\broleAudience\s*\(/, message: "subscription policy must not derive role audiences" }, + ], + }, + { + file: "apps/web/src/app/api/sse/timeline/route.ts", + required: [ + { + pattern: /\bderiveUserSseSubscription\s*\(/, + message: "timeline SSE route must derive audiences server-side from the authenticated user", + }, + ], + forbidden: [ + { pattern: /\bsearchParams\b/, message: "timeline SSE route must not accept client-provided audience scoping" }, + { pattern: /\baudience\b/, message: "timeline SSE route must not parse raw audience values from the client" }, + ], + }, + { + file: "docker-compose.prod.yml", + required: [ + { + pattern: /image:\s+\$\{APP_IMAGE:\?set APP_IMAGE\}/, + message: "production compose must deploy the immutable app image", + }, + { + pattern: /image:\s+\$\{MIGRATOR_IMAGE:\?set MIGRATOR_IMAGE\}/, + message: "production compose must deploy the immutable migrator image", + }, + { + pattern: /http:\/\/localhost:3000\/api\/ready/, + message: "production compose must gate app health on the readiness endpoint", + }, + { + pattern: /RATE_LIMIT_BACKEND:\s+\$\{RATE_LIMIT_BACKEND:-redis\}/, + message: "production compose must intentionally pin the Redis-backed rate-limit path", + }, + ], + forbidden: [ + { pattern: /\bbuild:/, message: "production compose must not build application images on the host" }, + ], + }, + { + file: ".github/workflows/release-image.yml", + required: [ + { + pattern: /push:\s*\n\s*branches:\s*\[main\]/, + message: "image releases must build automatically on pushes to main", + }, + { + pattern: /workflow_dispatch:/, + message: "image release must remain manually callable for rebuilds and tag overrides", + }, + { + pattern: /target:\s+runner/, + message: "release workflow must keep publishing the runner image", + }, + { + pattern: /target:\s+migrator/, + message: "release workflow must keep publishing the 
migrator image", + }, + ], + forbidden: [], + }, + { + file: ".github/workflows/deploy-staging.yml", + required: [ + { + pattern: /docker-compose\.prod\.yml tooling\/deploy/, + message: "staging deploy must ship the canonical production compose bundle", + }, + ], + forbidden: [], + }, + { + file: ".github/workflows/deploy-prod.yml", + required: [ + { + pattern: /docker-compose\.prod\.yml tooling\/deploy/, + message: "production deploy must ship the canonical production compose bundle", + }, + ], + forbidden: [], + }, + { + file: "tooling/deploy/deploy-compose.sh", + required: [ + { + pattern: /COMPOSE_FILE="\$\{COMPOSE_FILE:-docker-compose\.prod\.yml\}"/, + message: "deploy script must default to the canonical production compose file", + }, + { + pattern: /READY_URL="\$\{READY_URL:-http:\/\/127\.0\.0\.1:\$\{APP_HOST_PORT:-3000\}\/api\/ready\}"/, + message: "deploy script must wait on the readiness endpoint", + }, + { + pattern: /docker compose -f "\$\{COMPOSE_FILE\}" config -q/, + message: "deploy script must validate the rendered compose file before pulling images", + }, + ], + forbidden: [], + }, +]; + +const violations = []; + +for (const rule of rules) { + const absolutePath = path.join(rootDir, rule.file); + const source = await readFile(absolutePath, "utf8"); + + for (const requirement of rule.required) { + if (!requirement.pattern.test(source)) { + violations.push(`${rule.file}: missing guardrail anchor: ${requirement.message}`); + } + } + + for (const forbidden of rule.forbidden) { + if (forbidden.pattern.test(source)) { + violations.push(`${rule.file}: forbidden pattern matched: ${forbidden.message}`); + } + } +} + +if (violations.length > 0) { + console.error("Architecture guardrail check failed:"); + for (const violation of violations) { + console.error(`- ${violation}`); + } + process.exit(1); +} + +console.log("Architecture guardrails passed."); diff --git a/tooling/deploy/.env.production.example b/tooling/deploy/.env.production.example index 
515ec37..85310d2 100644 --- a/tooling/deploy/.env.production.example +++ b/tooling/deploy/.env.production.example @@ -1,8 +1,9 @@ -# Runtime settings consumed by the app and by docker-compose.cicd.yml on the target host. +# Runtime settings consumed by the app and by docker-compose.prod.yml on the target host. POSTGRES_PASSWORD=replace-with-a-long-random-password NEXTAUTH_URL=https://capakraken.example.com NEXTAUTH_SECRET=replace-with-a-long-random-secret +RATE_LIMIT_BACKEND=redis # Optional but commonly needed application settings. SENTRY_DSN= diff --git a/tooling/deploy/README.md b/tooling/deploy/README.md index 5f14611..8f36f73 100644 --- a/tooling/deploy/README.md +++ b/tooling/deploy/README.md @@ -1,11 +1,12 @@ # Deploy Tooling -This directory contains the additive deployment scaffold for the image-based CI/CD target path. +This directory contains the canonical host-side tooling for the image-based staging and production path. ## Files -- `deploy-compose.sh`: pulls images, runs migrations, starts the app, and waits for readiness +- `deploy-compose.sh`: validates compose input, pulls images, runs migrations, starts the app, and waits for readiness - `.env.production.example`: example host-side runtime configuration +- `deploy.env.example`: example short-lived deployment manifest written by GitHub Actions ## Host Layout @@ -13,7 +14,7 @@ On the target host, the deploy directory should contain: ```text / - docker-compose.cicd.yml + docker-compose.prod.yml deploy.env .env.production tooling/deploy/deploy-compose.sh @@ -25,16 +26,20 @@ On the target host, the deploy directory should contain: 1. Copy `tooling/deploy/.env.production.example` to the target host as `.env.production`. 2. Fill in the required secrets and URLs. -3. Provision runtime AI/SMTP/anonymization secrets on the host through `.env.production` or the platform's secret facility. -4. Keep admin settings for status/verification only; do not use them to enter or rotate operational secrets. -5. 
After migration, use the admin cleanup action to remove any legacy database-stored runtime secrets. -6. Ensure Docker Engine and Docker Compose v2 are installed. -7. Ensure the target host can pull from `ghcr.io`. -8. Run the image release workflow, then the staging or production deploy workflow with the same image tag. +3. Keep `RATE_LIMIT_BACKEND=redis` so production uses the shared counter path intentionally. +4. Copy `tooling/deploy/deploy.env.example` to the host only if you want to dry-run the deploy script manually. +5. Replace the placeholder images in `deploy.env.example` with a real `sha-<git-sha>` tag and save it as `deploy.env` for a manual dry run. +6. Provision runtime AI/SMTP/anonymization secrets on the host through `.env.production` or the platform's secret facility. +7. Keep admin settings for status/verification only; do not use them to enter or rotate operational secrets. +8. After migration, use the admin cleanup action to remove any legacy database-stored runtime secrets. +9. Ensure Docker Engine and Docker Compose v2 are installed. +10. Ensure the target host can pull from `ghcr.io`. +11. A normal release no longer needs a Git checkout on the host. The host only needs the deploy bundle plus the two env files. +12. Merge to `main`, let `release-image.yml` publish the immutable images, then run the staging or production deploy workflow with the same image tag.
## Manual Host Test -After the files are present on the host, the flow can be tested manually: +After the files are present on the host, the canonical flow can be tested manually: ```bash set -a diff --git a/tooling/deploy/deploy-compose.sh b/tooling/deploy/deploy-compose.sh index f49dd8e..7ef1586 100755 --- a/tooling/deploy/deploy-compose.sh +++ b/tooling/deploy/deploy-compose.sh @@ -2,7 +2,7 @@ set -euo pipefail DEPLOY_ENV="${1:-unknown}" -COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.cicd.yml}" +COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.prod.yml}" APP_ENV_FILE="${APP_ENV_FILE:-.env.production}" DEPLOY_ENV_FILE="${DEPLOY_ENV_FILE:-deploy.env}" READY_URL="${READY_URL:-http://127.0.0.1:${APP_HOST_PORT:-3000}/api/ready}" @@ -36,6 +36,7 @@ if [ -n "${GHCR_USERNAME:-}" ] && [ -n "${GHCR_TOKEN:-}" ]; then printf '%s\n' "${GHCR_TOKEN}" | docker login ghcr.io -u "${GHCR_USERNAME}" --password-stdin fi +docker compose -f "${COMPOSE_FILE}" config -q docker compose -f "${COMPOSE_FILE}" pull app migrator docker compose -f "${COMPOSE_FILE}" up -d postgres redis docker compose -f "${COMPOSE_FILE}" run --rm migrator diff --git a/tooling/deploy/deploy.env.example b/tooling/deploy/deploy.env.example new file mode 100644 index 0000000..2804f91 --- /dev/null +++ b/tooling/deploy/deploy.env.example @@ -0,0 +1,5 @@ +APP_IMAGE=ghcr.io/example/capakraken-app:sha-abc123 +MIGRATOR_IMAGE=ghcr.io/example/capakraken-migrator:sha-abc123 +APP_HOST_PORT=3000 +GHCR_USERNAME= +GHCR_TOKEN=