CapaKraken/.github/workflows/ci.yml
Hartmut 805bb0464f security(docker): remove hardcoded dev password, stop placeholder secrets leaking into migrator image (#50)
- docker-compose.yml: require ${POSTGRES_PASSWORD} for the postgres service
  and the app container's DATABASE_URL. No default — compose refuses to start
  without it, mirroring the existing PGADMIN_PASSWORD pattern.
- Dockerfile.prod: move auth/db ENV assignments from persistent ENV lines into
  an inline env prefix on the `pnpm build` RUN step. Placeholders are still
  available to `next build` but no longer persist in the builder layer or in
  the published migrator image (which is FROM builder).
- Dockerfile.dev: add HEALTHCHECK against /api/health and install curl for it.
- .dockerignore: cover nested **/.env*, **/*.pem, **/*.key, **/secrets/**.
- runtime-env.ts: add the CI build placeholder strings to the disallowed-secret
  set so a misconfigured prod deploy using the baked-in ARG defaults fails
  startup instead of silently running with a known-bad secret.
- .env.example: document the new POSTGRES_PASSWORD requirement.
- CI: write POSTGRES_PASSWORD into the Fresh-Linux Docker Deploy job's .env
  (must match docker-compose.ci.yml's hardcoded DATABASE_URL), and provide a
  dummy value in the E2E job, where compose validates interpolation for all services.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-17 14:50:05 +02:00
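
A minimal sketch of the required-variable pattern described above, for readers who don't have docker-compose.yml open (illustrative only; the service names and URL shape are assumptions, not the project's actual file). Compose's ${VAR:?} interpolation aborts the compose invocation during config parsing whenever the variable is unset or empty:

# docker-compose.yml (sketch, not the real file)
services:
  postgres:
    image: postgres:16
    environment:
      # No default value: compose refuses to start the stack without it.
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD is required}
  app:
    environment:
      DATABASE_URL: postgresql://capakraken:${POSTGRES_PASSWORD:?POSTGRES_PASSWORD is required}@postgres:5432/capakraken

The Dockerfile.prod half of the change works the other way around: the placeholder auth/db values move from persistent ENV instructions to an inline `VAR=value pnpm build` prefix on the RUN step, so `next build` still sees them but they never persist into the builder layer or the migrator image built FROM it.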

name: CI
# Retrigger marker: b2d89ca (docker-deploy smoke retry)
on:
  push:
    branches: [main]
    paths-ignore:
      - "docs/**"
      - ".gitea/**"
      - "**/*.md"
      - "LICENSE"
  pull_request:
    branches: [main]
    paths-ignore:
      - "docs/**"
      - ".gitea/**"
      - "**/*.md"
      - "LICENSE"
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
env:
  NODE_VERSION: "20"
  PNPM_VERSION: "9.14.2"
  CI_AUTH_URL: http://localhost:3100
  # Placeholder for CI — real secret only matters at deploy time.
  # next build collects page data for auth routes and aborts if empty.
  CI_AUTH_SECRET: ci-test-secret-minimum-32-chars-xx
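  # Note (from the commit message above): runtime-env.ts includes these CI
  # build placeholder strings in its disallowed-secret set, so a misconfigured
  # production deploy that ships them fails at startup instead of silently
  # running with a known-bad secret.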
jobs:
  guardrails:
    name: Architecture Guardrails
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        run: npm install -g pnpm@${{ env.PNPM_VERSION }}
      - uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Run repo script tests
        run: pnpm test:scripts
      - name: Check architecture guardrails
        run: pnpm check:architecture
      - name: Check workspace exports
        run: pnpm check:exports
      - name: Check workspace imports
        run: pnpm check:imports
      - name: Security audit (high+ severity)
        run: pnpm audit --audit-level=high
  # ──────────────────────────────────────────────
  # Typecheck — ~40s, no services needed
  # ──────────────────────────────────────────────
  typecheck:
    name: Typecheck
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        run: npm install -g pnpm@${{ env.PNPM_VERSION }}
      - uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Generate Prisma client
        run: pnpm db:generate
      - name: Cache Turborepo
        uses: actions/cache@v4
        continue-on-error: true
        with:
          path: .turbo
          key: turbo-typecheck-${{ github.sha }}
          restore-keys: turbo-typecheck-
      - name: Run typecheck
        run: pnpm typecheck
  assistant-split:
    name: Assistant Split Regression
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        run: npm install -g pnpm@${{ env.PNPM_VERSION }}
      - uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Generate Prisma client
        run: pnpm db:generate
      - name: Run assistant split regression
        run: pnpm --filter @capakraken/api test:assistant-split
  # ──────────────────────────────────────────────
  # Lint — ~20s, no services needed
  # ──────────────────────────────────────────────
  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        run: npm install -g pnpm@${{ env.PNPM_VERSION }}
      - uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Generate Prisma client
        run: pnpm db:generate
      - name: Cache Turborepo
        uses: actions/cache@v4
        continue-on-error: true
        with:
          path: .turbo
          key: turbo-lint-${{ github.sha }}
          restore-keys: turbo-lint-
      - name: Run lint
        run: pnpm lint
  # ──────────────────────────────────────────────
  # Unit tests — needs PostgreSQL + Redis
  # ──────────────────────────────────────────────
  test:
    name: Unit Tests
    runs-on: ubuntu-latest
    services:
      postgres:
        image: postgres:16
        env:
          POSTGRES_DB: capakraken_test
          POSTGRES_USER: capakraken
          POSTGRES_PASSWORD: capakraken_test
        options: >-
          --health-cmd="pg_isready -U capakraken -d capakraken_test"
          --health-interval=10s
          --health-timeout=5s
          --health-retries=5
      redis:
        image: redis:7
        options: >-
          --health-cmd="redis-cli ping"
          --health-interval=10s
          --health-timeout=5s
          --health-retries=5
    env:
      DATABASE_URL: postgresql://capakraken:capakraken_test@postgres:5432/capakraken_test
      REDIS_URL: redis://redis:6379
      # Force in-memory rate limiter to avoid cross-test state when Redis drops.
      # Redis fallback downgrades to max/10 limits which rate-limits unit tests.
      RATE_LIMIT_BACKEND: memory
      # Tests assume Europe/Berlin for month-boundary math (new Date(y,m,1)).
      TZ: Europe/Berlin
      NEXTAUTH_URL: ${{ env.CI_AUTH_URL }}
      AUTH_URL: ${{ env.CI_AUTH_URL }}
      NEXTAUTH_SECRET: ${{ env.CI_AUTH_SECRET }}
      AUTH_SECRET: ${{ env.CI_AUTH_SECRET }}
    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        run: npm install -g pnpm@${{ env.PNPM_VERSION }}
      - uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Generate Prisma client
        run: pnpm db:generate
      - name: Run unit tests with coverage
        run: |
          pnpm --filter @capakraken/web test:unit -- --coverage
          pnpm --filter @capakraken/engine exec vitest run --coverage
          pnpm --filter @capakraken/staffing exec vitest run --coverage
          pnpm --filter @capakraken/api exec vitest run --coverage
          pnpm --filter @capakraken/application exec vitest run --coverage
          pnpm --filter @capakraken/shared exec vitest run --coverage
          pnpm --filter @capakraken/db test:unit
      - name: Upload coverage reports
        uses: actions/upload-artifact@v4
        continue-on-error: true # upload-artifact@v4 unsupported on Gitea (GHES) runner
        if: ${{ !cancelled() }}
        with:
          name: coverage-reports
          path: |
            apps/web/coverage/
            packages/engine/coverage/
            packages/staffing/coverage/
            packages/api/coverage/
            packages/application/coverage/
            packages/shared/coverage/
          retention-days: 14
  # ──────────────────────────────────────────────
  # Build — depends on typecheck passing
  # ──────────────────────────────────────────────
  build:
    name: Build
    needs: [guardrails, typecheck]
    runs-on: ubuntu-latest
    env:
      DATABASE_URL: postgresql://placeholder:placeholder@localhost:5432/placeholder
      REDIS_URL: redis://placeholder:6379
      NEXTAUTH_URL: ${{ env.CI_AUTH_URL }}
      AUTH_URL: ${{ env.CI_AUTH_URL }}
      NEXTAUTH_SECRET: ${{ env.CI_AUTH_SECRET }}
      AUTH_SECRET: ${{ env.CI_AUTH_SECRET }}
    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        run: npm install -g pnpm@${{ env.PNPM_VERSION }}
      - uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Generate Prisma client
        run: pnpm db:generate
      - name: Cache Turborepo
        uses: actions/cache@v4
        continue-on-error: true
        with:
          path: .turbo
          key: turbo-build-${{ github.sha }}
          restore-keys: turbo-build-
      - name: Cache Next.js build
        uses: actions/cache@v4
        continue-on-error: true
        with:
          path: apps/web/.next/cache
          key: nextjs-${{ hashFiles('pnpm-lock.yaml') }}-${{ github.sha }}
          restore-keys: nextjs-${{ hashFiles('pnpm-lock.yaml') }}-
      - name: Build
        run: pnpm --filter @capakraken/web exec next build
  # ──────────────────────────────────────────────
  # E2E — depends on build, needs PostgreSQL + Redis
  # ──────────────────────────────────────────────
  e2e:
    name: E2E Tests
    needs: [build]
    runs-on: ubuntu-latest
    services:
      # Unique hostnames — "postgres"/"redis" collide with Gitea's own core
      # containers and concurrent job service containers on the shared
      # gitea_gitea network, producing split-brain where push hits one DB and
      # seed hits another. See audit_logs-missing bug from commit f856dd26.
      e2epg:
        image: postgres:16
        env:
          POSTGRES_DB: capakraken_test
          POSTGRES_USER: capakraken
          POSTGRES_PASSWORD: capakraken_test
        options: >-
          --health-cmd="pg_isready -U capakraken -d capakraken_test"
          --health-interval=10s
          --health-timeout=5s
          --health-retries=5
      e2eredis:
        image: redis:7
        options: >-
          --health-cmd="redis-cli ping"
          --health-interval=10s
          --health-timeout=5s
          --health-retries=5
    env:
      DATABASE_URL: postgresql://capakraken:capakraken_test@e2epg:5432/capakraken_test
      # Playwright test-server.mjs requires an explicit test DB URL.
      PLAYWRIGHT_DATABASE_URL: postgresql://capakraken:capakraken_test@e2epg:5432/capakraken_test
      # prisma-with-env.mjs refuses to run unless DATABASE_URL's db name matches
      # the expected target; default is "capakraken", CI uses capakraken_test.
      CAPAKRAKEN_EXPECTED_DB_NAME: capakraken_test
      ALLOW_DESTRUCTIVE_DB_TOOLS: "true"
      CONFIRM_DESTRUCTIVE_DB_NAME: capakraken_test
      REDIS_URL: redis://e2eredis:6379
      PORT: 3100
      # test-server.mjs spawns `docker compose --profile test up postgres-test`;
      # docker compose validates env interpolation in ALL services before
      # applying the profile filter, so the unused pgadmin service's
      # ${PGADMIN_PASSWORD:?} check fires and aborts the compose call.
      # Provide a dummy value so parsing succeeds — pgadmin is never started.
      PGADMIN_PASSWORD: ci-unused
      # Same reason as PGADMIN_PASSWORD: docker compose validates env
      # interpolation across all services, including postgres (which has
      # ${POSTGRES_PASSWORD:?}). Dummy value — postgres service is not used
      # here (the `e2epg` GH Actions service container is).
      POSTGRES_PASSWORD: ci-unused
      # Tell test-server.mjs not to spin up its own postgres-test container
      # — the e2epg job service is already running and reachable. Without
      # this, test-server tries to publish 5432 on the QNAP host, which
      # collides with Gitea's core postgres.
      PLAYWRIGHT_USE_EXTERNAL_DB: "true"
      NEXTAUTH_URL: ${{ env.CI_AUTH_URL }}
      AUTH_URL: ${{ env.CI_AUTH_URL }}
      NEXTAUTH_SECRET: ${{ env.CI_AUTH_SECRET }}
      AUTH_SECRET: ${{ env.CI_AUTH_SECRET }}
    steps:
      - uses: actions/checkout@v4
      - name: Install pnpm
        run: npm install -g pnpm@${{ env.PNPM_VERSION }}
      - uses: actions/setup-node@v4
        with:
          node-version: ${{ env.NODE_VERSION }}
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Generate Prisma client
        run: pnpm db:generate
      - name: Cache Playwright browsers
        id: playwright-cache
        uses: actions/cache@v4
        continue-on-error: true
        with:
          path: ~/.cache/ms-playwright
          key: playwright-${{ hashFiles('apps/web/package.json') }}
          restore-keys: playwright-
      - name: Install Playwright browsers
        if: steps.playwright-cache.outputs.cache-hit != 'true'
        run: pnpm --filter @capakraken/web exec playwright install --with-deps chromium
      - name: Install Playwright system deps
        if: steps.playwright-cache.outputs.cache-hit == 'true'
        run: pnpm --filter @capakraken/web exec playwright install-deps chromium
      - name: Install psql (debug schema state)
        run: sudo apt-get update && sudo apt-get install -y --no-install-recommends postgresql-client
      - name: Push DB schema & seed
        env:
          PGPASSWORD: capakraken_test
        run: |
          # Nuke any leftover schema state from a previous job that shared the
          # postgres service container (act_runner reuses service volumes).
          # --force-reset alone proved unreliable: push reported "in sync" but
          # audit_logs ended up missing. Diagnostic hypothesis: there are TWO
          # postgres hosts reachable as "postgres" on gitea_gitea (the Gitea
          # core DB plus the service container) and push/seed hit different
          # ones. Verify via direct psql.
          echo "--- hosts resolving to 'e2epg' ---"
          getent hosts e2epg || true
          # Split-brain fix: 'e2epg' resolves to MULTIPLE IPs on the shared
          # gitea_gitea network (leftover service containers from concurrent
          # or crashed runs). Prisma picks one IP; psql picks another; push
          # reports success but verification sees an empty database. Probe
          # every resolved IP and lock onto the one that accepts our creds,
          # then force DATABASE_URL/PLAYWRIGHT_DATABASE_URL to that explicit
          # IP for the rest of the job so every subsequent step hits the
          # same postgres instance.
          IPS=$(getent hosts e2epg | awk '{print $1}')
          PG_IP=""
          for ip in $IPS; do
            if PGPASSWORD=capakraken_test psql -h "$ip" -U capakraken -d capakraken_test -v ON_ERROR_STOP=1 -Atc "SELECT 1" >/dev/null 2>&1; then
              PG_IP="$ip"
              echo "Locked onto postgres at $PG_IP"
              break
            else
              echo "Rejected $ip (auth or DB mismatch)"
            fi
          done
          if [ -z "$PG_IP" ]; then
            echo "ERROR: no resolved e2epg IP accepted capakraken_test credentials"
            exit 1
          fi
          PINNED_URL="postgresql://capakraken:capakraken_test@$PG_IP:5432/capakraken_test"
          echo "DATABASE_URL=$PINNED_URL" >> "$GITHUB_ENV"
          echo "PLAYWRIGHT_DATABASE_URL=$PINNED_URL" >> "$GITHUB_ENV"
          echo "--- DROP SCHEMA ---"
          psql -h "$PG_IP" -U capakraken -d capakraken_test -v ON_ERROR_STOP=1 \
            -c "DROP SCHEMA IF EXISTS public CASCADE; CREATE SCHEMA public; GRANT ALL ON SCHEMA public TO capakraken; GRANT ALL ON SCHEMA public TO public;"
          echo "--- prisma db push ---"
          DATABASE_URL="$PINNED_URL" pnpm --filter @capakraken/db exec prisma db push --schema ./prisma/schema.prisma --accept-data-loss --skip-generate
          echo "--- tables in public after push ---"
          psql -h "$PG_IP" -U capakraken -d capakraken_test -v ON_ERROR_STOP=1 -At \
            -c "SELECT tablename FROM pg_tables WHERE schemaname='public' ORDER BY tablename" \
            | tee /tmp/tables.txt
          if ! grep -qx 'audit_logs' /tmp/tables.txt; then
            echo "ERROR: audit_logs table missing after push!"
            exit 1
          fi
          DATABASE_URL="$PINNED_URL" pnpm db:seed
      - name: Run E2E tests
        # Bypass turbo here — it runs in strict env mode and does not pass
        # PLAYWRIGHT_DATABASE_URL / AUTH_SECRET / etc. through to the webServer
        # subprocess, breaking test-server.mjs. Calling playwright directly
        # inherits the job-level env unchanged.
        #
        # The full E2E suite (~167 tests across 20 specs) overwhelms the
        # QNAP runner's RAM — Next.js test server hits its memory threshold
        # and restarts mid-run, producing cascading ECONNREFUSED failures
        # unrelated to test content. Scope CI to smoke.spec.ts; full suite
        # is run locally / in a dedicated nightly job.
        run: pnpm --filter @capakraken/web exec playwright test e2e/smoke.spec.ts
      - name: Upload Playwright report
        uses: actions/upload-artifact@v4
        continue-on-error: true # upload-artifact@v4 unsupported on Gitea (GHES) runner
        if: ${{ !cancelled() }}
        with:
          name: playwright-report
          path: apps/web/playwright-report/
          retention-days: 14
  # ──────────────────────────────────────────────
  # Fresh Docker Compose deploy test — validates
  # that the prod compose bundle comes up clean
  # from scratch and the smoke tests pass.
  # ──────────────────────────────────────────────
  docker-deploy-test:
    name: Fresh-Linux Docker Deploy
    needs: [build]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Create minimal .env
        run: |
          cat <<'EOF' > .env
          NEXTAUTH_URL=http://localhost:3100
          NEXTAUTH_SECRET=ci-test-secret-minimum-32-chars-xx
          PGADMIN_PASSWORD=ci-pgadmin
          # Must match the password baked into docker-compose.ci.yml's
          # DATABASE_URL override (capakraken_dev).
          POSTGRES_PASSWORD=capakraken_dev
          EOF
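      # docker-compose.ci.yml itself is not shown on this page; per the
      # comment above it hard-codes DATABASE_URL with this password, roughly
      #   DATABASE_URL: postgresql://capakraken:capakraken_dev@postgres:5432/capakraken
      # (illustrative reconstruction only), which is why POSTGRES_PASSWORD in
      # the .env above cannot be chosen freely.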
      - name: Tear down any stale stack & volumes
        # act_runner on self-hosted QNAP keeps named compose volumes between
        # runs. A previous run's failed migration entry in _prisma_migrations
        # causes P3009 on the next migrate deploy; wipe volumes for a truly
        # fresh deploy test every time.
        run: docker compose -f docker-compose.yml -f docker-compose.ci.yml down -v --remove-orphans || true
      - name: Start infrastructure (postgres + redis)
        run: docker compose -f docker-compose.yml -f docker-compose.ci.yml up -d postgres redis
      - name: Wait for postgres
        run: |
          for i in $(seq 1 20); do
            docker compose -f docker-compose.yml -f docker-compose.ci.yml exec -T postgres pg_isready -U capakraken -d capakraken && break
            sleep 3
          done
      - name: Build and start app (full profile)
        run: docker compose -f docker-compose.yml -f docker-compose.ci.yml --profile full up -d --build app
      - name: Resolve and pin app IP
        # 'app' hostname collides on shared gitea_gitea network: many unrelated
        # containers (from other stacks or concurrent jobs) also answer to
        # "app" and to /api/health. Previously we probed every IP that
        # `getent hosts app` returned and pinned the first 200 responder —
        # which could easily be a foreign container whose process then died
        # mid-test, producing ERR_CONNECTION_REFUSED.
        #
        # Use docker compose ps to uniquely identify OUR app container, then
        # docker inspect to read its IP on the gitea_gitea network (the one
        # the act_runner job can reach). No DNS, no guessing.
        run: |
          set -e
          for i in $(seq 1 36); do
            CID=$(docker compose -f docker-compose.yml -f docker-compose.ci.yml ps -q app || true)
            if [ -n "$CID" ]; then
              APP_IP=$(docker inspect -f '{{range $k,$v := .NetworkSettings.Networks}}{{if eq $k "gitea_gitea"}}{{$v.IPAddress}}{{end}}{{end}}' "$CID")
              if [ -n "$APP_IP" ]; then
                CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 5 "http://$APP_IP:3100/api/health" || echo "000")
                echo "Attempt $i: container $CID on $APP_IP -> HTTP $CODE"
                if [ "$CODE" = "200" ]; then
                  echo "APP_IP=$APP_IP" >> "$GITHUB_ENV"
                  echo "APP_BASE_URL=http://$APP_IP:3100" >> "$GITHUB_ENV"
                  exit 0
                fi
              else
                echo "Attempt $i: container $CID has no gitea_gitea IP yet"
              fi
            else
              echo "Attempt $i: compose has no 'app' container yet"
            fi
            sleep 5
          done
          echo "Our stack's app container never reported healthy on gitea_gitea"
          docker compose -f docker-compose.yml -f docker-compose.ci.yml logs app --tail=50
          exit 1
      - name: Verify health response contains status ok
        run: |
          BODY=$(curl -sf "$APP_BASE_URL/api/health")
          echo "$BODY"
          echo "$BODY" | grep '"status":"ok"'
      - name: Warm up root and signin paths (Next.js dev compile)
        # Dockerfile.dev runs `pnpm dev`, so Next.js compiles pages on the
        # first request. The middleware+root combo on a cold server can
        # take >10s to JIT-compile and sometimes OOM-kills a worker on the
        # QNAP runner, causing the "unauthenticated root redirects" smoke
        # test to hit ERR_CONNECTION_REFUSED. Warm both routes before the
        # smoke run: root (must return 307 redirect) and /auth/signin
        # (must return 200). Do NOT use -L; the Location target can point
        # to a hostname that is unreachable from the runner namespace, and
        # we only need the route compiled, not the redirect followed.
        run: |
          warm() {
            local path="$1"
            local expect="$2"
            for i in $(seq 1 24); do
              CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 "${APP_BASE_URL}${path}" || echo "000")
              echo "Warm-up ${path} $i: HTTP $CODE"
              if [ "$CODE" = "$expect" ]; then return 0; fi
              sleep 5
            done
            echo "Warm-up ${path} did not reach $expect; continuing anyway"
          }
          warm / 307
          warm /auth/signin 200
      - name: Seed admin user
        # setup-admin.mjs imports @prisma/client and @node-rs/argon2, both of
        # which live only in packages/db/node_modules under pnpm workspaces.
        # Node's ESM bare-specifier resolver walks up from the *script's*
        # directory (/app/scripts), not cwd, and NODE_PATH is a CJS-only
        # escape hatch (ignored by ESM). Create a scripts/node_modules with
        # symlinks to the real package directories so the resolver finds
        # them on the first step up.
        run: |
          docker compose -f docker-compose.yml -f docker-compose.ci.yml exec -T app \
            sh -c '
              set -e
              mkdir -p /app/scripts/node_modules
              ln -sfn /app/packages/db/node_modules/@prisma /app/scripts/node_modules/@prisma
              ln -sfn /app/packages/db/node_modules/@node-rs /app/scripts/node_modules/@node-rs
              ln -sfn /app/packages/db/node_modules/.prisma /app/scripts/node_modules/.prisma
              node /app/scripts/setup-admin.mjs --email admin@capakraken.dev --name Admin --password admin123
            '
      - name: Set up Node.js 20
        uses: actions/setup-node@v4
        with:
          node-version: "20"
      - name: Install Playwright and Chromium
        # The repo root package.json uses pnpm `workspace:*` deps which npm
        # cannot resolve, so install into an isolated temp dir and symlink
        # @playwright/test into apps/web/node_modules so playwright.ci.config.ts
        # (CJS) can resolve it by walking up from apps/web/.
        run: |
          set -e
          mkdir -p /tmp/pw-install
          cd /tmp/pw-install
          [ -f package.json ] || npm init -y >/dev/null
          npm install --no-save --no-package-lock @playwright/test@1.49
          cd "$GITHUB_WORKSPACE"
          mkdir -p apps/web/node_modules
          ln -sfn /tmp/pw-install/node_modules/@playwright apps/web/node_modules/@playwright
          ln -sfn /tmp/pw-install/node_modules/playwright apps/web/node_modules/playwright
          ln -sfn /tmp/pw-install/node_modules/playwright-core apps/web/node_modules/playwright-core
          /tmp/pw-install/node_modules/.bin/playwright install chromium --with-deps
      - name: Re-warm routes immediately before smoke run
        # The earlier warm-up runs ~4 minutes before the smoke tests (seed,
        # Node setup, Playwright install all take real time on QNAP). In
        # between, the Next.js dev server on a constrained host can evict
        # or recompile routes under memory pressure — test #2 kept hitting
        # ERR_CONNECTION_REFUSED on / while tests for /auth/signin and api
        # routes worked fine. Re-warm both routes (same IP pin) just
        # before Playwright starts so the server is guaranteed hot.
        run: |
          warm() {
            local path="$1"
            local expect="$2"
            for i in $(seq 1 24); do
              CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 "${APP_BASE_URL}${path}" || echo "000")
              echo "Re-warm ${path} $i: HTTP $CODE"
              if [ "$CODE" = "$expect" ]; then return 0; fi
              sleep 3
            done
            echo "Re-warm ${path} did not reach $expect; continuing anyway"
          }
          warm / 307
          warm /auth/signin 200
      - name: Run smoke tests
        # Use the pinned APP_BASE_URL (explicit IP) so Chromium hits the same
        # container as the warm-up probes.
        #
        # Next.js dev mode on QNAP briefly drops the listening socket on
        # route-transition compiles — test #2 (`/`) has hit
        # ERR_CONNECTION_REFUSED between a warm-up and the test even though
        # the same URL returned 307 moments earlier. Playwright's in-process
        # retry runs while the socket is still down. Wrap the whole playwright
        # invocation in a shell retry: if the first run fails, re-warm /
        # aggressively and run the full suite once more.
        run: |
          run_smoke() {
            PLAYWRIGHT_BASE_URL="$APP_BASE_URL" \
              /tmp/pw-install/node_modules/.bin/playwright test \
              --config apps/web/playwright.ci.config.ts
          }
          if run_smoke; then exit 0; fi
          echo "First smoke run failed — aggressive re-warm + retry"
          for i in $(seq 1 10); do
            CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 "${APP_BASE_URL}/" || echo "000")
            echo "Post-fail warm / $i: HTTP $CODE"
            [ "$CODE" = "307" ] && break
            sleep 3
          done
          sleep 5
          run_smoke
      - name: Upload Playwright report
        if: failure()
        continue-on-error: true # upload-artifact@v4 unsupported on Gitea (GHES) runner
        uses: actions/upload-artifact@v4
        with:
          name: playwright-smoke-report
          path: apps/web/playwright-report/
          retention-days: 7
      - name: Show logs on failure
        if: failure()
        run: docker compose -f docker-compose.yml -f docker-compose.ci.yml logs --tail=100
  # ──────────────────────────────────────────────
  # Release images — only on push to main, after
  # every check has passed. Calls the reusable
  # release-image.yml workflow.
  # ──────────────────────────────────────────────
  release-images:
    name: Release Images
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    needs: [lint, test, e2e, assistant-split, docker-deploy-test]
    uses: ./.github/workflows/release-image.yml
    secrets: inherit