feat(refactor/phase1): foundation infrastructure for modular pipeline
Phase 1 of PLAN_REFACTOR.md — all four sub-tasks implemented:
1.1 PipelineLogger (backend/app/core/pipeline_logger.py)
- Structured step_start/step_done/step_error/step_progress API
- Publishes to Python logging AND Redis SSE via log_task_event
- Context manager `pl.step("name")` for auto-timing
1.2 RenderJobDocument (backend/app/domains/rendering/job_document.py)
- Pydantic JSONB schema: state machine + per-step records + timing
- begin_step/finish_step/fail_step/skip_step helpers
- Migration 048: adds render_job_doc JSONB column to order_lines
- OrderLine model updated with render_job_doc field
1.3 TenantContextMiddleware (backend/app/core/middleware.py)
- Decodes JWT, stores tenant_id + role in request.state
- get_db updated to auto-apply RLS SET LOCAL from request.state
- Registered in main.py (runs before every request)
- JWT now embeds tenant_id claim via create_access_token()
- Login endpoint passes tenant_id to token creation
1.4 ProcessStep Registry (backend/app/core/process_steps.py)
- StepName StrEnum with all 20 pipeline step names
- Single source of truth for log prefixes, DB records, UI labels
Also adds db_utils.py with set_tenant_sync() + get_sync_session()
for use inside Celery tasks (bypass-safe RLS helper).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,71 @@
|
||||
"""Database utilities for use inside Celery tasks (sync context).
|
||||
|
||||
Celery tasks bypass FastAPI middleware, so tenant RLS context must be set
|
||||
manually. Use _set_tenant() immediately after creating a sync DB session.
|
||||
|
||||
Usage::
|
||||
|
||||
from app.core.db_utils import set_tenant_sync, get_sync_session
|
||||
|
||||
with get_sync_session() as db:
|
||||
set_tenant_sync(db, tenant_id, role)
|
||||
# queries here will be RLS-filtered
|
||||
"""
|
||||
import contextlib
|
||||
from typing import Generator
|
||||
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from app.config import settings
|
||||
|
||||
|
||||
def set_tenant_sync(session: Session, tenant_id: str | None, role: str | None = None) -> None:
|
||||
"""Set RLS tenant context on a *synchronous* SQLAlchemy session.
|
||||
|
||||
Call this at the very start of any sync DB block inside a Celery task
|
||||
when you need tenant isolation. Admins bypass RLS; all other roles get
|
||||
a scoped context. If tenant_id is None the call is a no-op (global
|
||||
access, i.e. no RLS enforcement).
|
||||
"""
|
||||
if not tenant_id:
|
||||
return
|
||||
if role == "admin":
|
||||
session.execute(text("SET LOCAL app.current_tenant_id = 'bypass'"))
|
||||
else:
|
||||
session.execute(
|
||||
text("SET LOCAL app.current_tenant_id = :tid"),
|
||||
{"tid": tenant_id},
|
||||
)
|
||||
|
||||
|
||||
# Lazily created sync engine (reused across tasks in the same worker process)
|
||||
_sync_engine = None
|
||||
|
||||
|
||||
def _get_sync_engine():
|
||||
global _sync_engine
|
||||
if _sync_engine is None:
|
||||
sync_url = settings.database_url.replace("+asyncpg", "").replace("+aiosqlite", "")
|
||||
_sync_engine = create_engine(sync_url, pool_pre_ping=True, pool_size=5, max_overflow=10)
|
||||
return _sync_engine
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def get_sync_session(tenant_id: str | None = None, role: str | None = None) -> Generator[Session, None, None]:
|
||||
"""Context manager that yields a synchronous DB session with optional RLS.
|
||||
|
||||
Prefer using the existing async session patterns in FastAPI routes.
|
||||
This helper is intended for Celery tasks only.
|
||||
"""
|
||||
factory = sessionmaker(bind=_get_sync_engine(), expire_on_commit=False)
|
||||
with factory() as session:
|
||||
if tenant_id:
|
||||
set_tenant_sync(session, tenant_id, role)
|
||||
try:
|
||||
yield session
|
||||
except Exception:
|
||||
session.rollback()
|
||||
raise
|
||||
else:
|
||||
session.commit()
|
||||
@@ -0,0 +1,49 @@
|
||||
"""Application middleware.
|
||||
|
||||
TenantContextMiddleware
|
||||
Decodes the JWT Bearer token (if present) from every incoming request and
|
||||
stores tenant_id + role in request.state. The get_db dependency reads
|
||||
request.state to automatically set the RLS context before yielding the
|
||||
session — no endpoint code change required.
|
||||
"""
|
||||
import logging
|
||||
from jose import JWTError, jwt
|
||||
from starlette.middleware.base import BaseHTTPMiddleware
|
||||
from starlette.requests import Request
|
||||
from starlette.responses import Response
|
||||
|
||||
from app.config import settings
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TenantContextMiddleware(BaseHTTPMiddleware):
|
||||
"""Extract JWT → inject tenant_id + role into request.state.
|
||||
|
||||
Does NOT reject unauthenticated requests — that is still handled by the
|
||||
route-level dependencies (require_admin, get_current_user, etc.).
|
||||
Missing / invalid tokens result in request.state.tenant_id = None.
|
||||
"""
|
||||
|
||||
async def dispatch(self, request: Request, call_next) -> Response:
|
||||
tenant_id: str | None = None
|
||||
role: str | None = None
|
||||
|
||||
auth_header = request.headers.get("Authorization", "")
|
||||
if auth_header.startswith("Bearer "):
|
||||
token = auth_header[7:]
|
||||
try:
|
||||
payload = jwt.decode(
|
||||
token,
|
||||
settings.jwt_secret_key,
|
||||
algorithms=[settings.jwt_algorithm],
|
||||
)
|
||||
tenant_id = payload.get("tenant_id")
|
||||
role = payload.get("role")
|
||||
except JWTError:
|
||||
pass # invalid/expired tokens are handled per-endpoint
|
||||
|
||||
request.state.tenant_id = tenant_id
|
||||
request.state.role = role
|
||||
|
||||
return await call_next(request)
|
||||
@@ -0,0 +1,106 @@
|
||||
"""Structured pipeline logger.
|
||||
|
||||
Wraps Python logging + Redis SSE streaming for consistent, prefixed log output
|
||||
from all Celery pipeline tasks. Every method:
|
||||
- emits a Python `logging` line with a [STEP_NAME] prefix
|
||||
- publishes to Redis via log_task_event for SSE streaming in the UI
|
||||
"""
|
||||
import logging
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from app.core.task_logs import log_task_event
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PipelineLogger:
|
||||
"""Structured logger for a single pipeline execution context.
|
||||
|
||||
Usage in a Celery task::
|
||||
|
||||
pl = PipelineLogger(task_id=self.request.id, order_line_id=str(line.id))
|
||||
pl.step_start("occ_glb_export", {"cad_file_id": cad_file_id})
|
||||
...
|
||||
pl.step_done("occ_glb_export", duration_s=8.4, result={"size_bytes": 204800})
|
||||
"""
|
||||
|
||||
def __init__(self, task_id: str | None, order_line_id: str | None = None):
|
||||
self.task_id = task_id or "unknown"
|
||||
self.order_line_id = order_line_id
|
||||
self._step_starts: dict[str, float] = {}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def step_start(self, step: str, context: dict[str, Any] | None = None) -> None:
|
||||
self._step_starts[step] = time.time()
|
||||
msg = f"[{step}] start"
|
||||
if context:
|
||||
msg += f" | {context}"
|
||||
_log.info(msg)
|
||||
log_task_event(self.task_id, msg, level="info")
|
||||
|
||||
def step_progress(self, step: str, pct: int, msg: str) -> None:
|
||||
full = f"[{step}] {pct}% — {msg}"
|
||||
_log.info(full)
|
||||
log_task_event(self.task_id, full, level="info")
|
||||
|
||||
def step_done(self, step: str, duration_s: float | None = None, result: dict[str, Any] | None = None) -> None:
|
||||
if duration_s is None:
|
||||
start = self._step_starts.get(step)
|
||||
duration_s = round(time.time() - start, 2) if start else None
|
||||
parts = [f"[{step}] done"]
|
||||
if duration_s is not None:
|
||||
parts.append(f"{duration_s:.1f}s")
|
||||
if result:
|
||||
parts.append(str(result))
|
||||
msg = " | ".join(parts)
|
||||
_log.info(msg)
|
||||
log_task_event(self.task_id, msg, level="info")
|
||||
|
||||
def step_error(self, step: str, error: str, exc: Exception | None = None) -> None:
|
||||
msg = f"[{step}] ERROR — {error}"
|
||||
if exc:
|
||||
_log.exception(msg)
|
||||
else:
|
||||
_log.error(msg)
|
||||
log_task_event(self.task_id, msg, level="error")
|
||||
|
||||
def info(self, step: str, msg: str) -> None:
|
||||
full = f"[{step}] {msg}"
|
||||
_log.info(full)
|
||||
log_task_event(self.task_id, full, level="info")
|
||||
|
||||
def warning(self, step: str, msg: str) -> None:
|
||||
full = f"[{step}] WARNING — {msg}"
|
||||
_log.warning(full)
|
||||
log_task_event(self.task_id, full, level="warning")
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Context manager for a single step
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def step(self, step_name: str, context: dict[str, Any] | None = None) -> "_StepContext":
|
||||
return _StepContext(self, step_name, context)
|
||||
|
||||
|
||||
class _StepContext:
|
||||
"""Context manager that auto-calls step_start / step_done / step_error."""
|
||||
|
||||
def __init__(self, pl: PipelineLogger, step_name: str, context: dict | None):
|
||||
self._pl = pl
|
||||
self._name = step_name
|
||||
self._context = context
|
||||
|
||||
def __enter__(self) -> "_StepContext":
|
||||
self._pl.step_start(self._name, self._context)
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
if exc_type is None:
|
||||
self._pl.step_done(self._name)
|
||||
else:
|
||||
self._pl.step_error(self._name, str(exc_val), exc_val)
|
||||
return False # do not suppress exceptions
|
||||
@@ -0,0 +1,39 @@
|
||||
"""Named pipeline step identifiers.
|
||||
|
||||
All Celery tasks and render scripts reference these constants so that log
|
||||
messages, DB records, and UI labels stay consistent across the codebase.
|
||||
"""
|
||||
from enum import StrEnum
|
||||
|
||||
|
||||
class StepName(StrEnum):
|
||||
# ── STEP file processing ──────────────────────────────────────────
|
||||
RESOLVE_STEP_PATH = "resolve_step_path"
|
||||
OCC_OBJECT_EXTRACT = "occ_object_extract"
|
||||
OCC_GLB_EXPORT = "occ_glb_export"
|
||||
GLB_BBOX = "glb_bbox"
|
||||
MATERIAL_MAP_RESOLVE = "material_map_resolve"
|
||||
AUTO_POPULATE_MATERIALS = "auto_populate_materials"
|
||||
|
||||
# ── Thumbnail generation ─────────────────────────────────────────
|
||||
BLENDER_RENDER = "blender_render"
|
||||
THREEJS_RENDER = "threejs_render"
|
||||
THUMBNAIL_SAVE = "thumbnail_save"
|
||||
|
||||
# ── Order line render ─────────────────────────────────────────────
|
||||
ORDER_LINE_SETUP = "order_line_setup"
|
||||
RESOLVE_TEMPLATE = "resolve_template"
|
||||
BLENDER_STILL = "blender_still"
|
||||
BLENDER_TURNTABLE = "blender_turntable"
|
||||
OUTPUT_SAVE = "output_save"
|
||||
|
||||
# ── GLB / asset export ────────────────────────────────────────────
|
||||
EXPORT_GLB_GEOMETRY = "export_glb_geometry"
|
||||
EXPORT_GLB_PRODUCTION = "export_glb_production"
|
||||
EXPORT_BLEND = "export_blend"
|
||||
|
||||
# ── STL cache ────────────────────────────────────────────────────
|
||||
STL_CACHE_GENERATE = "stl_cache_generate"
|
||||
|
||||
# ── Notifications ─────────────────────────────────────────────────
|
||||
NOTIFY = "notify"
|
||||
Reference in New Issue
Block a user