refactor(A1): remove Flamenco, simplify render pipeline to Celery-only
- Remove flamenco-manager and flamenco-worker from docker-compose.yml
- Delete flamenco_client.py, flamenco_tasks.py, docker_scaler.py
- Simplify render_dispatcher.py to Celery-only (removes ~300 lines)
- Remove Flamenco beat schedule from celery_app.py
- Clean admin.py: remove flamenco settings, endpoints, threejs validation
- Clean orders.py cancel-render: Celery revoke only
- Clean worker.py: remove flamenco_job_id from activity response
- Migration 032: cancel lingering flamenco jobs, remove flamenco settings
- PLAN.md: mark all decisions confirmed, status IN UMSETZUNG

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+36
@@ -5,3 +5,39 @@ node_modules/
|
|||||||
*.log
|
*.log
|
||||||
core
|
core
|
||||||
/blender-renderer/core
|
/blender-renderer/core
|
||||||
|
|
||||||
|
# Python cache
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*.pyo
|
||||||
|
|
||||||
|
# Node / Vite build output
|
||||||
|
dist/
|
||||||
|
node_modules/
|
||||||
|
|
||||||
|
# Celery beat schedule
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# Test cache
|
||||||
|
.pytest_cache/
|
||||||
|
.coverage
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
# Excel lock files
|
||||||
|
~$*
|
||||||
|
.~lock.*#
|
||||||
|
|
||||||
|
# Exclude customer data
|
||||||
|
*.stp
|
||||||
|
*.step
|
||||||
|
*.stl
|
||||||
|
*.xls
|
||||||
|
*.xlsx
|
||||||
|
*.csv
|
||||||
|
*.xlsx
|
||||||
|
|
||||||
|
*.blend1
|
||||||
|
|||||||
@@ -0,0 +1,49 @@
|
|||||||
|
"""Remove Flamenco: cancel lingering flamenco jobs, clean up settings.
|
||||||
|
|
||||||
|
Revision ID: 032
|
||||||
|
Revises: 031
|
||||||
|
Create Date: 2026-03-06
|
||||||
|
"""
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
|
||||||
|
revision = '032'
|
||||||
|
down_revision = '031'
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Apply revision 032: retire the Flamenco render backend.

    Runs three SQL statements through ``op.execute``:

    1. Marks every ``order_lines`` row with ``render_backend_used = 'flamenco'``
       and a ``render_status`` of ``'processing'`` or ``'pending'`` as
       ``'cancelled'``, stamps ``render_completed_at`` with ``NOW()`` and
       merges a ``cancelled_reason`` entry into ``render_log``.
    2. Deletes the ``flamenco_manager_url`` and ``flamenco_worker_count`` rows
       from ``system_settings``.
    3. Rewrites a ``render_backend`` setting of ``'flamenco'`` or ``'auto'``
       to ``'celery'``.
    """
    # Cancel any order lines that were dispatched to Flamenco and never completed.
    # COALESCE guards against a NULL render_log: in PostgreSQL ``NULL || jsonb``
    # evaluates to NULL, which would silently drop the cancellation reason for
    # lines that have no log yet.
    op.execute("""
        UPDATE order_lines
        SET render_status = 'cancelled',
            render_completed_at = NOW(),
            render_log = COALESCE(render_log, '{}'::jsonb)
                         || '{"cancelled_reason": "flamenco_removed_in_v2"}'::jsonb
        WHERE render_backend_used = 'flamenco'
          AND render_status IN ('processing', 'pending')
    """)

    # Remove Flamenco-specific system settings
    op.execute("""
        DELETE FROM system_settings
        WHERE key IN ('flamenco_manager_url', 'flamenco_worker_count')
    """)

    # Reset render_backend setting to 'celery' if it was 'flamenco' or 'auto'
    op.execute("""
        UPDATE system_settings
        SET value = 'celery'
        WHERE key = 'render_backend' AND value IN ('flamenco', 'auto')
    """)
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade():
    """Revert revision 032 by re-inserting the default Flamenco settings rows.

    Inserts ``flamenco_manager_url`` and ``flamenco_worker_count`` into
    ``system_settings`` with their default values; rows that already exist
    are left untouched (``ON CONFLICT (key) DO NOTHING``).

    This function does not modify ``order_lines`` and does not change the
    ``render_backend`` setting.
    """
    restore_defaults_sql = """
        INSERT INTO system_settings (key, value, updated_at)
        VALUES
            ('flamenco_manager_url', 'http://flamenco-manager:8080', NOW()),
            ('flamenco_worker_count', '1', NOW())
        ON CONFLICT (key) DO NOTHING
    """
    op.execute(restore_defaults_sql)
|
||||||
@@ -1,4 +1,3 @@
|
|||||||
import asyncio
|
|
||||||
import json
|
import json
|
||||||
import uuid
|
import uuid
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
@@ -17,27 +16,21 @@ from app.utils.auth import require_admin, hash_password
|
|||||||
|
|
||||||
router = APIRouter(prefix="/admin", tags=["admin"])
|
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||||
|
|
||||||
VALID_RENDERERS = {"pillow", "blender", "threejs"}
|
VALID_RENDERERS = {"pillow", "blender"}
|
||||||
VALID_ENGINES = {"cycles", "eevee"}
|
VALID_ENGINES = {"cycles", "eevee"}
|
||||||
VALID_THREEJS_SIZES = {512, 1024, 2048}
|
VALID_FORMATS = {"jpg", "png"}
|
||||||
VALID_FORMATS = {"jpg", "png"}
|
VALID_STL_QUALITIES = {"low", "high"}
|
||||||
VALID_STL_QUALITIES = {"low", "high"}
|
|
||||||
VALID_CYCLES_DEVICES = {"auto", "gpu", "cpu"}
|
VALID_CYCLES_DEVICES = {"auto", "gpu", "cpu"}
|
||||||
VALID_RENDER_BACKENDS = {"celery", "flamenco", "auto"}
|
|
||||||
|
|
||||||
SETTINGS_DEFAULTS: dict[str, str] = {
|
SETTINGS_DEFAULTS: dict[str, str] = {
|
||||||
"thumbnail_renderer": "pillow",
|
"thumbnail_renderer": "blender",
|
||||||
"blender_engine": "cycles",
|
"blender_engine": "cycles",
|
||||||
"blender_cycles_samples": "256",
|
"blender_cycles_samples": "256",
|
||||||
"blender_eevee_samples": "64",
|
"blender_eevee_samples": "64",
|
||||||
"threejs_render_size": "1024",
|
|
||||||
"thumbnail_format": "jpg",
|
"thumbnail_format": "jpg",
|
||||||
"stl_quality": "low",
|
"stl_quality": "low",
|
||||||
"blender_smooth_angle": "30",
|
"blender_smooth_angle": "30",
|
||||||
"cycles_device": "auto",
|
"cycles_device": "auto",
|
||||||
"render_backend": "celery",
|
"render_backend": "celery",
|
||||||
"flamenco_manager_url": "http://flamenco-manager:8080",
|
|
||||||
"flamenco_worker_count": "1",
|
|
||||||
"blender_max_concurrent_renders": "3",
|
"blender_max_concurrent_renders": "3",
|
||||||
"product_thumbnail_priority": '["latest_render","cad_thumbnail"]',
|
"product_thumbnail_priority": '["latest_render","cad_thumbnail"]',
|
||||||
"render_stall_timeout_minutes": "120",
|
"render_stall_timeout_minutes": "120",
|
||||||
@@ -45,18 +38,15 @@ SETTINGS_DEFAULTS: dict[str, str] = {
|
|||||||
|
|
||||||
|
|
||||||
class SettingsOut(BaseModel):
|
class SettingsOut(BaseModel):
|
||||||
thumbnail_renderer: str = "pillow"
|
thumbnail_renderer: str = "blender"
|
||||||
blender_engine: str = "cycles"
|
blender_engine: str = "cycles"
|
||||||
blender_cycles_samples: int = 256
|
blender_cycles_samples: int = 256
|
||||||
blender_eevee_samples: int = 64
|
blender_eevee_samples: int = 64
|
||||||
threejs_render_size: int = 1024
|
|
||||||
thumbnail_format: str = "jpg"
|
thumbnail_format: str = "jpg"
|
||||||
stl_quality: str = "low"
|
stl_quality: str = "low"
|
||||||
blender_smooth_angle: int = 30
|
blender_smooth_angle: int = 30
|
||||||
cycles_device: str = "auto"
|
cycles_device: str = "auto"
|
||||||
render_backend: str = "celery"
|
render_backend: str = "celery"
|
||||||
flamenco_manager_url: str = "http://flamenco-manager:8080"
|
|
||||||
flamenco_worker_count: int = 1
|
|
||||||
blender_max_concurrent_renders: int = 3
|
blender_max_concurrent_renders: int = 3
|
||||||
product_thumbnail_priority: str = '["latest_render","cad_thumbnail"]'
|
product_thumbnail_priority: str = '["latest_render","cad_thumbnail"]'
|
||||||
render_stall_timeout_minutes: int = 120
|
render_stall_timeout_minutes: int = 120
|
||||||
@@ -67,14 +57,11 @@ class SettingsUpdate(BaseModel):
|
|||||||
blender_engine: str | None = None
|
blender_engine: str | None = None
|
||||||
blender_cycles_samples: int | None = None
|
blender_cycles_samples: int | None = None
|
||||||
blender_eevee_samples: int | None = None
|
blender_eevee_samples: int | None = None
|
||||||
threejs_render_size: int | None = None
|
|
||||||
thumbnail_format: str | None = None
|
thumbnail_format: str | None = None
|
||||||
stl_quality: str | None = None
|
stl_quality: str | None = None
|
||||||
blender_smooth_angle: int | None = None
|
blender_smooth_angle: int | None = None
|
||||||
cycles_device: str | None = None
|
cycles_device: str | None = None
|
||||||
render_backend: str | None = None
|
render_backend: str | None = None
|
||||||
flamenco_manager_url: str | None = None
|
|
||||||
flamenco_worker_count: int | None = None
|
|
||||||
blender_max_concurrent_renders: int | None = None
|
blender_max_concurrent_renders: int | None = None
|
||||||
product_thumbnail_priority: str | None = None
|
product_thumbnail_priority: str | None = None
|
||||||
render_stall_timeout_minutes: int | None = None
|
render_stall_timeout_minutes: int | None = None
|
||||||
@@ -171,14 +158,11 @@ def _settings_to_out(raw: dict[str, str]) -> SettingsOut:
|
|||||||
blender_engine=raw["blender_engine"],
|
blender_engine=raw["blender_engine"],
|
||||||
blender_cycles_samples=int(raw["blender_cycles_samples"]),
|
blender_cycles_samples=int(raw["blender_cycles_samples"]),
|
||||||
blender_eevee_samples=int(raw["blender_eevee_samples"]),
|
blender_eevee_samples=int(raw["blender_eevee_samples"]),
|
||||||
threejs_render_size=int(raw["threejs_render_size"]),
|
|
||||||
thumbnail_format=raw["thumbnail_format"],
|
thumbnail_format=raw["thumbnail_format"],
|
||||||
stl_quality=raw["stl_quality"],
|
stl_quality=raw["stl_quality"],
|
||||||
blender_smooth_angle=int(raw["blender_smooth_angle"]),
|
blender_smooth_angle=int(raw["blender_smooth_angle"]),
|
||||||
cycles_device=raw["cycles_device"],
|
cycles_device=raw["cycles_device"],
|
||||||
render_backend=raw["render_backend"],
|
render_backend=raw["render_backend"],
|
||||||
flamenco_manager_url=raw["flamenco_manager_url"],
|
|
||||||
flamenco_worker_count=int(raw["flamenco_worker_count"]),
|
|
||||||
blender_max_concurrent_renders=int(raw["blender_max_concurrent_renders"]),
|
blender_max_concurrent_renders=int(raw["blender_max_concurrent_renders"]),
|
||||||
product_thumbnail_priority=raw.get("product_thumbnail_priority", '["latest_render","cad_thumbnail"]'),
|
product_thumbnail_priority=raw.get("product_thumbnail_priority", '["latest_render","cad_thumbnail"]'),
|
||||||
render_stall_timeout_minutes=int(raw.get("render_stall_timeout_minutes", "120")),
|
render_stall_timeout_minutes=int(raw.get("render_stall_timeout_minutes", "120")),
|
||||||
@@ -207,8 +191,6 @@ async def update_settings(
|
|||||||
raise HTTPException(400, detail="blender_cycles_samples must be 1–4096")
|
raise HTTPException(400, detail="blender_cycles_samples must be 1–4096")
|
||||||
if body.blender_eevee_samples is not None and not (1 <= body.blender_eevee_samples <= 1024):
|
if body.blender_eevee_samples is not None and not (1 <= body.blender_eevee_samples <= 1024):
|
||||||
raise HTTPException(400, detail="blender_eevee_samples must be 1–1024")
|
raise HTTPException(400, detail="blender_eevee_samples must be 1–1024")
|
||||||
if body.threejs_render_size is not None and body.threejs_render_size not in VALID_THREEJS_SIZES:
|
|
||||||
raise HTTPException(400, detail=f"Invalid threejs_render_size. Choose: {', '.join(str(s) for s in sorted(VALID_THREEJS_SIZES))}")
|
|
||||||
if body.thumbnail_format is not None and body.thumbnail_format not in VALID_FORMATS:
|
if body.thumbnail_format is not None and body.thumbnail_format not in VALID_FORMATS:
|
||||||
raise HTTPException(400, detail=f"Invalid thumbnail_format. Choose: {', '.join(sorted(VALID_FORMATS))}")
|
raise HTTPException(400, detail=f"Invalid thumbnail_format. Choose: {', '.join(sorted(VALID_FORMATS))}")
|
||||||
if body.stl_quality is not None and body.stl_quality not in VALID_STL_QUALITIES:
|
if body.stl_quality is not None and body.stl_quality not in VALID_STL_QUALITIES:
|
||||||
@@ -217,10 +199,6 @@ async def update_settings(
|
|||||||
raise HTTPException(400, detail="blender_smooth_angle must be 0–180 degrees")
|
raise HTTPException(400, detail="blender_smooth_angle must be 0–180 degrees")
|
||||||
if body.cycles_device is not None and body.cycles_device not in VALID_CYCLES_DEVICES:
|
if body.cycles_device is not None and body.cycles_device not in VALID_CYCLES_DEVICES:
|
||||||
raise HTTPException(400, detail=f"Invalid cycles_device. Choose: {', '.join(sorted(VALID_CYCLES_DEVICES))}")
|
raise HTTPException(400, detail=f"Invalid cycles_device. Choose: {', '.join(sorted(VALID_CYCLES_DEVICES))}")
|
||||||
if body.render_backend is not None and body.render_backend not in VALID_RENDER_BACKENDS:
|
|
||||||
raise HTTPException(400, detail=f"Invalid render_backend. Choose: {', '.join(sorted(VALID_RENDER_BACKENDS))}")
|
|
||||||
if body.flamenco_worker_count is not None and not (1 <= body.flamenco_worker_count <= 16):
|
|
||||||
raise HTTPException(400, detail="flamenco_worker_count must be 1–16")
|
|
||||||
if body.blender_max_concurrent_renders is not None and not (1 <= body.blender_max_concurrent_renders <= 16):
|
if body.blender_max_concurrent_renders is not None and not (1 <= body.blender_max_concurrent_renders <= 16):
|
||||||
raise HTTPException(400, detail="blender_max_concurrent_renders must be 1–16")
|
raise HTTPException(400, detail="blender_max_concurrent_renders must be 1–16")
|
||||||
if body.render_stall_timeout_minutes is not None and not (10 <= body.render_stall_timeout_minutes <= 10080):
|
if body.render_stall_timeout_minutes is not None and not (10 <= body.render_stall_timeout_minutes <= 10080):
|
||||||
@@ -252,8 +230,6 @@ async def update_settings(
|
|||||||
updates["blender_cycles_samples"] = str(body.blender_cycles_samples)
|
updates["blender_cycles_samples"] = str(body.blender_cycles_samples)
|
||||||
if body.blender_eevee_samples is not None:
|
if body.blender_eevee_samples is not None:
|
||||||
updates["blender_eevee_samples"] = str(body.blender_eevee_samples)
|
updates["blender_eevee_samples"] = str(body.blender_eevee_samples)
|
||||||
if body.threejs_render_size is not None:
|
|
||||||
updates["threejs_render_size"] = str(body.threejs_render_size)
|
|
||||||
if body.thumbnail_format is not None:
|
if body.thumbnail_format is not None:
|
||||||
updates["thumbnail_format"] = body.thumbnail_format
|
updates["thumbnail_format"] = body.thumbnail_format
|
||||||
if body.stl_quality is not None:
|
if body.stl_quality is not None:
|
||||||
@@ -264,10 +240,6 @@ async def update_settings(
|
|||||||
updates["cycles_device"] = body.cycles_device
|
updates["cycles_device"] = body.cycles_device
|
||||||
if body.render_backend is not None:
|
if body.render_backend is not None:
|
||||||
updates["render_backend"] = body.render_backend
|
updates["render_backend"] = body.render_backend
|
||||||
if body.flamenco_manager_url is not None:
|
|
||||||
updates["flamenco_manager_url"] = body.flamenco_manager_url
|
|
||||||
if body.flamenco_worker_count is not None:
|
|
||||||
updates["flamenco_worker_count"] = str(body.flamenco_worker_count)
|
|
||||||
if body.blender_max_concurrent_renders is not None:
|
if body.blender_max_concurrent_renders is not None:
|
||||||
updates["blender_max_concurrent_renders"] = str(body.blender_max_concurrent_renders)
|
updates["blender_max_concurrent_renders"] = str(body.blender_max_concurrent_renders)
|
||||||
if body.render_stall_timeout_minutes is not None:
|
if body.render_stall_timeout_minutes is not None:
|
||||||
@@ -392,7 +364,6 @@ async def renderer_status(
|
|||||||
services = {
|
services = {
|
||||||
"pillow": {"url": None, "available": True, "note": "Built-in (always available)"},
|
"pillow": {"url": None, "available": True, "note": "Built-in (always available)"},
|
||||||
"blender": {"url": "http://blender-renderer:8100/health", "available": False, "note": ""},
|
"blender": {"url": "http://blender-renderer:8100/health", "available": False, "note": ""},
|
||||||
"threejs": {"url": "http://threejs-renderer:8101/health", "available": False, "note": ""},
|
|
||||||
}
|
}
|
||||||
async with httpx.AsyncClient(timeout=3.0) as client:
|
async with httpx.AsyncClient(timeout=3.0) as client:
|
||||||
for name, info in services.items():
|
for name, info in services.items():
|
||||||
@@ -409,78 +380,3 @@ async def renderer_status(
|
|||||||
return services
|
return services
|
||||||
|
|
||||||
|
|
||||||
@router.get("/settings/flamenco-status")
|
|
||||||
async def flamenco_status(
|
|
||||||
admin: User = Depends(require_admin),
|
|
||||||
db: AsyncSession = Depends(get_db),
|
|
||||||
):
|
|
||||||
"""Check Flamenco Manager health and list workers."""
|
|
||||||
raw = await _load_settings(db)
|
|
||||||
manager_url = raw.get("flamenco_manager_url", "http://flamenco-manager:8080")
|
|
||||||
|
|
||||||
from app.services.flamenco_client import get_flamenco_client
|
|
||||||
client = get_flamenco_client(manager_url)
|
|
||||||
|
|
||||||
health = client.health_check()
|
|
||||||
workers: list[dict] = []
|
|
||||||
|
|
||||||
if health["available"]:
|
|
||||||
try:
|
|
||||||
workers = client.list_workers()
|
|
||||||
except Exception as exc:
|
|
||||||
workers = [{"error": str(exc)[:200]}]
|
|
||||||
|
|
||||||
return {
|
|
||||||
"manager": health,
|
|
||||||
"workers": workers,
|
|
||||||
"manager_url": manager_url,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class WorkerCountBody(BaseModel):
|
|
||||||
count: int
|
|
||||||
|
|
||||||
|
|
||||||
@router.get("/settings/flamenco-worker-actual")
|
|
||||||
async def get_flamenco_worker_actual(admin: User = Depends(require_admin)):
|
|
||||||
"""Return the number of flamenco-worker containers currently running."""
|
|
||||||
from app.services.docker_scaler import get_running_worker_count
|
|
||||||
count = await asyncio.get_event_loop().run_in_executor(None, get_running_worker_count)
|
|
||||||
return {"running": count, "available": count >= 0}
|
|
||||||
|
|
||||||
|
|
||||||
@router.post("/settings/flamenco-worker-count")
|
|
||||||
async def set_flamenco_worker_count(
|
|
||||||
body: WorkerCountBody,
|
|
||||||
admin: User = Depends(require_admin),
|
|
||||||
db: AsyncSession = Depends(get_db),
|
|
||||||
):
|
|
||||||
"""Scale Flamenco worker containers to the requested count via Docker socket."""
|
|
||||||
if not (1 <= body.count <= 16):
|
|
||||||
raise HTTPException(400, detail="Worker count must be 1–16")
|
|
||||||
|
|
||||||
# Save desired count to settings first
|
|
||||||
await _save_setting(db, "flamenco_worker_count", str(body.count))
|
|
||||||
await db.commit()
|
|
||||||
|
|
||||||
# Perform actual Docker scaling in a thread (blocking SDK call)
|
|
||||||
from app.services.docker_scaler import scale_workers
|
|
||||||
try:
|
|
||||||
result = await asyncio.get_event_loop().run_in_executor(None, scale_workers, body.count)
|
|
||||||
return {
|
|
||||||
"count": body.count,
|
|
||||||
"previous": result["previous"],
|
|
||||||
"current": result["current"],
|
|
||||||
"delta": result["delta"],
|
|
||||||
"message": result["message"],
|
|
||||||
}
|
|
||||||
except Exception as exc:
|
|
||||||
# Scaling failed — return a warning but keep the saved setting
|
|
||||||
return {
|
|
||||||
"count": body.count,
|
|
||||||
"previous": -1,
|
|
||||||
"current": -1,
|
|
||||||
"delta": 0,
|
|
||||||
"message": f"Setting saved, but Docker scaling failed: {exc}. "
|
|
||||||
f"Run `docker compose up -d --scale flamenco-worker={body.count}` manually.",
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -920,44 +920,17 @@ async def cancel_line_render(
|
|||||||
if line.render_status not in ("processing", "pending"):
|
if line.render_status not in ("processing", "pending"):
|
||||||
raise HTTPException(400, detail=f"Line render_status is '{line.render_status}', nothing to cancel")
|
raise HTTPException(400, detail=f"Line render_status is '{line.render_status}', nothing to cancel")
|
||||||
|
|
||||||
cancelled_backend = line.render_backend_used or "unknown"
|
cancelled_backend = line.render_backend_used or "celery"
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
|
|
||||||
# Cancel Flamenco job if applicable
|
# Revoke Celery task (best-effort)
|
||||||
if line.render_backend_used == "flamenco" and line.flamenco_job_id:
|
try:
|
||||||
try:
|
from app.tasks.celery_app import celery_app
|
||||||
from app.services.flamenco_client import get_flamenco_client
|
celery_app.control.revoke(
|
||||||
from app.models.system_setting import SystemSetting
|
f"render-{line_id}", terminate=True, signal="SIGTERM"
|
||||||
row = await db.execute(
|
)
|
||||||
select(SystemSetting).where(SystemSetting.key == "flamenco_manager_url")
|
except Exception as exc:
|
||||||
)
|
errors.append(f"Celery revoke failed: {str(exc)[:200]}")
|
||||||
setting = row.scalar_one_or_none()
|
|
||||||
url = setting.value if setting else "http://flamenco-manager:8080"
|
|
||||||
client = get_flamenco_client(url)
|
|
||||||
client.cancel_job(line.flamenco_job_id)
|
|
||||||
except Exception as exc:
|
|
||||||
errors.append(f"Flamenco cancel failed: {str(exc)[:200]}")
|
|
||||||
|
|
||||||
# Revoke Celery task if applicable
|
|
||||||
if line.render_backend_used == "celery" or not line.render_backend_used:
|
|
||||||
try:
|
|
||||||
from app.tasks.celery_app import celery_app
|
|
||||||
celery_app.control.revoke(
|
|
||||||
f"render-{line_id}", terminate=True, signal="SIGTERM"
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
errors.append(f"Celery revoke failed: {str(exc)[:200]}")
|
|
||||||
|
|
||||||
# Also kill the Blender subprocess in the renderer microservice.
|
|
||||||
# The job_id sent to blender-renderer equals the order_line_id.
|
|
||||||
try:
|
|
||||||
import httpx as _httpx
|
|
||||||
_httpx.post(
|
|
||||||
f"http://blender-renderer:8100/cancel/{line_id}",
|
|
||||||
timeout=5.0,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass # best-effort; renderer may not be running a job for this line
|
|
||||||
|
|
||||||
# Mark line as cancelled
|
# Mark line as cancelled
|
||||||
from sqlalchemy import update as sql_update
|
from sqlalchemy import update as sql_update
|
||||||
@@ -1013,47 +986,21 @@ async def cancel_order_renders(
|
|||||||
if not lines:
|
if not lines:
|
||||||
raise HTTPException(400, detail="No active renders to cancel")
|
raise HTTPException(400, detail="No active renders to cancel")
|
||||||
|
|
||||||
from app.services.flamenco_client import get_flamenco_client
|
|
||||||
from app.models.system_setting import SystemSetting
|
|
||||||
from app.tasks.celery_app import celery_app
|
from app.tasks.celery_app import celery_app
|
||||||
from sqlalchemy import update as sql_update
|
from sqlalchemy import update as sql_update
|
||||||
|
|
||||||
# Load Flamenco URL once
|
|
||||||
row = await db.execute(
|
|
||||||
select(SystemSetting).where(SystemSetting.key == "flamenco_manager_url")
|
|
||||||
)
|
|
||||||
setting = row.scalar_one_or_none()
|
|
||||||
flamenco_url = setting.value if setting else "http://flamenco-manager:8080"
|
|
||||||
|
|
||||||
now = datetime.utcnow()
|
now = datetime.utcnow()
|
||||||
cancelled_count = 0
|
cancelled_count = 0
|
||||||
errors: list[str] = []
|
errors: list[str] = []
|
||||||
|
|
||||||
for line in lines:
|
for line in lines:
|
||||||
# Cancel Flamenco job
|
# Revoke Celery task (best-effort)
|
||||||
if line.render_backend_used == "flamenco" and line.flamenco_job_id:
|
try:
|
||||||
try:
|
celery_app.control.revoke(
|
||||||
client = get_flamenco_client(flamenco_url)
|
f"render-{line.id}", terminate=True, signal="SIGTERM"
|
||||||
client.cancel_job(line.flamenco_job_id)
|
)
|
||||||
except Exception as exc:
|
except Exception:
|
||||||
errors.append(f"Line {line.id}: Flamenco cancel failed: {str(exc)[:100]}")
|
pass
|
||||||
|
|
||||||
# Revoke Celery task + kill Blender subprocess in renderer service
|
|
||||||
if line.render_backend_used == "celery" or not line.render_backend_used:
|
|
||||||
try:
|
|
||||||
celery_app.control.revoke(
|
|
||||||
f"render-{line.id}", terminate=True, signal="SIGTERM"
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass # Celery revoke is best-effort
|
|
||||||
try:
|
|
||||||
import httpx as _httpx
|
|
||||||
_httpx.post(
|
|
||||||
f"http://blender-renderer:8100/cancel/{line.id}",
|
|
||||||
timeout=5.0,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
pass # best-effort
|
|
||||||
|
|
||||||
await db.execute(
|
await db.execute(
|
||||||
sql_update(OrderLine)
|
sql_update(OrderLine)
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ class RenderJobEntry(BaseModel):
|
|||||||
output_type_name: str | None
|
output_type_name: str | None
|
||||||
render_status: str
|
render_status: str
|
||||||
render_backend_used: str | None
|
render_backend_used: str | None
|
||||||
flamenco_job_id: str | None
|
|
||||||
render_started_at: str | None
|
render_started_at: str | None
|
||||||
render_completed_at: str | None
|
render_completed_at: str | None
|
||||||
updated_at: str
|
updated_at: str
|
||||||
@@ -140,7 +139,6 @@ async def get_worker_activity(
|
|||||||
output_type_name=rl.output_type.name if rl.output_type else None,
|
output_type_name=rl.output_type.name if rl.output_type else None,
|
||||||
render_status=rl.render_status,
|
render_status=rl.render_status,
|
||||||
render_backend_used=rl.render_backend_used,
|
render_backend_used=rl.render_backend_used,
|
||||||
flamenco_job_id=rl.flamenco_job_id,
|
|
||||||
render_started_at=rl.render_started_at.isoformat() if rl.render_started_at else None,
|
render_started_at=rl.render_started_at.isoformat() if rl.render_started_at else None,
|
||||||
render_completed_at=rl.render_completed_at.isoformat() if rl.render_completed_at else None,
|
render_completed_at=rl.render_completed_at.isoformat() if rl.render_completed_at else None,
|
||||||
updated_at=rl.updated_at.isoformat(),
|
updated_at=rl.updated_at.isoformat(),
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ from sqlalchemy import String, DateTime, Boolean, Text, Integer, ForeignKey
|
|||||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||||
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
from sqlalchemy.dialects.postgresql import UUID, JSONB
|
||||||
|
|
||||||
VALID_RENDER_BACKENDS = {"celery", "flamenco", "auto"}
|
VALID_RENDER_BACKENDS = {"celery"}
|
||||||
from app.database import Base
|
from app.database import Base
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,177 +0,0 @@
|
|||||||
"""Scale Flamenco worker containers via the Docker socket.
|
|
||||||
|
|
||||||
Uses the Docker Python SDK (docker>=6.1.0) to list, start, and stop containers.
|
|
||||||
Requires /var/run/docker.sock to be mounted into the backend container.
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
import logging
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
COMPOSE_PROJECT = os.getenv("COMPOSE_PROJECT_NAME", "schaefflerautomat")
|
|
||||||
SERVICE_NAME = "flamenco-worker"
|
|
||||||
|
|
||||||
|
|
||||||
def _get_client():
|
|
||||||
import docker
|
|
||||||
return docker.from_env()
|
|
||||||
|
|
||||||
|
|
||||||
def get_worker_containers(client=None):
|
|
||||||
"""Return all flamenco-worker containers (running + stopped) sorted by name."""
|
|
||||||
if client is None:
|
|
||||||
client = _get_client()
|
|
||||||
return sorted(
|
|
||||||
client.containers.list(
|
|
||||||
all=True,
|
|
||||||
filters={
|
|
||||||
"label": [
|
|
||||||
f"com.docker.compose.project={COMPOSE_PROJECT}",
|
|
||||||
f"com.docker.compose.service={SERVICE_NAME}",
|
|
||||||
]
|
|
||||||
},
|
|
||||||
),
|
|
||||||
key=lambda c: c.name,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def get_running_worker_count(client=None) -> int:
|
|
||||||
"""Return how many flamenco-worker containers are currently running."""
|
|
||||||
try:
|
|
||||||
if client is None:
|
|
||||||
client = _get_client()
|
|
||||||
containers = get_worker_containers(client)
|
|
||||||
return sum(1 for c in containers if c.status == "running")
|
|
||||||
except Exception as exc:
|
|
||||||
log.warning("docker_scaler: could not read worker count: %s", exc)
|
|
||||||
return -1
|
|
||||||
|
|
||||||
|
|
||||||
def scale_workers(target: int) -> dict:
|
|
||||||
"""Scale flamenco-worker containers to *target* count.
|
|
||||||
|
|
||||||
Returns a dict with keys:
|
|
||||||
previous – containers running before
|
|
||||||
current – containers running after
|
|
||||||
delta – change (negative = stopped, positive = started)
|
|
||||||
message – human-readable summary
|
|
||||||
"""
|
|
||||||
import docker
|
|
||||||
from docker.types import Mount
|
|
||||||
|
|
||||||
client = _get_client()
|
|
||||||
|
|
||||||
all_workers = get_worker_containers(client)
|
|
||||||
running = [c for c in all_workers if c.status == "running"]
|
|
||||||
previous = len(running)
|
|
||||||
|
|
||||||
if target == previous:
|
|
||||||
return {"previous": previous, "current": previous, "delta": 0,
|
|
||||||
"message": f"Already at {previous} worker(s) — no change"}
|
|
||||||
|
|
||||||
# ── Scale down ────────────────────────────────────────────────────────────
|
|
||||||
if target < previous:
|
|
||||||
# Stop highest-numbered containers first to minimise disruption
|
|
||||||
to_stop = sorted(running, key=lambda c: c.name, reverse=True)[: previous - target]
|
|
||||||
for c in to_stop:
|
|
||||||
log.info("docker_scaler: stopping %s", c.name)
|
|
||||||
c.stop(timeout=20)
|
|
||||||
c.remove()
|
|
||||||
return {
|
|
||||||
"previous": previous,
|
|
||||||
"current": target,
|
|
||||||
"delta": target - previous,
|
|
||||||
"message": f"Stopped {len(to_stop)} worker(s): {[c.name for c in to_stop]}",
|
|
||||||
}
|
|
||||||
|
|
||||||
# ── Scale up ──────────────────────────────────────────────────────────────
|
|
||||||
template = running[0] if running else (all_workers[0] if all_workers else None)
|
|
||||||
if template is None:
|
|
||||||
raise RuntimeError(
|
|
||||||
"No existing flamenco-worker container found to clone configuration from. "
|
|
||||||
"Ensure at least one worker container exists (even if stopped)."
|
|
||||||
)
|
|
||||||
|
|
||||||
attrs = template.attrs
|
|
||||||
image = attrs["Config"]["Image"]
|
|
||||||
env = attrs["Config"].get("Env") or []
|
|
||||||
|
|
||||||
# Reconstruct mounts from the template container
|
|
||||||
mounts = []
|
|
||||||
for m in (attrs.get("Mounts") or []):
|
|
||||||
mount_type = m.get("Type", "bind")
|
|
||||||
source = m.get("Name", "") if mount_type == "volume" else m.get("Source", "")
|
|
||||||
mounts.append(
|
|
||||||
Mount(
|
|
||||||
target=m["Destination"],
|
|
||||||
source=source,
|
|
||||||
type=mount_type,
|
|
||||||
read_only=not m.get("RW", True),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Reconstruct GPU device requests (nvidia)
|
|
||||||
device_requests = None
|
|
||||||
raw_dr = (attrs.get("HostConfig") or {}).get("DeviceRequests") or []
|
|
||||||
if raw_dr:
|
|
||||||
device_requests = []
|
|
||||||
for dr in raw_dr:
|
|
||||||
device_requests.append(
|
|
||||||
docker.types.DeviceRequest(
|
|
||||||
driver=dr.get("Driver", ""),
|
|
||||||
count=dr.get("Count", -1),
|
|
||||||
device_ids=dr.get("DeviceIDs") or [],
|
|
||||||
capabilities=dr.get("Capabilities") or [],
|
|
||||||
options=dr.get("Options") or {},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# Network(s) the template is connected to
|
|
||||||
network_names = list(
|
|
||||||
(attrs.get("NetworkSettings") or {}).get("Networks", {}).keys()
|
|
||||||
)
|
|
||||||
|
|
||||||
restart_policy_name = (
|
|
||||||
(attrs.get("HostConfig") or {})
|
|
||||||
.get("RestartPolicy", {})
|
|
||||||
.get("Name", "unless-stopped")
|
|
||||||
) or "unless-stopped"
|
|
||||||
|
|
||||||
started = []
|
|
||||||
for i in range(previous + 1, target + 1):
|
|
||||||
new_name = f"{COMPOSE_PROJECT}-{SERVICE_NAME}-{i}"
|
|
||||||
labels = {
|
|
||||||
"com.docker.compose.project": COMPOSE_PROJECT,
|
|
||||||
"com.docker.compose.service": SERVICE_NAME,
|
|
||||||
"com.docker.compose.container-number": str(i),
|
|
||||||
}
|
|
||||||
|
|
||||||
log.info("docker_scaler: creating %s from image %s", new_name, image)
|
|
||||||
container = client.containers.create(
|
|
||||||
image=image,
|
|
||||||
name=new_name,
|
|
||||||
environment=env,
|
|
||||||
labels=labels,
|
|
||||||
mounts=mounts,
|
|
||||||
restart_policy={"Name": restart_policy_name},
|
|
||||||
device_requests=device_requests,
|
|
||||||
)
|
|
||||||
|
|
||||||
for net_name in network_names:
|
|
||||||
try:
|
|
||||||
net = client.networks.get(net_name)
|
|
||||||
net.connect(container)
|
|
||||||
log.info("docker_scaler: connected %s to network %s", new_name, net_name)
|
|
||||||
except Exception as exc:
|
|
||||||
log.warning("docker_scaler: could not connect to network %s: %s", net_name, exc)
|
|
||||||
|
|
||||||
container.start()
|
|
||||||
started.append(new_name)
|
|
||||||
log.info("docker_scaler: started %s", new_name)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"previous": previous,
|
|
||||||
"current": target,
|
|
||||||
"delta": target - previous,
|
|
||||||
"message": f"Started {len(started)} new worker(s): {started}",
|
|
||||||
}
|
|
||||||
@@ -1,121 +0,0 @@
|
|||||||
"""Flamenco Manager REST API client.
|
|
||||||
|
|
||||||
Uses httpx (sync) for compatibility with Celery tasks and FastAPI endpoints.
|
|
||||||
"""
|
|
||||||
import logging
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
DEFAULT_TIMEOUT = 10.0
|
|
||||||
|
|
||||||
|
|
||||||
class FlamencoClient:
|
|
||||||
"""Thin wrapper around the Flamenco Manager v3 REST API."""
|
|
||||||
|
|
||||||
def __init__(self, manager_url: str):
|
|
||||||
self.base_url = manager_url.rstrip("/")
|
|
||||||
|
|
||||||
def _url(self, path: str) -> str:
|
|
||||||
return f"{self.base_url}{path}"
|
|
||||||
|
|
||||||
# ── Job management ──────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def submit_job(
|
|
||||||
self,
|
|
||||||
name: str,
|
|
||||||
job_type: str,
|
|
||||||
settings: dict[str, Any],
|
|
||||||
metadata: dict[str, str] | None = None,
|
|
||||||
priority: int = 50,
|
|
||||||
) -> dict:
|
|
||||||
"""Submit a new render job to Flamenco Manager.
|
|
||||||
|
|
||||||
Returns the created job dict (includes 'id').
|
|
||||||
"""
|
|
||||||
payload = {
|
|
||||||
"name": name,
|
|
||||||
"type": job_type,
|
|
||||||
"submitter_platform": "linux",
|
|
||||||
"settings": settings,
|
|
||||||
"metadata": metadata or {},
|
|
||||||
"priority": priority,
|
|
||||||
}
|
|
||||||
resp = httpx.post(
|
|
||||||
self._url("/api/v3/jobs"),
|
|
||||||
json=payload,
|
|
||||||
timeout=DEFAULT_TIMEOUT,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
return resp.json()
|
|
||||||
|
|
||||||
def get_job(self, job_id: str) -> dict:
|
|
||||||
"""Get job details by ID."""
|
|
||||||
resp = httpx.get(
|
|
||||||
self._url(f"/api/v3/jobs/{job_id}"),
|
|
||||||
timeout=DEFAULT_TIMEOUT,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
return resp.json()
|
|
||||||
|
|
||||||
def cancel_job(self, job_id: str) -> None:
|
|
||||||
"""Request cancellation of a job."""
|
|
||||||
resp = httpx.post(
|
|
||||||
self._url(f"/api/v3/jobs/{job_id}/setstatus"),
|
|
||||||
json={"status": "cancel-requested"},
|
|
||||||
timeout=DEFAULT_TIMEOUT,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
|
|
||||||
# ── Workers ─────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def list_workers(self) -> list[dict]:
|
|
||||||
"""List all registered workers."""
|
|
||||||
resp = httpx.get(
|
|
||||||
self._url("/api/v3/worker-mgt/workers"),
|
|
||||||
timeout=DEFAULT_TIMEOUT,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
data = resp.json()
|
|
||||||
return data.get("workers", data) if isinstance(data, dict) else data
|
|
||||||
|
|
||||||
# ── Farm status ─────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
def get_farm_status(self) -> dict:
|
|
||||||
"""Get overall farm status from the Manager."""
|
|
||||||
resp = httpx.get(
|
|
||||||
self._url("/api/v3/configuration"),
|
|
||||||
timeout=DEFAULT_TIMEOUT,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
return resp.json()
|
|
||||||
|
|
||||||
def health_check(self) -> dict:
|
|
||||||
"""Check if the Flamenco Manager is reachable and return version info."""
|
|
||||||
try:
|
|
||||||
resp = httpx.get(
|
|
||||||
self._url("/api/v3/version"),
|
|
||||||
timeout=5.0,
|
|
||||||
)
|
|
||||||
resp.raise_for_status()
|
|
||||||
data = resp.json()
|
|
||||||
return {
|
|
||||||
"available": True,
|
|
||||||
"version": data.get("version", "unknown"),
|
|
||||||
"name": data.get("name", "Flamenco"),
|
|
||||||
}
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning(f"Flamenco health check failed: {exc}")
|
|
||||||
return {
|
|
||||||
"available": False,
|
|
||||||
"version": None,
|
|
||||||
"name": None,
|
|
||||||
"error": str(exc)[:200],
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def get_flamenco_client(manager_url: str) -> FlamencoClient:
    """Build a :class:`FlamencoClient` bound to ``manager_url``."""
    client = FlamencoClient(manager_url)
    return client
|
|
||||||
@@ -1,12 +1,7 @@
|
|||||||
"""Render dispatcher — routes render jobs to Celery or Flamenco.
|
"""Render dispatcher — routes render jobs to Celery.
|
||||||
|
|
||||||
Backend selection priority:
|
All renders run via Celery workers (Flamenco removed in v2 refactor).
|
||||||
1. OutputType.render_backend per-type override ("celery" / "flamenco")
|
|
||||||
2. OutputType.is_animation — animations default to Flamenco
|
|
||||||
3. System setting render_backend — global default ("celery" / "flamenco" / "auto")
|
|
||||||
4. "auto" mode: stills → Celery, animations → Flamenco
|
|
||||||
"""
|
"""
|
||||||
import json
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
@@ -14,7 +9,6 @@ from sqlalchemy import select, update as sql_update
|
|||||||
from sqlalchemy.orm import Session, joinedload
|
from sqlalchemy.orm import Session, joinedload
|
||||||
|
|
||||||
from app.models.order_line import OrderLine
|
from app.models.order_line import OrderLine
|
||||||
from app.models.output_type import OutputType
|
|
||||||
from app.models.product import Product
|
from app.models.product import Product
|
||||||
from app.models.system_setting import SystemSetting
|
from app.models.system_setting import SystemSetting
|
||||||
|
|
||||||
@@ -29,113 +23,11 @@ def _load_setting(session: Session, key: str, default: str = "") -> str:
|
|||||||
return row.value if row else default
|
return row.value if row else default
|
||||||
|
|
||||||
|
|
||||||
def resolve_backend(output_type: "OutputType | None", system_backend: str) -> str:
    """Pick the render backend for an output type.

    Precedence, as implemented:

    1. No output type → ``"celery"``.
    2. ``output_type.render_backend`` when it is ``"celery"`` or ``"flamenco"``.
    3. ``system_backend`` when it is ``"celery"`` or ``"flamenco"``.
    4. Otherwise (e.g. ``"auto"``): animations → ``"flamenco"``,
       everything else → ``"celery"``.

    Returns:
        ``"celery"`` or ``"flamenco"``.
    """
    explicit_backends = ("celery", "flamenco")

    if output_type is None:
        return "celery"

    # The per-type override is consulted before the global system setting.
    for candidate in (output_type.render_backend, system_backend):
        if candidate in explicit_backends:
            return candidate

    # Neither level names a concrete backend: fall back to auto mode.
    return "flamenco" if output_type.is_animation else "celery"
|
|
||||||
|
|
||||||
|
|
||||||
def build_flamenco_job_settings(
    output_type: "OutputType",
    product: "Product",
    step_path: str,
    output_dir: str,
    system_settings: dict[str, str],
    lighting_only: bool = False,
    shadow_catcher: bool = False,
    camera_orbit: bool = True,
    cycles_device: str = "auto",
    rotation_x: float = 0.0,
    rotation_y: float = 0.0,
    rotation_z: float = 0.0,
) -> dict:
    """Build Flamenco job settings from output type and product metadata.

    Values in ``output_type.render_settings`` take precedence over
    ``system_settings``, which take precedence over built-in defaults.
    System settings are strings and may be present but empty (the
    dispatcher loads missing rows as ``""``); empty values are treated as
    "not configured" so they fall through to the built-in default.

    Args:
        output_type: Provides ``render_settings``, ``is_animation``,
            ``output_format`` and, if present, ``transparent_bg``.
        product: Provides ``cad_file`` and ``cad_part_materials``.
        step_path: Path of the STEP file, passed through unchanged.
        output_dir: Directory used for ``output_dir`` (animations) or as the
            parent of ``output_path`` (stills).
        system_settings: Global string settings keyed by name.
        lighting_only, shadow_catcher, cycles_device, rotation_x,
        rotation_y, rotation_z: Passed through unchanged.
        camera_orbit: Passed through for animations only.

    Returns:
        The settings dict for the Flamenco job.
    """
    render_settings = output_type.render_settings or {}

    engine = (
        render_settings.get("engine")
        or system_settings.get("blender_engine")
        or "cycles"
    )

    # Only parse the system-level sample count when it is actually needed.
    # The previous eager int(...) default raised ValueError on an empty
    # setting even when the output type supplied its own sample count.
    samples = render_settings.get("samples")
    if samples is None:
        raw_samples = system_settings.get(f"blender_{engine}_samples") or "256"
        try:
            samples = int(raw_samples)
        except (TypeError, ValueError):
            samples = 256

    stl_quality = (
        render_settings.get("stl_quality")
        or system_settings.get("stl_quality")
        or "low"
    )
    width = render_settings.get("width", 1920 if output_type.is_animation else 1024)
    height = render_settings.get("height", 1080 if output_type.is_animation else 1024)

    part_colors = {}
    part_names_ordered = []
    if product.cad_file and product.cad_file.parsed_objects:
        part_names_ordered = product.cad_file.parsed_objects.get("objects", [])
    materials_source = product.cad_part_materials
    if materials_source:
        from app.services.step_processor import build_part_colors
        part_colors = build_part_colors(part_names_ordered, materials_source)

    transparent_bg = bool(getattr(output_type, "transparent_bg", False))

    settings = {
        "step_path": step_path,
        "engine": engine,
        "samples": samples,
        "stl_quality": stl_quality,
        "width": width,
        "height": height,
        "part_colors_json": json.dumps(part_colors),
        "transparent_bg": transparent_bg,
        "template_path": "",
        "target_collection": "Product",
        "material_library_path": "",
        "material_map_json": "{}",
        "part_names_ordered_json": json.dumps(part_names_ordered),
        "lighting_only": lighting_only,
        "shadow_catcher": shadow_catcher,
        "cycles_device": cycles_device,
        "rotation_x": rotation_x,
        "rotation_y": rotation_y,
        "rotation_z": rotation_z,
    }

    # Denoising options are always sent as strings; unset ones become "".
    for dk in ('noise_threshold', 'denoiser', 'denoising_input_passes',
               'denoising_prefilter', 'denoising_quality', 'denoising_use_gpu'):
        settings[dk] = str(render_settings.get(dk, ""))

    if output_type.is_animation:
        # Turntable-specific settings
        settings["output_dir"] = output_dir
        settings["output_name"] = render_settings.get("output_name", "turntable")
        settings["frame_count"] = render_settings.get("frame_count", 120)
        settings["fps"] = render_settings.get("fps", 30)
        settings["turntable_degrees"] = render_settings.get("turntable_degrees", 360)
        settings["turntable_axis"] = render_settings.get("turntable_axis", "world_z")
        settings["bg_color"] = render_settings.get("bg_color", "")
        settings["camera_orbit"] = camera_orbit
    else:
        # Still-specific settings
        ext = output_type.output_format or "png"
        settings["output_path"] = f"{output_dir}/render.{ext}"

    return settings
|
|
||||||
|
|
||||||
|
|
||||||
def dispatch_render(order_line_id: str) -> dict:
|
def dispatch_render(order_line_id: str) -> dict:
|
||||||
"""Route a render job to Celery or Flamenco based on configuration.
|
"""Dispatch a render job to Celery.
|
||||||
|
|
||||||
Must be called from a sync context (Celery task or sync wrapper).
|
Must be called from a sync context (Celery task or sync wrapper).
|
||||||
Returns {"backend": "celery"|"flamenco", "job_ref": str}.
|
Returns {"backend": "celery", "job_ref": str}.
|
||||||
"""
|
"""
|
||||||
from app.config import settings as app_settings
|
from app.config import settings as app_settings
|
||||||
from app.services.render_log import emit, clear
|
from app.services.render_log import emit, clear
|
||||||
@@ -179,196 +71,26 @@ def dispatch_render(order_line_id: str) -> dict:
|
|||||||
|
|
||||||
cad_name = line.product.cad_file.original_name if line.product.cad_file else "?"
|
cad_name = line.product.cad_file.original_name if line.product.cad_file else "?"
|
||||||
emit(order_line_id, f"CAD file: {cad_name}")
|
emit(order_line_id, f"CAD file: {cad_name}")
|
||||||
|
emit(order_line_id, "Dispatching to Celery render worker")
|
||||||
|
|
||||||
# Load system settings
|
|
||||||
system_backend = _load_setting(session, "render_backend", "celery")
|
|
||||||
flamenco_url = _load_setting(session, "flamenco_manager_url", "http://flamenco-manager:8080")
|
|
||||||
|
|
||||||
backend = resolve_backend(line.output_type, system_backend)
|
|
||||||
emit(order_line_id, f"Resolved backend: {backend}")
|
|
||||||
|
|
||||||
# Mark as processing
|
|
||||||
now = datetime.utcnow()
|
now = datetime.utcnow()
|
||||||
session.execute(
|
session.execute(
|
||||||
sql_update(OrderLine)
|
sql_update(OrderLine)
|
||||||
.where(OrderLine.id == line.id)
|
.where(OrderLine.id == line.id)
|
||||||
.values(
|
.values(
|
||||||
render_status="processing",
|
render_status="processing",
|
||||||
render_backend_used=backend,
|
render_backend_used="celery",
|
||||||
render_started_at=now,
|
render_started_at=now,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
if backend == "flamenco":
|
|
||||||
emit(order_line_id, f"Submitting job to Flamenco Manager ({flamenco_url})")
|
|
||||||
result = _dispatch_flamenco(session, line, flamenco_url)
|
|
||||||
if result.get("error"):
|
|
||||||
emit(order_line_id, f"Flamenco submit failed: {result['error']}", "error")
|
|
||||||
else:
|
|
||||||
emit(order_line_id, f"Flamenco job submitted: {result.get('job_ref', '?')}")
|
|
||||||
return result
|
|
||||||
else:
|
|
||||||
emit(order_line_id, "Dispatching to Celery render worker")
|
|
||||||
return _dispatch_celery(order_line_id)
|
|
||||||
|
|
||||||
engine_db.dispose()
|
engine_db.dispose()
|
||||||
|
return _dispatch_celery(order_line_id)
|
||||||
|
|
||||||
|
|
||||||
def _dispatch_celery(order_line_id: str) -> dict:
    """Queue the Celery render task for ``order_line_id``.

    Returns:
        ``{"backend": "celery", "job_ref": <id of the queued task>}``.
    """
    from app.tasks.step_tasks import render_order_line_task

    async_result = render_order_line_task.delay(order_line_id)
    return {"backend": "celery", "job_ref": async_result.id}
|
||||||
|
|
||||||
|
|
||||||
def _dispatch_flamenco(session: Session, line: OrderLine, flamenco_url: str) -> dict:
    """Submit a render job for ``line`` to Flamenco Manager.

    Builds the job settings (render template, material library, rotation,
    output naming), submits the job and stores the returned job id on the
    order line. On any failure the order line is marked ``failed`` and the
    error is returned instead of raised.

    Args:
        session: Open SQLAlchemy session used for lookups and updates.
        line: Order line to render; its ``product`` is dereferenced
            unconditionally.
        flamenco_url: Base URL of the Flamenco Manager.

    Returns:
        ``{"backend": "flamenco", "job_ref": <job id>}`` on success, or
        ``{"backend": "flamenco", "job_ref": "", "error": <message>}``.
    """
    import re
    from app.services.flamenco_client import get_flamenco_client

    # Read once: the error path rolls the session back, which expires ``line``.
    line_id = line.id

    # Load all needed system settings (missing rows come back as "").
    all_keys = ["blender_engine", "blender_cycles_samples", "blender_eevee_samples", "stl_quality", "cycles_device"]
    sys_settings = {key: _load_setting(session, key, "") for key in all_keys}

    output_type = line.output_type
    product = line.product
    cad_file = product.cad_file

    # Load render_position for rotation values
    rotation_x = rotation_y = rotation_z = 0.0
    if line.render_position_id:
        from app.models.render_position import ProductRenderPosition
        rp = session.get(ProductRenderPosition, line.render_position_id)
        if rp:
            rotation_x, rotation_y, rotation_z = rp.rotation_x, rp.rotation_y, rp.rotation_z

    # Flamenco mounts the uploads volume at /shared, backend uses /app/uploads
    raw_path = cad_file.stored_path if cad_file else ""
    step_path = raw_path.replace("/app/uploads/", "/shared/") if raw_path else ""
    output_dir = f"/shared/renders/{line_id}"

    job_type = "schaeffler-turntable" if (output_type and output_type.is_animation) else "schaeffler-still"

    # Resolve render template + material library BEFORE building job settings
    # (template.lighting_only is needed by build_flamenco_job_settings)
    from app.services.template_service import resolve_template, get_material_library_path

    category_key = product.category_key if product else None
    ot_id = str(line.output_type_id) if line.output_type_id else None
    template = resolve_template(category_key=category_key, output_type_id=ot_id)
    material_library = get_material_library_path()

    # Resolve cycles_device: per-output-type override wins, fall back to system setting
    ot_cycles_device = output_type.cycles_device if output_type else None
    effective_cycles_device = ot_cycles_device or sys_settings.get("cycles_device", "gpu") or "gpu"

    settings = build_flamenco_job_settings(
        output_type=output_type,
        product=product,
        step_path=step_path,
        output_dir=output_dir,
        system_settings=sys_settings,
        lighting_only=bool(template.lighting_only) if template else False,
        shadow_catcher=bool(template.shadow_catcher_enabled) if template else False,
        camera_orbit=bool(template.camera_orbit) if template else True,
        cycles_device=effective_cycles_device,
        rotation_x=rotation_x,
        rotation_y=rotation_y,
        rotation_z=rotation_z,
    )

    if template:
        # Remap path for Flamenco shared volume
        tmpl_path = template.blend_file_path.replace("/app/uploads/", "/shared/")
        settings["template_path"] = tmpl_path
        settings["target_collection"] = template.target_collection
        logger.info(
            "Flamenco job: using render template '%s' (id=%s, path=%s, collection=%s)",
            template.name, template.id, tmpl_path, template.target_collection,
        )
    else:
        logger.info(
            "Flamenco job: no render template found for "
            "category_key=%r, output_type_id=%r — using factory settings",
            category_key, ot_id,
        )

    # Material library + material map: send whenever library exists and product
    # has material assignments — works with or without a render template.
    # When a template is present, only apply if material_replace_enabled is set.
    materials_source = product.cad_part_materials
    use_materials = bool(material_library and materials_source)
    if template and not template.material_replace_enabled:
        use_materials = False

    if use_materials:
        mat_lib_path = material_library.replace("/app/uploads/", "/shared/")
        settings["material_library_path"] = mat_lib_path
        mat_map = {
            m["part_name"]: m["material"]
            for m in materials_source
            if m.get("part_name") and m.get("material")
        }
        # Resolve raw material names to SCHAEFFLER library names via aliases
        from app.services.material_service import resolve_material_map
        mat_map = resolve_material_map(mat_map)
        settings["material_map_json"] = json.dumps(mat_map)

    # Output naming: meaningful filename instead of generic render.ext
    def _sanitize(s: str) -> str:
        return re.sub(r'[^\w\-.]', '_', s.strip())[:100]

    product_name = product.name or product.pim_id or "product"
    ot_name = output_type.name if output_type else "render"

    if not (output_type and output_type.is_animation):
        ext = (output_type.output_format or "png") if output_type else "png"
        filename = f"{_sanitize(product_name)}_{_sanitize(ot_name)}.{ext}"
        settings["output_path"] = f"{output_dir}/{filename}"

    metadata = {
        "order_line_id": str(line_id),
        "order_id": str(line.order_id),
        "product_name": product.name or "",
        "output_type": output_type.name if output_type else "",
        "category": product.category_key or "",
    }

    # Reuse the fallback-protected names so the job is never called "None - …".
    job_name = f"{product_name} - {ot_name}"

    try:
        client = get_flamenco_client(flamenco_url)
        job = client.submit_job(
            name=job_name[:200],
            job_type=job_type,
            settings=settings,
            metadata=metadata,
        )
        job_id = job.get("id", "")

        # Save flamenco_job_id
        session.execute(
            sql_update(OrderLine)
            .where(OrderLine.id == line_id)
            .values(flamenco_job_id=job_id)
        )
        session.commit()

        logger.info("Flamenco job submitted: %s for OrderLine %s", job_id, line_id)
        return {"backend": "flamenco", "job_ref": job_id}

    except Exception as exc:
        logger.error("Flamenco submit failed for OrderLine %s: %s", line_id, exc)
        # If the failure came from the execute/commit above, the session is
        # unusable until rolled back; on a clean session this is a no-op.
        session.rollback()
        session.execute(
            sql_update(OrderLine)
            .where(OrderLine.id == line_id)
            .values(
                render_status="failed",
                render_completed_at=datetime.utcnow(),
                render_log={"error": f"Flamenco submit failed: {str(exc)[:500]}"},
            )
        )
        session.commit()
        return {"backend": "flamenco", "job_ref": "", "error": str(exc)}
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ celery_app = Celery(
|
|||||||
"schaefflerautomat",
|
"schaefflerautomat",
|
||||||
broker=settings.redis_url,
|
broker=settings.redis_url,
|
||||||
backend=settings.redis_url,
|
backend=settings.redis_url,
|
||||||
include=["app.tasks.step_tasks", "app.tasks.ai_tasks", "app.tasks.flamenco_tasks"],
|
include=["app.tasks.step_tasks", "app.tasks.ai_tasks"],
|
||||||
)
|
)
|
||||||
|
|
||||||
celery_app.conf.update(
|
celery_app.conf.update(
|
||||||
@@ -17,20 +17,6 @@ celery_app.conf.update(
|
|||||||
task_routes={
|
task_routes={
|
||||||
"app.tasks.step_tasks.*": {"queue": "step_processing"},
|
"app.tasks.step_tasks.*": {"queue": "step_processing"},
|
||||||
"app.tasks.ai_tasks.*": {"queue": "ai_validation"},
|
"app.tasks.ai_tasks.*": {"queue": "ai_validation"},
|
||||||
"app.tasks.flamenco_tasks.*": {"queue": "step_processing"},
|
|
||||||
},
|
|
||||||
beat_schedule={
|
|
||||||
"poll-flamenco-jobs": {
|
|
||||||
"task": "app.tasks.flamenco_tasks.poll_flamenco_jobs",
|
|
||||||
"schedule": 10.0, # every 10 seconds
|
|
||||||
# Discard if not consumed before the next run; prevents queue build-up
|
|
||||||
# when workers are busy with long-running STEP/render tasks.
|
|
||||||
"options": {"expires": 9},
|
|
||||||
},
|
|
||||||
"check-stalled-renders": {
|
|
||||||
"task": "app.tasks.flamenco_tasks.check_stalled_renders",
|
|
||||||
"schedule": 300.0, # every 5 minutes
|
|
||||||
"options": {"expires": 290},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
|
beat_schedule={},
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,335 +0,0 @@
|
|||||||
"""Celery tasks for polling Flamenco job status and watchdog recovery."""
|
|
||||||
import logging
|
|
||||||
from datetime import datetime, timedelta
|
|
||||||
|
|
||||||
from app.tasks.celery_app import celery_app
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Flamenco job status → our OrderLine.render_status.
# Every non-terminal Flamenco state stays "processing"; a canceled job is
# reported as "failed".
FLAMENCO_STATUS_MAP = {
    "queued": "processing",
    "active": "processing",
    "completed": "completed",
    "failed": "failed",
    "canceled": "failed",
    "cancel-requested": "processing",
    "paused": "processing",
    # Listed explicitly for parity with the stalled-render watchdog, which
    # treats this state as live; it was previously covered only by the
    # lookup default.
    "pause-requested": "processing",
}
|
|
||||||
|
|
||||||
|
|
||||||
@celery_app.task(name="app.tasks.flamenco_tasks.poll_flamenco_jobs", queue="step_processing")
def poll_flamenco_jobs():
    """Poll Flamenco Manager for active render jobs and update OrderLine status.

    Runs on a Celery Beat schedule (every 10 seconds).

    Uses a Redis lock (TTL=9s) so that at most one poll does real work per
    10-second window. When duplicates pile up in the queue (e.g. all workers
    are busy with long STEP/render tasks), they fail to acquire the lock and
    return immediately, draining the queue without redundant work.

    Returns:
        ``{"skipped": "deduplicated"}`` when the lock is held,
        ``{"polled": 0}`` when no line is waiting on Flamenco, otherwise
        ``{"polled": <lines checked>, "updated": <lines changed>}``.
    """
    import redis as redis_lib
    from app.config import settings as app_settings

    # Deduplicate: skip if a poll ran within the last 9 seconds
    try:
        r = redis_lib.from_url(app_settings.redis_url)
        acquired = r.set("flamenco_poll_lock", "1", nx=True, ex=9)
        if not acquired:
            return {"skipped": "deduplicated"}
    except Exception as exc:
        # Best effort: without Redis we lose deduplication, not polling.
        logger.debug("Flamenco poll lock unavailable (%s) — polling anyway", exc)

    from sqlalchemy import create_engine, select, update as sql_update
    from sqlalchemy.orm import Session
    from app.models.order_line import OrderLine
    from app.models.system_setting import SystemSetting
    from app.services.flamenco_client import get_flamenco_client

    sync_url = app_settings.database_url.replace("+asyncpg", "")
    engine = create_engine(sync_url)

    # Track orders whose lines transitioned to a terminal state
    completed_order_ids = set()
    polled = 0
    updated = 0

    # try/finally so the engine is disposed on every exit path, and only
    # after the session block has closed its connection.
    try:
        with Session(engine) as session:
            # Load Flamenco Manager URL
            row = session.execute(
                select(SystemSetting).where(SystemSetting.key == "flamenco_manager_url")
            ).scalar_one_or_none()
            manager_url = row.value if row else "http://flamenco-manager:8080"

            # Find all OrderLines dispatched to Flamenco that are still processing
            lines = session.execute(
                select(OrderLine).where(
                    OrderLine.render_backend_used == "flamenco",
                    OrderLine.render_status == "processing",
                    OrderLine.flamenco_job_id.isnot(None),
                )
            ).scalars().all()

            if not lines:
                return {"polled": 0}

            polled = len(lines)
            client = get_flamenco_client(manager_url)

            for line in lines:
                try:
                    job = client.get_job(line.flamenco_job_id)
                    flamenco_status = job.get("status", "")
                    our_status = FLAMENCO_STATUS_MAP.get(flamenco_status, "processing")

                    if our_status == line.render_status:
                        continue  # No change

                    updates = {"render_status": our_status}

                    if our_status == "completed":
                        updates["render_completed_at"] = datetime.utcnow()
                        # Record the job's activity text, if any
                        activity = job.get("activity", "")
                        if activity:
                            updates["render_log"] = {
                                "flamenco_job_id": line.flamenco_job_id,
                                "flamenco_status": flamenco_status,
                                "activity": activity,
                            }
                        # Set result path based on job type
                        job_type = job.get("type", "")
                        job_settings = job.get("settings", {})
                        if job_type == "schaeffler-turntable":
                            output_dir = job_settings.get("output_dir", "")
                            output_name = job_settings.get("output_name", "turntable")
                            updates["result_path"] = f"{output_dir}/{output_name}.mp4"
                        elif job_type == "schaeffler-still":
                            updates["result_path"] = job_settings.get("output_path", "")

                    elif our_status == "failed":
                        updates["render_completed_at"] = datetime.utcnow()
                        updates["render_log"] = {
                            "flamenco_job_id": line.flamenco_job_id,
                            "flamenco_status": flamenco_status,
                            "error": job.get("activity", "Job failed"),
                        }

                    session.execute(
                        sql_update(OrderLine)
                        .where(OrderLine.id == line.id)
                        .values(**updates)
                    )
                    updated += 1
                    logger.info(
                        "Flamenco job %s: %s → render_status=%s",
                        line.flamenco_job_id, flamenco_status, our_status,
                    )

                    # Track orders with lines that reached a terminal state
                    if our_status in ("completed", "failed"):
                        completed_order_ids.add(str(line.order_id))

                except Exception as exc:
                    # One unreachable or broken job must not stop the others.
                    logger.warning(
                        "Failed to poll Flamenco job %s: %s",
                        line.flamenco_job_id, exc,
                    )

            if updated:
                session.commit()
    finally:
        engine.dispose()

    # Auto-advance orders if all renderable lines are done
    if completed_order_ids:
        from app.services.order_status_service import check_order_completion
        for oid in completed_order_ids:
            check_order_completion(oid)

    return {"polled": polled, "updated": updated}
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
# Stalled-render watchdog
|
|
||||||
# ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
@celery_app.task(name="app.tasks.flamenco_tasks.check_stalled_renders", queue="step_processing")
|
|
||||||
def check_stalled_renders():
|
|
||||||
"""Watchdog: detect and re-dispatch render jobs stuck in 'processing'.
|
|
||||||
|
|
||||||
Runs on a Celery Beat schedule (every 5 minutes).
|
|
||||||
|
|
||||||
After a docker restart, Celery workers lose in-flight tasks — the DB still
|
|
||||||
shows render_status='processing' indefinitely. This task:
|
|
||||||
|
|
||||||
* For **Celery** lines: uses Celery inspect to check whether any worker is
|
|
||||||
still actively executing the task. If not (e.g. after a restart), and
|
|
||||||
the job has been stuck longer than ``render_stall_timeout_minutes``
|
|
||||||
(default: 120 min), it is reset to 'pending' and re-dispatched.
|
|
||||||
|
|
||||||
* For **Flamenco** lines: queries the Flamenco Manager. If the manager
|
|
||||||
reports the job as still active the line is left alone; if the job is
|
|
||||||
gone or in a terminal/error state it is re-dispatched.
|
|
||||||
"""
|
|
||||||
from sqlalchemy import create_engine, select, update as sql_update
|
|
||||||
from sqlalchemy.orm import Session
|
|
||||||
from app.config import settings as app_settings
|
|
||||||
from app.models.order_line import OrderLine
|
|
||||||
from app.models.system_setting import SystemSetting
|
|
||||||
|
|
||||||
sync_url = app_settings.database_url.replace("+asyncpg", "")
|
|
||||||
engine = create_engine(sync_url)
|
|
||||||
|
|
||||||
with Session(engine) as session:
|
|
||||||
# ── Read timeout from system settings ────────────────────────────────
|
|
||||||
row = session.execute(
|
|
||||||
select(SystemSetting).where(SystemSetting.key == "render_stall_timeout_minutes")
|
|
||||||
).scalar_one_or_none()
|
|
||||||
try:
|
|
||||||
timeout_minutes = int(row.value) if row else 120
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
timeout_minutes = 120
|
|
||||||
|
|
||||||
cutoff = datetime.utcnow() - timedelta(minutes=timeout_minutes)
|
|
||||||
|
|
||||||
stalled_lines = session.execute(
|
|
||||||
select(OrderLine).where(
|
|
||||||
OrderLine.render_status == "processing",
|
|
||||||
OrderLine.render_started_at.isnot(None),
|
|
||||||
OrderLine.render_started_at < cutoff,
|
|
||||||
)
|
|
||||||
).scalars().all()
|
|
||||||
|
|
||||||
if not stalled_lines:
|
|
||||||
engine.dispose()
|
|
||||||
return {"checked": 0, "restarted": 0, "timeout_minutes": timeout_minutes}
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"[watchdog] Found %d stalled render(s) older than %d minutes",
|
|
||||||
len(stalled_lines), timeout_minutes,
|
|
||||||
)
|
|
||||||
|
|
||||||
# ── Build set of order_line_ids actively running on Celery workers ───
|
|
||||||
active_celery_line_ids: set[str] = set()
|
|
||||||
inspect_ok = False
|
|
||||||
try:
|
|
||||||
inspect = celery_app.control.inspect(timeout=2)
|
|
||||||
active_tasks = inspect.active() or {}
|
|
||||||
for worker_tasks in active_tasks.values():
|
|
||||||
for task_info in (worker_tasks or []):
|
|
||||||
args = task_info.get("args", [])
|
|
||||||
if args:
|
|
||||||
active_celery_line_ids.add(str(args[0]))
|
|
||||||
inspect_ok = True
|
|
||||||
except Exception as exc:
|
|
||||||
logger.warning(
|
|
||||||
"[watchdog] Celery inspect failed (%s) — will re-dispatch all timed-out Celery jobs",
|
|
||||||
exc,
|
|
||||||
)
|
|
||||||
|
|
||||||
# ── Load Flamenco Manager URL ─────────────────────────────────────────
|
|
||||||
manager_url = "http://flamenco-manager:8080"
|
|
||||||
try:
|
|
||||||
url_row = session.execute(
|
|
||||||
select(SystemSetting).where(SystemSetting.key == "flamenco_manager_url")
|
|
||||||
).scalar_one_or_none()
|
|
||||||
if url_row:
|
|
||||||
manager_url = url_row.value
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# ── Decide which lines to restart ────────────────────────────────────
|
|
||||||
to_restart: list[OrderLine] = []
|
|
||||||
|
|
||||||
for line in stalled_lines:
|
|
||||||
line_id = str(line.id)
|
|
||||||
|
|
||||||
if line.flamenco_job_id:
|
|
||||||
# Flamenco job: verify with manager before re-dispatching
|
|
||||||
try:
|
|
||||||
from app.services.flamenco_client import get_flamenco_client
|
|
||||||
client = get_flamenco_client(manager_url)
|
|
||||||
job = client.get_job(line.flamenco_job_id)
|
|
||||||
flamenco_status = job.get("status", "")
|
|
||||||
if flamenco_status in (
|
|
||||||
"active", "queued", "paused",
|
|
||||||
"pause-requested", "cancel-requested",
|
|
||||||
):
|
|
||||||
logger.info(
|
|
||||||
"[watchdog] Flamenco job %s is still %s — skipping line %s",
|
|
||||||
line.flamenco_job_id, flamenco_status, line_id,
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
logger.info(
|
|
||||||
"[watchdog] Flamenco job %s status=%r → re-dispatching line %s",
|
|
||||||
line.flamenco_job_id, flamenco_status, line_id,
|
|
||||||
)
|
|
||||||
except Exception as exc:
|
|
||||||
# Manager unreachable — skip to avoid false restarts
|
|
||||||
logger.warning(
|
|
||||||
"[watchdog] Cannot reach Flamenco for job %s (%s) — skipping line %s",
|
|
||||||
line.flamenco_job_id, exc, line_id,
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# Celery job: skip if still actively running on a worker
|
|
||||||
if inspect_ok and line_id in active_celery_line_ids:
|
|
||||||
logger.info(
|
|
||||||
"[watchdog] Celery render for line %s still active — skipping", line_id
|
|
||||||
)
|
|
||||||
continue
|
|
||||||
logger.info(
|
|
||||||
"[watchdog] Celery render for line %s not found in active tasks — re-dispatching",
|
|
||||||
line_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
to_restart.append(line)
|
|
||||||
|
|
||||||
if not to_restart:
|
|
||||||
engine.dispose()
|
|
||||||
return {
|
|
||||||
"checked": len(stalled_lines),
|
|
||||||
"restarted": 0,
|
|
||||||
"timeout_minutes": timeout_minutes,
|
|
||||||
}
|
|
||||||
|
|
||||||
# ── Reset stalled lines to pending ───────────────────────────────────
|
|
||||||
for line in to_restart:
|
|
||||||
session.execute(
|
|
||||||
sql_update(OrderLine)
|
|
||||||
.where(OrderLine.id == line.id)
|
|
||||||
.values(
|
|
||||||
render_status="pending",
|
|
||||||
render_started_at=None,
|
|
||||||
render_backend_used=None,
|
|
||||||
flamenco_job_id=None,
|
|
||||||
render_log={
|
|
||||||
"watchdog": (
|
|
||||||
f"Auto-restarted after {timeout_minutes} min stall "
|
|
||||||
f"(previous backend: {line.render_backend_used or 'unknown'})"
|
|
||||||
)
|
|
||||||
},
|
|
||||||
)
|
|
||||||
)
|
|
||||||
session.commit()
|
|
||||||
|
|
||||||
engine.dispose()
|
|
||||||
|
|
||||||
# ── Re-dispatch outside DB session ───────────────────────────────────────
|
|
||||||
from app.services.render_dispatcher import dispatch_render
|
|
||||||
restarted = 0
|
|
||||||
for line in to_restart:
|
|
||||||
try:
|
|
||||||
dispatch_render(str(line.id))
|
|
||||||
restarted += 1
|
|
||||||
logger.info("[watchdog] Re-dispatched render for order line %s", line.id)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.error(
|
|
||||||
"[watchdog] Failed to re-dispatch line %s: %s — left as pending", line.id, exc
|
|
||||||
)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"checked": len(stalled_lines),
|
|
||||||
"restarted": restarted,
|
|
||||||
"timeout_minutes": timeout_minutes,
|
|
||||||
}
|
|
||||||
@@ -170,39 +170,6 @@ services:
|
|||||||
- ./threejs-renderer:/app
|
- ./threejs-renderer:/app
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|
||||||
flamenco-manager:
|
|
||||||
build: ./flamenco
|
|
||||||
environment:
|
|
||||||
- FLAMENCO_MODE=manager
|
|
||||||
ports:
|
|
||||||
- "8080:8080"
|
|
||||||
volumes:
|
|
||||||
- uploads:/shared
|
|
||||||
- flamenco-data:/data
|
|
||||||
- ./flamenco/scripts:/opt/flamenco/scripts
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
flamenco-worker:
|
|
||||||
build: ./flamenco
|
|
||||||
environment:
|
|
||||||
- FLAMENCO_MODE=worker
|
|
||||||
- FLAMENCO_MANAGER_URL=http://flamenco-manager:8080
|
|
||||||
volumes:
|
|
||||||
- uploads:/shared
|
|
||||||
- /opt/blender:/opt/blender:ro
|
|
||||||
- ./flamenco/scripts:/opt/flamenco/scripts
|
|
||||||
depends_on:
|
|
||||||
- flamenco-manager
|
|
||||||
deploy:
|
|
||||||
replicas: 1
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
count: 1
|
|
||||||
capabilities: [gpu, compute, utility, graphics]
|
|
||||||
restart: unless-stopped
|
|
||||||
|
|
||||||
frontend:
|
frontend:
|
||||||
build:
|
build:
|
||||||
context: ./frontend
|
context: ./frontend
|
||||||
@@ -220,4 +187,3 @@ services:
|
|||||||
volumes:
|
volumes:
|
||||||
pgdata:
|
pgdata:
|
||||||
uploads:
|
uploads:
|
||||||
flamenco-data:
|
|
||||||
|
|||||||
Reference in New Issue
Block a user