feat(gpu): GPU health check + RENDER_DEVICE_USED token + strict mode
- gpu_probe.py: Blender script that probes OPTIX/CUDA/HIP/ONEAPI and
exits 1 on no GPU — used at startup + on-demand from Admin UI
- blender_render.py, still_render.py, turntable_render.py: emit
RENDER_DEVICE_USED: engine=CYCLES device=GPU|CPU compute_type=...
after GPU activation; exit 2 when CYCLES_DEVICE=gpu but rendering fell back to CPU
- render_blender.py: parse RENDER_DEVICE_USED token into render_log
(device_used, compute_type, gpu_fallback); handle exit code 2 as
explicit GPU strict-mode failure
- check_version.py: check_gpu() runs gpu_probe.py at container startup;
CYCLES_DEVICE=gpu aborts startup if no GPU found
- docker-compose.yml: CYCLES_DEVICE=${CYCLES_DEVICE:-auto} env var
- gpu_tasks.py: probe_gpu Celery task on thumbnail_rendering queue;
saves result to system_settings.gpu_probe_last_result; beat every 30min
- worker.py: POST /probe/gpu (trigger) + GET /probe/gpu/result (last result)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -64,5 +64,62 @@ def check_version():
|
||||
print(f"Blender {version_str} OK (>= {MIN_VERSION_STR})")
|
||||
|
||||
|
||||
def check_gpu():
    """Run the Blender GPU probe script and report the result.

    Behaviour is selected by the CYCLES_DEVICE environment variable
    (compared case-insensitively, surrounding whitespace ignored):

    - "cpu" → skip the probe entirely
    - "gpu" → require a GPU; exit the process with status 1 if the probe
      exits non-zero
    - "auto" (default) or any other value → warn if the probe exits
      non-zero, but continue

    A missing probe script, a probe timeout, or any other exception raised
    while running or parsing the probe is logged as a warning and does not
    abort startup, regardless of mode.

    Returns:
        None. Results are reported on stdout with a "[check_version]" prefix.

    Raises:
        SystemExit: with code 1 when CYCLES_DEVICE is "gpu" and the probe
            exits with a non-zero status.
    """
    # strip() so values such as "gpu " from env files are not silently
    # treated as "auto".
    cycles_device = os.environ.get("CYCLES_DEVICE", "auto").strip().lower()
    if cycles_device == "cpu":
        print("[check_version] GPU check skipped (CYCLES_DEVICE=cpu)", flush=True)
        return

    blender_bin = find_blender()
    probe_script = Path("/render-scripts/gpu_probe.py")
    if not probe_script.exists():
        print(
            f"[check_version] WARNING: gpu_probe.py not found at {probe_script}",
            flush=True,
        )
        return

    try:
        result = subprocess.run(
            [blender_bin, "--background", "--python", str(probe_script)],
            capture_output=True, text=True, timeout=45,
        )
        probe_lines = result.stdout.splitlines()

        if result.returncode == 0:
            ok_line = next(
                (line for line in probe_lines if "GPU_PROBE_OK" in line), None
            )
            if ok_line is not None:
                print(f"[check_version] {ok_line}", flush=True)
            else:
                # Exit status says success but the expected token is missing;
                # say so instead of leaving the startup log silent.
                print(
                    "[check_version] WARNING: GPU probe exited 0 "
                    "but printed no GPU_PROBE_OK line",
                    flush=True,
                )
        else:
            # Prefer the probe's own failure line; fall back to a generic text.
            msg = next(
                (line for line in probe_lines if "GPU_PROBE_FAIL" in line),
                "No GPU detected — renders will use CPU",
            )
            if cycles_device == "gpu":
                print(f"[check_version] ERROR: {msg}", flush=True)
                print(
                    "[check_version] CYCLES_DEVICE=gpu requires GPU — aborting startup",
                    flush=True,
                )
                # SystemExit is not an Exception subclass, so the handlers
                # below do not swallow this exit.
                sys.exit(1)
            else:
                print(
                    f"[check_version] WARNING: {msg} (set CYCLES_DEVICE=gpu to enforce)",
                    flush=True,
                )
    except subprocess.TimeoutExpired:
        print("[check_version] WARNING: GPU probe timed out after 45s", flush=True)
    except Exception as e:
        # Best-effort check: any other probe error is reported, not fatal.
        print(f"[check_version] WARNING: GPU probe failed: {e}", flush=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the Blender version check first, then the
    # GPU check.
    check_version()
    check_gpu()
|
||||
|
||||
Reference in New Issue
Block a user