feat: initial commit
This commit is contained in:
@@ -0,0 +1,177 @@
|
||||
"""Scale Flamenco worker containers via the Docker socket.
|
||||
|
||||
Uses the Docker Python SDK (docker>=6.1.0) to list, start, and stop containers.
|
||||
Requires /var/run/docker.sock to be mounted into the backend container.
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Compose project that owns the worker containers; overridable via the
# COMPOSE_PROJECT_NAME environment variable.
COMPOSE_PROJECT = os.getenv("COMPOSE_PROJECT_NAME", "schaefflerautomat")

# Compose service whose containers this module lists, starts, and stops.
SERVICE_NAME = "flamenco-worker"
|
||||
|
||||
|
||||
def _get_client():
    """Create a Docker client from the environment (DOCKER_HOST / default socket)."""
    # Deferred import: the module stays importable even where the docker SDK
    # is not installed; only code paths that actually scale need it.
    import docker

    return docker.from_env()
|
||||
|
||||
|
||||
def get_worker_containers(client=None):
    """Return all flamenco-worker containers (running + stopped) sorted by name.

    *client* may be a pre-built docker client; when ``None``, one is created
    from the environment.
    """
    if client is None:
        client = _get_client()
    # Select by the compose labels so that stopped workers are matched too
    # (all=True includes non-running containers).
    label_filters = {
        "label": [
            f"com.docker.compose.project={COMPOSE_PROJECT}",
            f"com.docker.compose.service={SERVICE_NAME}",
        ]
    }
    workers = client.containers.list(all=True, filters=label_filters)
    workers.sort(key=lambda container: container.name)
    return workers
|
||||
|
||||
|
||||
def get_running_worker_count(client=None) -> int:
    """Return how many flamenco-worker containers are currently running.

    Best-effort: returns ``-1`` (and logs a warning) when the Docker daemon
    cannot be queried, instead of raising.
    """
    try:
        docker_client = _get_client() if client is None else client
        workers = get_worker_containers(docker_client)
        running = [container for container in workers if container.status == "running"]
        return len(running)
    except Exception as exc:  # noqa: BLE001 — polling callers must never crash
        log.warning("docker_scaler: could not read worker count: %s", exc)
        return -1
|
||||
|
||||
|
||||
def scale_workers(target: int) -> dict:
    """Scale flamenco-worker containers to *target* count.

    Scaling down stops and removes the highest-numbered running workers.
    Scaling up first restarts any existing *stopped* worker containers —
    creating a fresh container under a name that a stopped worker still holds
    would fail with a Docker 409 name conflict — and only then clones new
    containers from an existing worker's configuration.

    Returns a dict with keys:
        previous – containers running before
        current  – containers running after
        delta    – change (negative = stopped, positive = started)
        message  – human-readable summary

    Raises:
        RuntimeError: when scaling up and no worker container exists at all
            to clone the configuration from.
    """
    client = _get_client()

    all_workers = get_worker_containers(client)
    running = [c for c in all_workers if c.status == "running"]
    previous = len(running)

    if target == previous:
        return {"previous": previous, "current": previous, "delta": 0,
                "message": f"Already at {previous} worker(s) — no change"}

    if target < previous:
        return _scale_down(running, previous, target)

    return _scale_up(client, all_workers, running, previous, target)


def _scale_down(running, previous: int, target: int) -> dict:
    """Stop and remove running workers until only *target* remain."""
    # Stop highest-numbered containers first to minimise disruption
    to_stop = sorted(running, key=lambda c: c.name, reverse=True)[: previous - target]
    for c in to_stop:
        log.info("docker_scaler: stopping %s", c.name)
        c.stop(timeout=20)
        c.remove()
    return {
        "previous": previous,
        "current": target,
        "delta": target - previous,
        "message": f"Stopped {len(to_stop)} worker(s): {[c.name for c in to_stop]}",
    }


def _template_mounts(attrs: dict) -> list:
    """Reconstruct ``docker.types.Mount`` objects from a container's inspect data."""
    from docker.types import Mount

    mounts = []
    for m in (attrs.get("Mounts") or []):
        mount_type = m.get("Type", "bind")
        # Named volumes are addressed by volume name; bind mounts by host path.
        source = m.get("Name", "") if mount_type == "volume" else m.get("Source", "")
        mounts.append(
            Mount(
                target=m["Destination"],
                source=source,
                type=mount_type,
                read_only=not m.get("RW", True),
            )
        )
    return mounts


def _template_device_requests(attrs: dict):
    """Reconstruct GPU device requests (nvidia) from inspect data, or ``None``."""
    import docker

    raw_dr = (attrs.get("HostConfig") or {}).get("DeviceRequests") or []
    if not raw_dr:
        return None
    return [
        docker.types.DeviceRequest(
            driver=dr.get("Driver", ""),
            count=dr.get("Count", -1),
            device_ids=dr.get("DeviceIDs") or [],
            capabilities=dr.get("Capabilities") or [],
            options=dr.get("Options") or {},
        )
        for dr in raw_dr
    ]


def _scale_up(client, all_workers, running, previous: int, target: int) -> dict:
    """Bring the running-worker count up from *previous* to *target*."""
    deficit = target - previous
    started = []

    # 1) Restart existing stopped workers first. Besides being cheaper than a
    #    clone, this avoids the name conflict that containers.create() would
    #    raise for a name a stopped container still owns.
    stopped = [c for c in all_workers if c.status != "running"]
    for c in stopped[:deficit]:
        log.info("docker_scaler: starting existing container %s", c.name)
        c.start()
        started.append(c.name)

    if len(started) == deficit:
        return {
            "previous": previous,
            "current": target,
            "delta": deficit,
            "message": f"Started {len(started)} new worker(s): {started}",
        }

    # 2) Clone the rest from an existing worker's configuration.
    template = running[0] if running else (all_workers[0] if all_workers else None)
    if template is None:
        raise RuntimeError(
            "No existing flamenco-worker container found to clone configuration from. "
            "Ensure at least one worker container exists (even if stopped)."
        )

    attrs = template.attrs
    image = attrs["Config"]["Image"]
    env = attrs["Config"].get("Env") or []
    mounts = _template_mounts(attrs)
    device_requests = _template_device_requests(attrs)

    # Network(s) the template is connected to
    network_names = list(
        (attrs.get("NetworkSettings") or {}).get("Networks", {}).keys()
    )

    restart_policy_name = (
        (attrs.get("HostConfig") or {})
        .get("RestartPolicy", {})
        .get("Name", "unless-stopped")
    ) or "unless-stopped"

    # Only hand out compose-style numbers whose names are actually free.
    taken = {c.name for c in all_workers}
    number = 1
    while len(started) < deficit:
        while f"{COMPOSE_PROJECT}-{SERVICE_NAME}-{number}" in taken:
            number += 1
        new_name = f"{COMPOSE_PROJECT}-{SERVICE_NAME}-{number}"
        taken.add(new_name)
        labels = {
            "com.docker.compose.project": COMPOSE_PROJECT,
            "com.docker.compose.service": SERVICE_NAME,
            "com.docker.compose.container-number": str(number),
        }

        log.info("docker_scaler: creating %s from image %s", new_name, image)
        container = client.containers.create(
            image=image,
            name=new_name,
            environment=env,
            labels=labels,
            mounts=mounts,
            restart_policy={"Name": restart_policy_name},
            device_requests=device_requests,
        )

        for net_name in network_names:
            try:
                client.networks.get(net_name).connect(container)
                log.info("docker_scaler: connected %s to network %s", new_name, net_name)
            except Exception as exc:
                log.warning("docker_scaler: could not connect to network %s: %s", net_name, exc)

        container.start()
        started.append(new_name)
        log.info("docker_scaler: started %s", new_name)

    return {
        "previous": previous,
        "current": target,
        "delta": target - previous,
        "message": f"Started {len(started)} new worker(s): {started}",
    }
|
||||
Reference in New Issue
Block a user