#!/usr/bin/env bash
#
# repo_hygiene.sh - report, and optionally delete, generated artifacts in the
# repository (debug output, build output, Python caches).
#
# Without --apply the script is a dry run: it only lists what it would remove.
# With --deep it additionally targets heavy local environments
# (frontend/node_modules, backend/.venv).
#
# All paths are handled relative to the git top-level directory; the script
# changes into it before touching anything.
set -euo pipefail

APPLY=0
DEEP=0
REPO_ROOT=""
PERMISSION_FAILURE_PATHS=()

# Fixed paths removed by the conservative cleanup.
SAFE_TARGETS=(
  tmp
  frontend/dist
  backend/.pytest_cache
  backend/celerybeat-schedule
)
readonly SAFE_TARGETS

# Extra paths removed only with --deep.
DEEP_TARGETS=(
  frontend/node_modules
  backend/.venv
)
readonly DEEP_TARGETS

# find(1) expression that stops traversal at directories that must never be
# scanned: git metadata and the heavy local environments.
PROTECTED_PRUNE=(
  '(' -path './.git' -o -path './backend/.venv' -o -path './frontend/node_modules' ')'
  -prune
)
readonly PROTECTED_PRUNE

# find(1) tests selecting every generated artifact whose ownership matters
# for cleanup. Shared by the ownership report and the printed manual hint so
# the two cannot drift apart.
OWNERSHIP_TESTS=(
  -path './tmp' -o
  -path './frontend/dist' -o
  -path './backend/.pytest_cache' -o
  -path './backend/celerybeat-schedule' -o
  -type d -name '__pycache__' -o
  -type f '(' -name '*.pyc' -o -name '*.pyo' -o -name 'core' ')'
)
readonly OWNERSHIP_TESTS

# Prints the command-line help to stdout.
usage() {
  cat <<'EOF'
Usage:
  ./scripts/repo_hygiene.sh [options]

Options:
  --apply     Delete the reported hygiene artifacts.
  --deep      Also remove heavy local environments (`frontend/node_modules`, `backend/.venv`).
  -h, --help  Show this help.

Behavior:
  Without --apply, the script only reports what it would clean.
  The default cleanup is conservative and removes debug/output/cache artifacts only.

Examples:
  ./scripts/repo_hygiene.sh
  ./scripts/repo_hygiene.sh --apply
  ./scripts/repo_hygiene.sh --apply --deep
EOF
}

# Prints a timestamped section header to stdout.
log() {
  printf '\n[%s] %s\n' "$(date '+%H:%M:%S')" "$*"
}

# Prints a warning to stderr.
warn() {
  printf '[warn] %s\n' "$*" >&2
}

# Runs find from the current directory, skipping the protected directories,
# and prints every path matching the find tests given as arguments.
# Arguments: find(1) tests, e.g. -type d -name '__pycache__'
find_outside_protected_dirs() {
  find . "${PROTECTED_PRUNE[@]}" -o \( "$@" \) -print
}

# Same as find_outside_protected_dirs, but sorted and tolerant of find
# failures (e.g. unreadable directories) so that a report never aborts the
# script under `set -e` / `pipefail`.
list_generated() {
  { find_outside_protected_dirs "$@" || true; } | sort
}

# Lists compiled Python files, capped at 200 entries to keep reports readable.
# `sed -n` reads its whole input, so the upstream `sort` never sees SIGPIPE
# (which `pipefail` would turn into a failure).
list_bytecode_files() {
  list_generated -type f \( -name '*.pyc' -o -name '*.pyo' \) | sed -n '1,200p'
}

# Prints, sorted, those of the given paths that currently exist.
# Arguments: candidate paths
list_existing_paths() {
  local candidate
  for candidate in "$@"; do
    if [ -e "$candidate" ] || [ -L "$candidate" ]; then
      printf '%s\n' "$candidate"
    fi
  done | sort
}

# Succeeds when the docker CLI is installed and the daemon answers.
docker_available() {
  command -v docker >/dev/null 2>&1 && docker info >/dev/null 2>&1
}

# Recursively chowns the whole backend directory to the current user from
# inside a container (where the process runs as root).
# Globals: REPO_ROOT (read)
# Returns: 1 when Docker is unavailable, otherwise the status of docker run.
repair_backend_ownership_with_docker() {
  docker_available || return 1

  log "Repairing backend artifact ownership through Docker"
  docker run --rm \
    -v "$REPO_ROOT/backend:/target" \
    alpine:3.20 \
    chown -R "$(id -u):$(id -g)" /target
}

# Chowns generated artifacts across the repository to the current user from
# inside a container. Covers the same candidates as the manual hint printed
# by main (fixed output directories plus Python caches and core files) and
# recurses into them, so a successful repair really unblocks the retry.
# Globals: REPO_ROOT (read)
# Returns: 1 when Docker is unavailable, otherwise the status of docker run.
repair_repo_ownership_with_docker() {
  docker_available || return 1

  log "Repairing generated artifact ownership across the repository through Docker"
  # The uid:gid pair is passed as $1 instead of being interpolated into the
  # script text, so the container script can stay single-quoted.
  docker run --rm \
    -v "$REPO_ROOT:/target" \
    alpine:3.20 \
    sh -c '
      find /target \
        \( -path /target/.git -o -path /target/frontend/node_modules -o -path /target/backend/.venv \) -prune -o \
        \( \
          -path /target/tmp -o \
          -path /target/frontend/dist -o \
          -path /target/backend/.pytest_cache -o \
          -type d -name __pycache__ -o \
          -type f \( -name "*.pyc" -o -name "*.pyo" -o -name "celerybeat-schedule" -o -name "core" \) \
        \) \
        -exec chown -R "$1" {} +
    ' sh "$(id -u):$(id -g)"
}

# Resolves the git top-level directory into REPO_ROOT and changes into it.
# Exits with status 1 when not run from inside a git work tree.
ensure_repo_root() {
  if ! REPO_ROOT="$(git rev-parse --show-toplevel 2>/dev/null)"; then
    echo "This script must be run inside the git repository." >&2
    exit 1
  fi
  cd "$REPO_ROOT"
}

# Parses command-line flags into APPLY and DEEP.
# Exits 0 after printing help, 1 on an unknown option.
parse_args() {
  while [ "$#" -gt 0 ]; do
    case "$1" in
      --apply)
        APPLY=1
        ;;
      --deep)
        DEEP=1
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage >&2
        exit 1
        ;;
    esac
    shift
  done
}

# Prints a section header and then runs the given command to list candidates.
# Arguments: $1 - section label; remaining - command and its arguments
print_candidates() {
  local label="$1"
  shift
  log "$label"
  "$@"
}

# Lists generated artifacts that exist but are not owned by the current user
# (the `-O` test); these are the ones a plain `rm` is likely to fail on.
print_non_writable_artifacts() {
  local path
  log "Generated artifacts not owned by the current user"
  { find_outside_protected_dirs "${OWNERSHIP_TESTS[@]}" || true; } \
    | while IFS= read -r path; do
        [ -e "$path" ] || continue
        if [ ! -O "$path" ]; then
          printf '%s\n' "$path"
        fi
      done \
    | sort
}

# Removes one path; on failure warns and records it.
# Globals:   PERMISSION_FAILURE_PATHS (appended)
# Arguments: $1 - path to remove
# Returns:   0 when removed (or already gone), 1 when rm failed.
remove_or_record() {
  local target="$1"
  if rm -rf -- "$target" 2>/dev/null; then
    return 0
  fi
  warn "Could not remove $target. Ownership or permissions need to be fixed first."
  PERMISSION_FAILURE_PATHS+=("$target")
  return 1
}

# Deletes the conservative set of artifacts: the fixed SAFE_TARGETS plus
# Python caches, bytecode and files named `core` found outside the protected
# directories (the same scope the dry-run report uses).
# Globals: PERMISSION_FAILURE_PATHS (reset, then filled with blocked paths)
# Returns: 0 when everything was removed, 1 when at least one path was blocked.
# NOTE(review): any regular file named exactly `core` is treated as a core
# dump, tracked or not - confirm no source file carries that name.
cleanup_safe_targets() {
  local permission_failures=0
  local target
  PERMISSION_FAILURE_PATHS=()

  for target in "${SAFE_TARGETS[@]}"; do
    if [ -e "$target" ] || [ -L "$target" ]; then
      remove_or_record "$target" || permission_failures=1
    fi
  done

  # -prune keeps find from descending into directories that are being
  # deleted while it is still walking the tree.
  while IFS= read -r target; do
    remove_or_record "$target" || permission_failures=1
  done < <(find_outside_protected_dirs -type d -name '__pycache__' -prune)

  while IFS= read -r target; do
    remove_or_record "$target" || permission_failures=1
  done < <(find_outside_protected_dirs -type f \( -name '*.pyc' -o -name '*.pyo' -o -name 'core' \))

  return "$permission_failures"
}

# Deletes the heavy local environments selected by --deep.
cleanup_deep_targets() {
  rm -rf -- frontend/node_modules
  rm -rf -- backend/.venv
}

# Prints a single-line, copy-pasteable command that hands ownership of all
# generated artifacts back to the current user. The find part is rendered
# with %q from the same arrays the script itself uses, so it is always
# correctly quoted; numeric uid:gid is used because a group named after the
# user does not exist on every system.
ownership_repair_hint() {
  local find_cmd
  printf -v find_cmd '%q ' \
    find . "${PROTECTED_PRUNE[@]}" -o '(' "${OWNERSHIP_TESTS[@]}" ')' -print0
  printf '%s| sudo xargs -0 chown -R %s:%s\n' "$find_cmd" "$(id -u)" "$(id -g)"
}

# Entry point: report candidates, then (with --apply) delete them, attempting
# a Docker-based ownership repair and one retry when removal is blocked.
main() {
  local cleanup_failed=0

  parse_args "$@"
  ensure_repo_root

  print_candidates \
    "Core debug and cache artifacts" \
    list_existing_paths "${SAFE_TARGETS[@]}"

  print_candidates \
    "Python cache directories" \
    list_generated -type d -name '__pycache__'

  print_candidates \
    "Python bytecode files" \
    list_bytecode_files

  print_candidates \
    "Core dump files" \
    list_generated -type f -name 'core'

  print_non_writable_artifacts

  if [ "$DEEP" -eq 1 ]; then
    print_candidates \
      "Heavy local environments" \
      list_existing_paths "${DEEP_TARGETS[@]}"
  fi

  if [ "$APPLY" -eq 0 ]; then
    log "Dry run only. Re-run with --apply to delete the reported artifacts."
    if [ "$DEEP" -eq 1 ]; then
      echo "Deep mode is enabled: node_modules and backend/.venv would also be removed."
    fi
    exit 0
  fi

  log "Deleting conservative hygiene artifacts"
  cleanup_safe_targets || cleanup_failed=1

  if [ "$DEEP" -eq 1 ]; then
    log "Deleting deep local environments"
    cleanup_deep_targets
  fi

  if [ "$cleanup_failed" -eq 1 ]; then
    if repair_repo_ownership_with_docker || repair_backend_ownership_with_docker; then
      log "Retrying conservative hygiene cleanup after ownership repair"
      cleanup_failed=0
      cleanup_safe_targets || cleanup_failed=1
    fi
  fi

  if [ "$cleanup_failed" -eq 1 ]; then
    log "Some artifacts could not be removed because they are not writable by the current user."
    printf 'Blocked paths:\n'
    printf '  %s\n' "${PERMISSION_FAILURE_PATHS[@]}" | sort -u
    echo
    echo "Run this once, then re-run the hygiene script:"
    printf '  %s' "$(ownership_repair_hint)"
    echo
  fi

  log "Remaining git status"
  git status --short
}

main "$@"