#!/usr/bin/env bash
#
# GIA container watchdog.
#
# Polls a fixed set of podman containers forever. A container that is not
# running is restarted with `podman restart`, and state changes are announced
# through an ntfy topic when one is configured.
#
# Environment (all optional; may also be supplied by the STACK_ENV file):
#   STACK_ENV                 env file to source (default: $ROOT_DIR/stack.env)
#   GIA_STACK_ID, STACK_ID    suffix appended to container names as "_<id>"
#   WATCHDOG_SLEEP_SECONDS    pause between polling rounds (default: 15)
#   NTFY_TOPIC, NOTIFY_TOPIC  ntfy topic; notifications are skipped when empty
#   NTFY_URL_BASE             ntfy server base URL (default: https://ntfy.sh)
#   NTFY_TIMEOUT_SECONDS      max duration of one notification request
#                             (default: 10)
set -euo pipefail

# Two directories above the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
STACK_ENV="${STACK_ENV:-$ROOT_DIR/stack.env}"

# Load the env file BEFORE deriving any setting from it, so that values it
# defines (e.g. NOTIFY_TOPIC, WATCHDOG_SLEEP_SECONDS, GIA_STACK_ID) take effect
# and STACK_ID always passes through the sanitising step below.
if [[ -f "$STACK_ENV" ]]; then
  set -a
  # shellcheck disable=SC1090 # path is only known at runtime
  . "$STACK_ENV"
  set +a
fi

STACK_ID="${GIA_STACK_ID:-${STACK_ID:-}}"
# Collapse every run of characters outside [a-zA-Z0-9._-] into a single '-'
# and trim leading/trailing dashes.
STACK_ID="$(printf '%s\n' "$STACK_ID" \
  | tr -cs 'a-zA-Z0-9._-' '-' \
  | sed 's/^-*//; s/-*$//')"
SLEEP_SECONDS="${WATCHDOG_SLEEP_SECONDS:-15}"
NTFY_TOPIC="${NTFY_TOPIC:-${NOTIFY_TOPIC:-}}"
NTFY_URL_BASE="${NTFY_URL_BASE:-https://ntfy.sh}"
NTFY_TIMEOUT_SECONDS="${NTFY_TIMEOUT_SECONDS:-10}"
HOST_TAG="${HOSTNAME:-$(hostname 2>/dev/null || echo unknown-host)}"

#######################################
# Build a container name, suffixed with the stack id when one is set.
# Globals:   STACK_ID (read)
# Arguments: $1 - base container name
# Outputs:   "<base>_<STACK_ID>" or "<base>" on stdout
#######################################
name_with_stack() {
  local base="$1"
  if [[ -n "$STACK_ID" ]]; then
    printf '%s\n' "${base}_${STACK_ID}"
  else
    printf '%s\n' "$base"
  fi
}

#######################################
# Best-effort ntfy notification; never fails the caller.
# Globals:   NTFY_TOPIC, NTFY_URL_BASE, NTFY_TIMEOUT_SECONDS (read)
# Arguments: $1 - title, $2 - message body
# Returns:   always 0
#######################################
notify() {
  local title="$1"
  local msg="$2"

  # Notifications are optional: silently skip without a topic or without curl.
  [[ -n "$NTFY_TOPIC" ]] || return 0
  command -v curl >/dev/null 2>&1 || return 0

  # --max-time keeps an unresponsive ntfy server from stalling the watchdog.
  # Delivery failures are deliberately ignored (|| true).
  curl -sS --max-time "$NTFY_TIMEOUT_SECONDS" \
    -X POST "${NTFY_URL_BASE%/}/$NTFY_TOPIC" \
    -H "Title: $title" \
    -H "Tags: warning" \
    -d "$msg" >/dev/null || true
}

#######################################
# Report whether podman considers a container to be running.
# Arguments: $1 - container name
# Returns:   0 if `podman inspect` prints "true" (case-insensitive) for
#            .State.Running; 1 otherwise, including when inspect fails.
#######################################
is_running() {
  local name="$1"
  local state

  state="$(podman inspect -f '{{.State.Running}}' "$name" 2>/dev/null)" \
    || return 1
  state="${state//$'\n'/}"
  [[ "${state,,}" == "true" ]]
}

#######################################
# Check one container, restart it if it is not running, and notify on
# state transitions.
# Globals:   LAST_STATE (read/written), HOST_TAG (read)
# Arguments: $1 - container name
#######################################
check_container() {
  local name="$1"
  local previous="${LAST_STATE[$name]}"
  local restart_out=""

  if is_running "$name"; then
    # Only report a recovery after the container was actually seen not
    # running; the initial "unknown" state is not an outage.
    if [[ "$previous" == "recovering" || "$previous" == "down" ]]; then
      notify "GIA recovered: $name" \
        "[$HOST_TAG] container $name is now running"
    fi
    LAST_STATE["$name"]="up"
    return 0
  fi

  if restart_out="$(podman restart "$name" 2>&1)"; then
    LAST_STATE["$name"]="recovering"
    notify "GIA restarted: $name" \
      "[$HOST_TAG] container $name was not running and restart succeeded"
  else
    LAST_STATE["$name"]="down"
    # Alert once per outage instead of on every polling round.
    if [[ "$previous" != "down" ]]; then
      notify "GIA restart failed: $name" \
        "[$HOST_TAG] restart failed for $name: $restart_out"
    fi
  fi
}

CONTAINERS=(
  "$(name_with_stack "gia")"
  "$(name_with_stack "asgi_gia")"
  "$(name_with_stack "ur_gia")"
  "$(name_with_stack "scheduling_gia")"
  "$(name_with_stack "codex_worker_gia")"
)

# Last observed state per container: unknown | up | recovering | down.
declare -A LAST_STATE=()

#######################################
# Poll every watched container forever, sleeping between rounds.
# Globals:   CONTAINERS, SLEEP_SECONDS (read), LAST_STATE (written)
#######################################
main() {
  local name

  for name in "${CONTAINERS[@]}"; do
    LAST_STATE["$name"]="unknown"
  done

  while true; do
    for name in "${CONTAINERS[@]}"; do
      check_container "$name"
    done
    sleep "$SLEEP_SECONDS"
  done
}

main "$@"