Implement executing tasks

This commit is contained in:
2026-03-03 16:41:28 +00:00
parent d6bd56dace
commit 9c14e51b43
42 changed files with 3410 additions and 121 deletions

82
scripts/quadlet/watchdog.sh Executable file
View File

@@ -0,0 +1,82 @@
#!/usr/bin/env bash
# Configuration for the container watchdog.
#
# Environment (all optional; may also be provided by the stack.env file):
#   STACK_ENV               path to the env file (default: <repo root>/stack.env)
#   GIA_STACK_ID / STACK_ID suffix appended to container names
#   WATCHDOG_SLEEP_SECONDS  seconds between polls (default: 15)
#   NTFY_TOPIC / NOTIFY_TOPIC  ntfy topic; notifications are skipped when empty
#   NTFY_URL_BASE           ntfy server base URL (default: https://ntfy.sh)
set -euo pipefail

# Repository root is two directories above this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
STACK_ENV="${STACK_ENV:-$ROOT_DIR/stack.env}"

# Load the env file BEFORE deriving any setting from it. Deriving first would
# ignore GIA_STACK_ID / WATCHDOG_SLEEP_SECONDS / NOTIFY_TOPIC set in the file,
# and a STACK_ID set in the file would replace the sanitized value with the
# raw one.
if [[ -f "$STACK_ENV" ]]; then
  set -a # export everything the file defines
  # shellcheck disable=SC1090 -- path is only known at runtime
  . "$STACK_ENV"
  set +a
fi

STACK_ID="${GIA_STACK_ID:-${STACK_ID:-}}"
# Collapse every run of characters outside [a-zA-Z0-9._-] into a single '-',
# then trim leading/trailing dashes.
STACK_ID="$(printf '%s' "$STACK_ID" | tr -cs 'a-zA-Z0-9._-' '-' | sed 's/^-*//; s/-*$//')"
SLEEP_SECONDS="${WATCHDOG_SLEEP_SECONDS:-15}"
NTFY_TOPIC="${NTFY_TOPIC:-${NOTIFY_TOPIC:-}}"
NTFY_URL_BASE="${NTFY_URL_BASE:-https://ntfy.sh}"
# Host label used as a prefix in notification bodies.
HOST_TAG="${HOSTNAME:-$(hostname 2>/dev/null || echo unknown-host)}"
# Print the container name for a base name.
# Globals:   STACK_ID (read)
# Arguments: $1 - base container name
# Outputs:   "<base>_<STACK_ID>" when STACK_ID is non-empty, otherwise "<base>"
name_with_stack() {
  local stem="$1"
  local suffix=""
  [[ -z "$STACK_ID" ]] || suffix="_${STACK_ID}"
  echo "${stem}${suffix}"
}
# Send a best-effort ntfy notification; never fails the caller.
# Globals:   NTFY_TOPIC (read) - topic name; empty/unset disables notifications
#            NTFY_URL_BASE (read) - server base URL (default https://ntfy.sh)
#            NTFY_TIMEOUT_SECONDS (read, optional) - total curl time budget
#                                                    (default 10)
# Arguments: $1 - notification title
#            $2 - notification body
# Returns:   always 0
notify() {
  local title="$1"
  local msg="$2"
  local topic="${NTFY_TOPIC:-}"
  local url_base="${NTFY_URL_BASE:-https://ntfy.sh}"
  local timeout="${NTFY_TIMEOUT_SECONDS:-10}"

  if [[ -z "$topic" ]]; then
    return 0
  fi
  if ! command -v curl >/dev/null 2>&1; then
    return 0
  fi
  # Bound the request: without a timeout a stalled connection would block the
  # caller indefinitely. --data-raw sends the body verbatim, whereas -d would
  # treat a leading '@' as a file name to read.
  curl -sS -X POST "${url_base%/}/${topic}" \
    --connect-timeout 5 \
    --max-time "$timeout" \
    -H "Title: $title" \
    -H "Tags: warning" \
    --data-raw "$msg" >/dev/null || true
}
# Build the list of watched container names (base name plus optional stack
# suffix) and mark each one's last observed state as "unknown".
declare -A LAST_STATE
CONTAINERS=()
for base in gia asgi_gia ur_gia scheduling_gia codex_worker_gia; do
  name="$(name_with_stack "$base")"
  CONTAINERS+=("$name")
  LAST_STATE["$name"]="unknown"
done
# Main watchdog loop: every SLEEP_SECONDS, check each container in CONTAINERS
# and try `podman restart` on any that is not running.
#
# LAST_STATE values per container:
#   unknown    - not observed yet
#   up         - last check found it running
#   recovering - was not running; the restart command succeeded
#   down       - was not running; the restart command failed
while true; do
  for name in "${CONTAINERS[@]}"; do
    prev="${LAST_STATE[$name]:-unknown}"

    # A failed inspect (e.g. the container does not exist) counts as not running.
    running="false"
    if inspect_out="$(podman inspect -f '{{.State.Running}}' "$name" 2>/dev/null)"; then
      running="${inspect_out//$'\n'/}"
      running="${running,,}"
    fi

    if [[ "$running" == "true" ]]; then
      # Announce a recovery only after an observed outage. The first healthy
      # observation (prev == unknown) is not a recovery, so stay silent rather
      # than send a warning for every container at watchdog startup.
      if [[ "$prev" != "up" && "$prev" != "unknown" ]]; then
        notify "GIA recovered: $name" "[$HOST_TAG] container $name is now running"
      fi
      LAST_STATE["$name"]="up"
      continue
    fi

    restart_out=""
    if restart_out="$(podman restart "$name" 2>&1)"; then
      LAST_STATE["$name"]="recovering"
      notify "GIA restarted: $name" "[$HOST_TAG] container $name was not running and restart succeeded"
    else
      LAST_STATE["$name"]="down"
      # Notify only on the transition into "down"; otherwise a container that
      # cannot be restarted would send the same alert every poll interval.
      if [[ "$prev" != "down" ]]; then
        notify "GIA restart failed: $name" "[$HOST_TAG] restart failed for $name: $restart_out"
      fi
    fi
  done
  sleep "$SLEEP_SECONDS"
done