#!/usr/bin/env bash
# DictIA — Health check diagnostic
#
# Checks Docker, containers, endpoints, disk, RAM, and GPU.
#
# Usage:
#   bash health-check.sh           # Human-readable output
#   bash health-check.sh --json    # JSON output
#   bash health-check.sh --quiet   # Exit code only (0=ok, 1=issue)
set -euo pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
# Directory two levels above the script; the disk check measures the
# filesystem that holds it.
PROJECT_DIR="$(cd -- "$SCRIPT_DIR/../.." && pwd)"
# Report mode: "human" (default), "json" or "quiet".
OUTPUT="human"
# Number of checks recorded with status "error" or "warning".
ISSUES=0

#######################################
# Select the report mode from the command-line flags.
# Globals:   OUTPUT (written)
# Arguments: the script's arguments; --json and --quiet are recognised,
#            the last one given wins, anything else is ignored.
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --json) OUTPUT="json" ;;
      --quiet) OUTPUT="quiet" ;;
      *) ;; # unrecognised arguments are deliberately ignored
    esac
  done
}

parse_args "$@"

# Results of every check, keyed by check name; each value is "status|detail".
declare -A CHECKS=()

#######################################
# Record the outcome of one check.
# Globals:   CHECKS (written), ISSUES (incremented on error/warning)
# Arguments: $1 - check name (key in CHECKS)
#            $2 - status: ok, info, warning or error
#            $3 - optional human-readable detail
#######################################
check() {
  local name="$1"
  local status="$2"
  local detail="${3:-}"
  # Records are read back one line at a time, so a multi-line detail would
  # lose everything after its first line; fold it onto a single line.
  detail=${detail//$'\n'/; }
  CHECKS["$name"]="$status|$detail"
  if [[ "$status" == "error" || "$status" == "warning" ]]; then
    ISSUES=$((ISSUES + 1))
  fi
}

# --- Docker ---
# Requires both the docker CLI on PATH and a successful `docker info`,
# which is used here as the probe for a usable daemon.
if command -v docker &>/dev/null && docker info &>/dev/null; then
  check "docker" "ok" "Docker daemon running"
else
  check "docker" "error" "Docker not available"
fi

# --- Containers ---
# The exit status of `docker inspect` is tested directly instead of appending
# a sentinel with `|| echo`: anything the failed command writes to stdout
# would be concatenated with the sentinel and defeat the "not_found" test.
#
# The template falls back to the plain container state when no healthcheck is
# configured; without the guard the template itself fails and an existing
# container would be reported as "not found".
if ! DICTIA_STATUS=$(docker inspect \
  --format='{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}} (no healthcheck){{end}}' \
  dictia 2>/dev/null); then
  DICTIA_STATUS="not_found"
fi
if [[ "$DICTIA_STATUS" == "healthy" ]]; then
  check "container_dictia" "ok" "healthy"
elif [[ "$DICTIA_STATUS" == "not_found" ]]; then
  check "container_dictia" "error" "container not found"
else
  check "container_dictia" "warning" "$DICTIA_STATUS"
fi

# A missing whisperx-asr container is only informational.
if ! WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null); then
  WHISPERX_STATUS="not_found"
fi
if [[ "$WHISPERX_STATUS" == "running" ]]; then
  check "container_whisperx" "ok" "running"
elif [[ "$WHISPERX_STATUS" == "not_found" ]]; then
  check "container_whisperx" "info" "not present (cloud profile?)"
else
  check "container_whisperx" "warning" "$WHISPERX_STATUS"
fi

# --- Endpoints ---

#######################################
# Probe <base_url>/health and record the result.
# Arguments: $1 - check name
#            $2 - base URL without trailing slash
#            $3 - status to record when the probe fails (error, info, ...)
# The probe is limited to 5 seconds (-m 5) and treats HTTP error responses
# as failures (-f).
#######################################
check_endpoint() {
  local name="$1"
  local base_url="$2"
  local down_status="$3"
  if curl -sf -o /dev/null -m 5 "${base_url}/health" 2>/dev/null; then
    check "$name" "ok" "${base_url} responding"
  else
    check "$name" "$down_status" "${base_url} not responding"
  fi
}

# Only the main application endpoint counts as an issue when it is down.
check_endpoint "endpoint_dictia" "http://localhost:8899" "error"
check_endpoint "endpoint_whisperx" "http://localhost:9000" "info"
check_endpoint "endpoint_asr_proxy" "http://localhost:9090" "info"

# --- Disk ---
# Usage percentage of the filesystem that holds PROJECT_DIR.
# -P forces one record per filesystem, so a long device name cannot wrap the
# row and shift the columns. `|| true` keeps a failing df from aborting the
# whole script under `set -e` + `pipefail`; the check is skipped instead.
DISK_USED=$(df -P "$PROJECT_DIR" 2>/dev/null \
  | awk 'NR==2 { sub(/%/, "", $5); print $5 }' || true)
# Only compare when a plain integer was parsed.
if [[ "$DISK_USED" =~ ^[0-9]+$ ]]; then
  if [ "$DISK_USED" -gt 90 ]; then
    check "disk" "error" "${DISK_USED}% used"
  elif [ "$DISK_USED" -gt 80 ]; then
    check "disk" "warning" "${DISK_USED}% used"
  else
    check "disk" "ok" "${DISK_USED}% used"
  fi
fi

# --- RAM ---
if command -v free &>/dev/null; then
  MEM_TOTAL=""
  MEM_AVAIL=""
  # LC_ALL=C pins the "Mem:" row label, which may be translated in other
  # locales. Column 2 is the total and column 7 is read as "available".
  # NOTE(review): older `free` versions lay out column 7 differently — confirm
  # on the target hosts.
  # `|| true`: read returns non-zero when nothing was parsed.
  read -r MEM_TOTAL MEM_AVAIL < <(
    LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/ { print $2, $7 }'
  ) || true
  # Skip the check rather than abort on unparsable output or a zero total
  # (which would otherwise be a division by zero).
  if [[ "$MEM_TOTAL" =~ ^[0-9]+$ && "$MEM_AVAIL" =~ ^[0-9]+$ ]] \
    && [ "$MEM_TOTAL" -gt 0 ]; then
    MEM_USED_PCT=$(((MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL))
    if [ "$MEM_USED_PCT" -gt 90 ]; then
      check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    else
      check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    fi
  fi
fi

# --- GPU ---
# Only attempted when nvidia-smi is installed.
if command -v nvidia-smi &>/dev/null; then
  # Success is decided by the exit status alone. Appending a sentinel with
  # `|| echo` would let any text the failed command prints on stdout be
  # reported as a healthy GPU.
  if GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total \
    --format=csv,noheader 2>/dev/null); then
    # One CSV line per GPU; fold them onto one line so that every GPU
    # survives in the single-line check record.
    GPU_INFO=${GPU_INFO//$'\n'/; }
    check "gpu" "ok" "$GPU_INFO"
  else
    check "gpu" "warning" "nvidia-smi present but query failed"
  fi
fi

# --- Output ---

#######################################
# Escape a string for use inside a double-quoted JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text="$1"
  text=${text//\\/\\\\} # must run first so later escapes are not doubled
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

#######################################
# Print all recorded checks as a JSON document.
# Globals: CHECKS (read), ISSUES (read)
# Checks are emitted in the associative array's iteration order.
#######################################
print_json_report() {
  local name record first=true
  printf '{\n'
  printf ' "timestamp": "%s",\n' "$(date -Is)"
  printf ' "issues": %s,\n' "$ISSUES"
  printf ' "checks": {\n'
  for name in "${!CHECKS[@]}"; do
    record="${CHECKS[$name]}"
    if [[ "$first" == true ]]; then
      first=false
    else
      printf ',\n'
    fi
    # Split on the first '|' only, so the detail may itself contain '|'.
    printf ' "%s": {"status": "%s", "detail": "%s"}' \
      "$(json_escape "$name")" \
      "$(json_escape "${record%%|*}")" \
      "$(json_escape "${record#*|}")"
  done
  printf '\n'
  printf ' }\n'
  printf '}\n'
}

#######################################
# Print the recorded checks as an aligned, human-readable list in a fixed
# order, followed by a one-line summary.
# Globals: CHECKS (read), ISSUES (read)
#######################################
print_human_report() {
  local name record status detail icon
  printf '%s\n' "=== DictIA Health Check ==="
  printf '\n'
  for name in docker container_dictia container_whisperx endpoint_dictia \
    endpoint_whisperx endpoint_asr_proxy disk memory gpu; do
    # Checks that were skipped (e.g. no nvidia-smi) have no record.
    [[ -n "${CHECKS[$name]+x}" ]] || continue
    record="${CHECKS[$name]}"
    status="${record%%|*}"
    detail="${record#*|}"
    case "$status" in
      ok) icon="[OK]" ;;
      warning) icon="[!!]" ;;
      error) icon="[ERR]" ;;
      info) icon="[--]" ;;
      *) icon="[??]" ;; # unknown status: never reuse the previous icon
    esac
    printf " %-22s %s %s\n" "$name" "$icon" "$detail"
  done
  printf '\n'
  if ((ISSUES == 0)); then
    printf '%s\n' "All checks passed."
  else
    printf '%s\n' "$ISSUES issue(s) found."
  fi
}

case "$OUTPUT" in
  json)
    print_json_report
    ;;
  quiet)
    # Exit code only: 0 when no issue was recorded, 1 otherwise.
    if ((ISSUES == 0)); then
      exit 0
    else
      exit 1
    fi
    ;;
  *)
    print_human_report
    ;;
esac