Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)
This commit is contained in:
157
deployment/tools/health-check.sh
Normal file
157
deployment/tools/health-check.sh
Normal file
@@ -0,0 +1,157 @@
|
||||
#!/usr/bin/env bash
# DictIA — Health check diagnostic
#
# Checks Docker, containers, endpoints, disk, RAM, and GPU.
#
# Usage:
#   bash health-check.sh           # Human-readable output
#   bash health-check.sh --json    # JSON output
#   bash health-check.sh --quiet   # Exit code only (0=ok, 1=issue)
#
# Arguments other than --json / --quiet are ignored. When both flags are
# given, the last one on the command line wins.
set -euo pipefail

# Results are kept in an associative array, which needs bash 4.0 or newer.
# Stop with a readable message instead of the cryptic
# "declare: -A: invalid option" an older bash would print.
if (( BASH_VERSINFO[0] < 4 )); then
  printf 'health-check.sh: bash 4.0 or newer is required (found %s)\n' "$BASH_VERSION" >&2
  exit 1
fi

# Directory holding this script, and the project root two levels above it.
SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
PROJECT_DIR="$(cd -- "$SCRIPT_DIR/../.." && pwd)"

# Output mode: human | json | quiet.
OUTPUT="human"
# Number of checks recorded with status "warning" or "error".
ISSUES=0

for arg in "$@"; do
  case "$arg" in
    --json) OUTPUT="json" ;;
    --quiet) OUTPUT="quiet" ;;
  esac
done

# Maps check name -> "status|detail".
declare -A CHECKS=()
|
||||
|
||||
#######################################
# Record the outcome of one health check.
# Globals:
#   CHECKS (written) - CHECKS[name] is set to "status|detail"
#   ISSUES (written) - incremented when status is "error" or "warning"
# Arguments:
#   $1 - check name, used as the key in CHECKS
#   $2 - status (the script uses ok, info, warning and error)
#   $3 - optional detail text, defaults to empty
#######################################
check() {
  local name="$1"
  local status="$2"
  local detail="${3:-}"
  local newline=$'\n'

  # Keep every entry on a single line: the stored format is "status|detail"
  # and the report shows one row per check. A multi-line detail (for example
  # one line per device from a tool that lists several) is folded with "; "
  # so nothing after its first line is lost.
  detail=${detail%"$newline"}
  detail=${detail//"$newline"/; }

  CHECKS["$name"]="${status}|${detail}"

  # Only warnings and errors count as issues; "ok" and "info" do not.
  case "$status" in
    error | warning) ISSUES=$((ISSUES + 1)) ;;
  esac
}
|
||||
|
||||
# --- Docker ---
# Assume the worst, then upgrade the result once both the docker CLI is on
# PATH and `docker info` succeeds (i.e. the daemon answered).
docker_check_status="error"
docker_check_detail="Docker not available"
if command -v docker >/dev/null 2>&1; then
  if docker info >/dev/null 2>&1; then
    docker_check_status="ok"
    docker_check_detail="Docker daemon running"
  fi
fi
check "docker" "$docker_check_status" "$docker_check_detail"
|
||||
|
||||
# --- Containers ---
# Main application container, judged by its Docker health status. Any failure
# of `docker inspect` is mapped to the "not_found" sentinel.
# NOTE(review): a container that defines no healthcheck presumably makes this
# template fail and would therefore also be reported as "not found" — confirm.
DICTIA_STATUS=$(docker inspect --format='{{.State.Health.Status}}' dictia 2>/dev/null || echo "not_found")
case "$DICTIA_STATUS" in
  healthy)
    check "container_dictia" "ok" "healthy"
    ;;
  not_found)
    check "container_dictia" "error" "container not found"
    ;;
  *)
    # Any other reported health status is surfaced verbatim as a warning.
    check "container_dictia" "warning" "$DICTIA_STATUS"
    ;;
esac

# ASR container, judged by its run state. Its absence is only informational.
WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null || echo "not_found")
case "$WHISPERX_STATUS" in
  running)
    check "container_whisperx" "ok" "running"
    ;;
  not_found)
    check "container_whisperx" "info" "not present (cloud profile?)"
    ;;
  *)
    check "container_whisperx" "warning" "$WHISPERX_STATUS"
    ;;
esac
|
||||
|
||||
# --- Endpoints ---
# Each row: check name | local port | status recorded when /health does not
# answer. Only the main application endpoint counts as an error when it is
# down; the other two are recorded as "info".
for endpoint_row in \
  "endpoint_dictia|8899|error" \
  "endpoint_whisperx|9000|info" \
  "endpoint_asr_proxy|9090|info"; do
  IFS='|' read -r endpoint_name endpoint_port endpoint_down_status <<< "$endpoint_row"
  endpoint_base="http://localhost:${endpoint_port}"
  # -s: silent, -f: treat HTTP error responses as failure, -m 5: five second cap.
  if curl -sf -o /dev/null -m 5 "${endpoint_base}/health" 2>/dev/null; then
    check "$endpoint_name" "ok" "${endpoint_base} responding"
  else
    check "$endpoint_name" "$endpoint_down_status" "${endpoint_base} not responding"
  fi
done
|
||||
|
||||
# --- Disk ---
# Usage percentage of the filesystem that holds the project directory.
#  * `df -P` selects the POSIX output format, one line per filesystem; the
#    default format of some df implementations wraps long device names, which
#    shifts the columns awk reads.
#  * The `|| DISK_USED=""` fallback keeps a failing df (pipefail + set -e)
#    from aborting the script before the remaining checks and the report run.
DISK_USED=$(df -P -- "$PROJECT_DIR" 2>/dev/null | awk 'NR==2 {print $5}' | tr -d '%') || DISK_USED=""

# Compare only when a plain integer was parsed; otherwise no disk check is
# recorded (same outcome as an empty value before).
if [[ "$DISK_USED" =~ ^[0-9]+$ ]]; then
  if [ "$DISK_USED" -gt 90 ]; then
    check "disk" "error" "${DISK_USED}% used"
  elif [ "$DISK_USED" -gt 80 ]; then
    check "disk" "warning" "${DISK_USED}% used"
  else
    check "disk" "ok" "${DISK_USED}% used"
  fi
fi
|
||||
|
||||
# --- RAM ---
if command -v free &>/dev/null; then
  # LC_ALL=C pins the "Mem:" row label, which free translates under some
  # locales. One invocation yields total ($2) and available ($7) from the same
  # snapshot. The `|| MEM_LINE=""` fallback keeps a failing pipeline from
  # aborting the script under set -e + pipefail.
  # NOTE(review): column 7 is assumed to be "available"; confirm for the
  # procps version in use.
  MEM_LINE=$(LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/ {print $2, $7}') || MEM_LINE=""
  read -r MEM_TOTAL MEM_AVAIL <<< "$MEM_LINE"

  # Require a positive total and a numeric "available" value so the
  # percentage below can neither divide by zero nor hit a syntax error.
  # When the values cannot be parsed, no memory check is recorded.
  if [[ "$MEM_TOTAL" =~ ^[1-9][0-9]*$ && "$MEM_AVAIL" =~ ^[0-9]+$ ]]; then
    MEM_USED_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
    if [ "$MEM_USED_PCT" -gt 90 ]; then
      check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    else
      check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    fi
  fi
fi
|
||||
|
||||
# --- GPU ---
# Only evaluated when nvidia-smi is installed; otherwise no gpu check is recorded.
if command -v nvidia-smi &>/dev/null; then
  # Decide on the exit status of nvidia-smi itself. A text sentinel appended
  # after a failure would be hidden behind anything the tool had already
  # printed on stdout, and the failed query would then be reported as "ok".
  if GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader 2>/dev/null); then
    # The query prints one CSV line per GPU; fold them into a single line so
    # every device shows up in the one-row-per-check report.
    GPU_NEWLINE=$'\n'
    GPU_INFO=${GPU_INFO//"$GPU_NEWLINE"/; }
    check "gpu" "ok" "$GPU_INFO"
  else
    check "gpu" "warning" "nvidia-smi present but query failed"
  fi
fi
|
||||
|
||||
# --- Output ---
# Renders the collected results in the mode selected by OUTPUT.
# Only the quiet mode turns the issue count into the exit status; the json
# and human modes end with a successful echo.

#######################################
# Escape text for use inside a double-quoted JSON string.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout, without a trailing newline
#######################################
json_escape() {
  local text="$1"
  # Backslash goes first so the escapes added below are not doubled.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

if [ "$OUTPUT" = "json" ]; then
  echo "{"
  echo " \"timestamp\": \"$(date -Is)\","
  echo " \"issues\": $ISSUES,"
  echo " \"checks\": {"
  FIRST=true
  # Key order of an associative array is unspecified; JSON consumers must
  # not rely on the order of the entries.
  for name in "${!CHECKS[@]}"; do
    # Split "status|detail" at the first "|" so the detail keeps any further
    # "|" characters and any embedded newlines.
    entry="${CHECKS[$name]}"
    status="${entry%%|*}"
    detail="${entry#*|}"
    if [ "$FIRST" = true ]; then
      FIRST=false
    else
      echo ","
    fi
    printf ' "%s": {"status": "%s", "detail": "%s"}' \
      "$(json_escape "$name")" "$(json_escape "$status")" "$(json_escape "$detail")"
  done
  echo
  echo " }"
  echo "}"
elif [ "$OUTPUT" = "quiet" ]; then
  # Exit code only: 0 when no warning/error was recorded, 1 otherwise.
  if [ "$ISSUES" -eq 0 ]; then
    exit 0
  fi
  exit 1
else
  echo "=== DictIA Health Check ==="
  echo
  # Fixed display order; checks that were never recorded are skipped.
  for name in docker container_dictia container_whisperx endpoint_dictia endpoint_whisperx endpoint_asr_proxy disk memory gpu; do
    if [ -n "${CHECKS[$name]+x}" ]; then
      entry="${CHECKS[$name]}"
      status="${entry%%|*}"
      detail="${entry#*|}"
      # Keep each check on one row even if its detail spans several lines.
      detail=${detail//$'\n'/; }
      case "$status" in
        ok) ICON="[OK]" ;;
        warning) ICON="[!!]" ;;
        error) ICON="[ERR]" ;;
        info) ICON="[--]" ;;
        # Unknown status: without this arm ICON would be unset (fatal under
        # set -u) or silently reuse the icon of the previous row.
        *) ICON="[??]" ;;
      esac
      printf " %-22s %s %s\n" "$name" "$ICON" "$detail"
    fi
  done
  echo
  if [ "$ISSUES" -eq 0 ]; then
    echo "All checks passed."
  else
    echo "$ISSUES issue(s) found."
  fi
fi
|
||||
Reference in New Issue
Block a user