#!/usr/bin/env bash
# DictIA — Health check diagnostic
#
# Checks Docker, containers, endpoints, disk, RAM, and GPU.
#
# Usage:
#   bash health-check.sh           # Human-readable output
#   bash health-check.sh --json    # JSON output
#   bash health-check.sh --quiet   # Exit code only (0=ok, 1=issue)
#
# Notes:
#   - Requires Bash 4+ (associative arrays).
#   - Only --quiet maps the issue count onto the exit status; the human and
#     JSON modes report issues in their output and fall through to exit 0.
#   - Checks with status "error" or "warning" count as issues; "info" and
#     "ok" do not.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
OUTPUT="human"
ISSUES=0

for arg in "$@"; do
  case "$arg" in
    --json) OUTPUT="json" ;;
    --quiet) OUTPUT="quiet" ;;
  esac
done

# name -> "status|detail"
declare -A CHECKS
# Names in the order they were first recorded, so both output modes list
# checks deterministically (associative-array key order is unspecified).
CHECK_ORDER=()

#######################################
# Record the result of one check.
# Globals:   CHECKS, CHECK_ORDER, ISSUES (all written)
# Arguments: $1 - check name
#            $2 - status: ok | info | warning | error
#            $3 - optional human-readable detail
#######################################
check() {
  local name="$1"
  local status="$2"
  local detail="${3:-}"

  if [[ -z "${CHECKS[$name]+x}" ]]; then
    CHECK_ORDER+=("$name")
  fi
  CHECKS["$name"]="$status|$detail"

  if [[ "$status" == "error" || "$status" == "warning" ]]; then
    ISSUES=$((ISSUES + 1))
  fi
}

#######################################
# Escape a string for embedding inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab.
# Arguments: $1 - raw string
# Outputs:   escaped string on stdout (no trailing newline)
#######################################
json_escape() {
  local s="$1"
  s=${s//\\/\\\\} # backslashes first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

#######################################
# Compute the percentage of memory in use.
# Arguments: $1 - total memory, $2 - available memory (same unit)
# Outputs:   integer percentage on stdout
# Returns:   1 if either value is not a non-negative integer or total is 0
#######################################
mem_used_pct() {
  local total="$1"
  local avail="$2"

  [[ "$total" =~ ^[0-9]+$ && "$avail" =~ ^[0-9]+$ ]] || return 1
  ((total > 0)) || return 1
  printf '%s\n' "$(((total - avail) * 100 / total))"
}

#######################################
# Query one field of a container's state via `docker inspect`.
# Arguments: $1 - Go-template field path (e.g. .State.Status)
#            $2 - container name
# Outputs:   the field value, or "not_found" if `docker inspect` fails
# Returns:   always 0
#######################################
container_state() {
  local field="$1"
  local container="$2"
  local state

  # Decide on the exit status rather than appending a sentinel to whatever
  # the failed command may already have written to stdout.
  if state=$(docker inspect --format="{{$field}}" "$container" 2>/dev/null); then
    printf '%s\n' "$state"
  else
    printf 'not_found\n'
  fi
}

#######################################
# Probe <base-url>/health and record the result.
# Arguments: $1 - check name
#            $2 - base URL (without /health)
#            $3 - status to record when the endpoint does not respond
#######################################
check_endpoint() {
  local name="$1"
  local base_url="$2"
  local fail_status="$3"

  if curl -sf -o /dev/null -m 5 "$base_url/health" 2>/dev/null; then
    check "$name" "ok" "$base_url responding"
  else
    check "$name" "$fail_status" "$base_url not responding"
  fi
}

# --- Docker ---
if command -v docker &>/dev/null && docker info &>/dev/null; then
  check "docker" "ok" "Docker daemon running"
else
  check "docker" "error" "Docker not available"
fi

# --- Containers ---
DICTIA_STATUS=$(container_state ".State.Health.Status" dictia)
case "$DICTIA_STATUS" in
  healthy) check "container_dictia" "ok" "healthy" ;;
  not_found) check "container_dictia" "error" "container not found" ;;
  *) check "container_dictia" "warning" "$DICTIA_STATUS" ;;
esac

WHISPERX_STATUS=$(container_state ".State.Status" whisperx-asr)
case "$WHISPERX_STATUS" in
  running) check "container_whisperx" "ok" "running" ;;
  not_found) check "container_whisperx" "info" "not present (cloud profile?)" ;;
  *) check "container_whisperx" "warning" "$WHISPERX_STATUS" ;;
esac

# --- Endpoints ---
check_endpoint "endpoint_dictia" "http://localhost:8899" "error"
check_endpoint "endpoint_whisperx" "http://localhost:9000" "info"
check_endpoint "endpoint_asr_proxy" "http://localhost:9090" "info"

# --- Disk ---
# -P forces one line per filesystem so the percentage is always field 5 of
# row 2; `|| true` keeps a df failure from aborting the script via pipefail.
DISK_USED=$(df -P "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $5}' | tr -d '%') || true
if [[ "$DISK_USED" =~ ^[0-9]+$ ]]; then
  if ((DISK_USED > 90)); then
    check "disk" "error" "${DISK_USED}% used"
  elif ((DISK_USED > 80)); then
    check "disk" "warning" "${DISK_USED}% used"
  else
    check "disk" "ok" "${DISK_USED}% used"
  fi
else
  check "disk" "info" "disk usage unavailable"
fi

# --- RAM ---
if command -v free &>/dev/null; then
  MEM_TOTAL=""
  MEM_AVAIL=""
  # LC_ALL=C keeps the "Mem:" row label stable; a single call gives a
  # consistent total/available pair. `read` returns non-zero on empty input,
  # hence the `|| true` under `set -e`.
  read -r MEM_TOTAL MEM_AVAIL < <(
    LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/{print $2, $7}'
  ) || true

  if MEM_USED_PCT=$(mem_used_pct "$MEM_TOTAL" "$MEM_AVAIL"); then
    if ((MEM_USED_PCT > 90)); then
      check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    else
      check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    fi
  else
    check "memory" "info" "memory usage unavailable"
  fi
fi

# --- GPU ---
if command -v nvidia-smi &>/dev/null; then
  if GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total \
    --format=csv,noheader 2>/dev/null); then
    # One CSV row per GPU: join rows so every GPU fits in a one-line detail.
    check "gpu" "ok" "${GPU_INFO//$'\n'/; }"
  else
    check "gpu" "warning" "nvidia-smi present but query failed"
  fi
fi

# --- Output ---
if [[ "$OUTPUT" == "json" ]]; then
  printf '{\n'
  printf '  "timestamp": "%s",\n' "$(date -Is)"
  printf '  "issues": %d,\n' "$ISSUES"
  printf '  "checks": {\n'
  separator=""
  for name in "${CHECK_ORDER[@]}"; do
    entry="${CHECKS[$name]}"
    # Split on the first '|' only, so details containing '|' stay intact.
    printf '%s    "%s": {"status": "%s", "detail": "%s"}' \
      "$separator" \
      "$(json_escape "$name")" \
      "$(json_escape "${entry%%|*}")" \
      "$(json_escape "${entry#*|}")"
    separator=$',\n'
  done
  printf '\n  }\n}\n'
elif [[ "$OUTPUT" == "quiet" ]]; then
  if ((ISSUES == 0)); then
    exit 0
  else
    exit 1
  fi
else
  echo "=== DictIA Health Check ==="
  echo
  for name in "${CHECK_ORDER[@]}"; do
    entry="${CHECKS[$name]}"
    status="${entry%%|*}"
    detail="${entry#*|}"
    case "$status" in
      ok) ICON="[OK]" ;;
      warning) ICON="[!!]" ;;
      error) ICON="[ERR]" ;;
      info) ICON="[--]" ;;
      *) ICON="[??]" ;; # unknown status: keep ICON defined under `set -u`
    esac
    printf "  %-22s %s %s\n" "$name" "$ICON" "$detail"
  done
  echo
  if ((ISSUES == 0)); then
    echo "All checks passed."
  else
    echo "$ISSUES issue(s) found."
  fi
fi