Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)

This commit is contained in:
InnovA AI
2026-03-16 21:47:37 +00:00
commit 42772a31ed
365 changed files with 103572 additions and 0 deletions

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env bash
# DictIA — Backup script
#
# Creates a timestamped backup of data, env, and Docker volumes.
# Keeps the last N backups (default: 5, override with the KEEP_COUNT
# environment variable).
#
# Usage: [KEEP_COUNT=N] bash backup.sh [BACKUP_DIR]
set -euo pipefail

# The backup includes a copy of the project's .env file, so everything
# created from here on (staging directory, archive) is owner-only.
umask 077

# Project root is two directory levels above the directory of this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Destination root: first argument, or <project>/backups when omitted.
BACKUP_BASE="${1:-$PROJECT_DIR/backups}"

# Number of archives to retain; must be a positive decimal integer.
KEEP_COUNT="${KEEP_COUNT:-5}"
case "$KEEP_COUNT" in
  *[!0-9]*)
    echo "ERROR: KEEP_COUNT must be a positive integer (got: $KEEP_COUNT)" >&2
    exit 1
    ;;
esac
# Force base 10 so a value such as "08" is not read as invalid octal later.
KEEP_COUNT=$((10#$KEEP_COUNT))
if [ "$KEEP_COUNT" -lt 1 ]; then
  echo "ERROR: KEEP_COUNT must be a positive integer (got: $KEEP_COUNT)" >&2
  exit 1
fi

# Staging directory for this run; its name carries the start timestamp.
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
BACKUP_DIR="$BACKUP_BASE/dictia-$TIMESTAMP"

echo "=== DictIA Backup ==="
echo "Project: $PROJECT_DIR"
echo "Backup: $BACKUP_DIR"
echo
mkdir -p "$BACKUP_DIR"
# Steps 1-3 copy plain files from the project tree into the staging
# directory. A step whose source is missing prints a notice and is skipped.

# 1. Data directory (archive-mode recursive copy)
if [[ ! -d "$PROJECT_DIR/data" ]]; then
  printf '%s\n' "[1/4] No data/ directory found, skipping."
else
  printf '%s\n' "[1/4] Backing up data/..."
  cp -a "$PROJECT_DIR/data" "$BACKUP_DIR/data"
fi

# 2. Environment file (stored under the non-hidden name "dot-env")
if [[ ! -f "$PROJECT_DIR/.env" ]]; then
  printf '%s\n' "[2/4] No .env found, skipping."
else
  printf '%s\n' "[2/4] Backing up .env..."
  cp "$PROJECT_DIR/.env" "$BACKUP_DIR/dot-env"
fi

# 3. ASR Proxy stats
ASR_STATS="$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
if [[ ! -f "$ASR_STATS" ]]; then
  printf '%s\n' "[3/4] No ASR proxy stats, skipping."
else
  printf '%s\n' "[3/4] Backing up ASR proxy stats..."
  cp "$ASR_STATS" "$BACKUP_DIR/asr-usage-stats.json"
fi
# 4. Docker volumes (if using managed volumes)
echo "[4/4] Checking Docker volumes..."
# The volume list is captured before matching: piping docker straight into
# `grep -q` lets grep exit early, which under `pipefail` can be reported as a
# failure. A docker error simply yields an empty list (step skipped).
VOLUME_NAMES="$(docker volume ls --format '{{.Name}}' 2>/dev/null || true)"
# Whole-line, fixed-string match: a substring hit on some other volume name
# would make `docker run -v whisperx-cache:...` below create a new empty
# volume and archive that instead.
if grep -qxF "whisperx-cache" <<<"$VOLUME_NAMES"; then
  echo " Exporting whisperx-cache volume..."
  # The volume is mounted read-only; the tarball is written into the staging
  # directory through the /backup bind mount.
  if ! docker run --rm -v whisperx-cache:/source:ro -v "$BACKUP_DIR":/backup \
    alpine tar czf /backup/whisperx-cache.tar.gz -C /source .; then
    # Best-effort step: keep going, but do not leave a partial tarball that
    # the manifest would report as a valid whisperx_cache entry.
    echo " WARNING: whisperx-cache export failed; volume not included in this backup." >&2
    rm -f -- "$BACKUP_DIR/whisperx-cache.tar.gz"
  fi
fi
# Write manifest
# manifest.json records the timestamp, the project directory, the host name
# and, under "contents", one boolean per artefact telling whether it exists
# in the staging directory.

# Prints "true" when `[ <operator> <path> ]` holds, "false" otherwise.
#   $1 - test operator (-d or -f)
#   $2 - path to test
manifest_flag() {
  if [ "$1" "$2" ]; then
    echo "true"
  else
    echo "false"
  fi
}

cat > "$BACKUP_DIR/manifest.json" <<MANIFEST
{
"timestamp": "$TIMESTAMP",
"project_dir": "$PROJECT_DIR",
"hostname": "$(hostname)",
"contents": {
"data": $(manifest_flag -d "$BACKUP_DIR/data"),
"env": $(manifest_flag -f "$BACKUP_DIR/dot-env"),
"asr_stats": $(manifest_flag -f "$BACKUP_DIR/asr-usage-stats.json"),
"whisperx_cache": $(manifest_flag -f "$BACKUP_DIR/whisperx-cache.tar.gz")
}
}
MANIFEST
# Compress
echo
echo "Compressing backup..."
ARCHIVE="$BACKUP_BASE/dictia-$TIMESTAMP.tar.gz"
# Write under a temporary name first: a failed tar must not leave a truncated
# dictia-*.tar.gz behind, because the rotation below would count it as a
# backup and could delete a good archive to make room for it.
ARCHIVE_PARTIAL="$ARCHIVE.partial"
if ! tar czf "$ARCHIVE_PARTIAL" -C "$BACKUP_BASE" "dictia-$TIMESTAMP"; then
  rm -f -- "$ARCHIVE_PARTIAL"
  echo "ERROR: failed to create $ARCHIVE (staging kept at $BACKUP_DIR)" >&2
  exit 1
fi
mv -- "$ARCHIVE_PARTIAL" "$ARCHIVE"
rm -rf -- "$BACKUP_DIR"
echo "Archive: $ARCHIVE ($(du -h "$ARCHIVE" | cut -f1))"

# Rotate old backups
# Archives are listed newest-first by modification time; every entry past
# the first KEEP_COUNT is removed. Names are read one per line rather than
# piped to xargs, so a backup directory containing spaces is handled.
ROTATE_INDEX=0
ROTATE_ANNOUNCED=false
while IFS= read -r OLD_ARCHIVE; do
  ROTATE_INDEX=$((ROTATE_INDEX + 1))
  if [ "$ROTATE_INDEX" -le "$KEEP_COUNT" ]; then
    continue
  fi
  if [ "$ROTATE_ANNOUNCED" = false ]; then
    echo
    echo "Rotating backups (keeping last $KEEP_COUNT)..."
    ROTATE_ANNOUNCED=true
  fi
  rm -f -- "$OLD_ARCHIVE"
done < <(ls -1t -- "$BACKUP_BASE"/dictia-*.tar.gz)

echo
echo "=== Backup complete ==="

View File

@@ -0,0 +1,157 @@
#!/usr/bin/env bash
# DictIA — Health check diagnostic
#
# Checks Docker, containers, endpoints, disk, RAM, and GPU.
#
# Usage:
# bash health-check.sh # Human-readable output
# bash health-check.sh --json # JSON output
# bash health-check.sh --quiet # Exit code only (0=ok, 1=issue)
set -euo pipefail

# Project root is two directory levels above the directory of this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# OUTPUT selects the report format; ISSUES counts warnings and errors.
OUTPUT="human"
ISSUES=0

# The last format flag on the command line wins; any other argument is
# ignored.
for opt; do
  if [[ "$opt" == "--json" ]]; then
    OUTPUT="json"
  elif [[ "$opt" == "--quiet" ]]; then
    OUTPUT="quiet"
  fi
done

# Results keyed by check name; each value has the form "status|detail".
declare -A CHECKS
# Record the outcome of one diagnostic.
# Globals:
#   CHECKS (written) - receives "status|detail" under the given name
#   ISSUES (written) - incremented when status is "error" or "warning"
# Arguments:
#   $1 - check name (key in CHECKS)
#   $2 - status word
#   $3 - optional detail text (defaults to empty)
check() {
  local key="$1" state="$2" note="${3:-}"
  CHECKS["$key"]="$state|$note"
  case "$state" in
    error | warning) ISSUES=$((ISSUES + 1)) ;;
  esac
}
# --- Docker ---
# Both the CLI binary and a reachable daemon (`docker info`) are required.
if ! command -v docker &>/dev/null || ! docker info &>/dev/null; then
  check "docker" "error" "Docker not available"
else
  check "docker" "ok" "Docker daemon running"
fi

# --- Containers ---
# "dictia" is judged by its Docker health status. When `docker inspect`
# fails, the placeholder "not_found" is appended to the captured output.
DICTIA_STATUS=$(docker inspect --format='{{.State.Health.Status}}' dictia 2>/dev/null || echo "not_found")
case "$DICTIA_STATUS" in
  healthy)
    check "container_dictia" "ok" "healthy"
    ;;
  not_found)
    check "container_dictia" "error" "container not found"
    ;;
  *)
    check "container_dictia" "warning" "$DICTIA_STATUS"
    ;;
esac

# "whisperx-asr" is judged by its run state; its absence is informational
# only and does not count as an issue.
WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null || echo "not_found")
case "$WHISPERX_STATUS" in
  running)
    check "container_whisperx" "ok" "running"
    ;;
  not_found)
    check "container_whisperx" "info" "not present (cloud profile?)"
    ;;
  *)
    check "container_whisperx" "warning" "$WHISPERX_STATUS"
    ;;
esac
# --- Endpoints ---
# Probe <base-url>/health with a 5-second limit and record the result.
#   $1 - check name
#   $2 - base URL (no trailing slash)
#   $3 - status to record when the endpoint does not answer successfully
probe_endpoint() {
  local label="$1" base_url="$2" down_status="$3"
  if curl -sf -o /dev/null -m 5 "$base_url/health" 2>/dev/null; then
    check "$label" "ok" "$base_url responding"
  else
    check "$label" "$down_status" "$base_url not responding"
  fi
}

# Only the DictIA endpoint is mandatory; the other two are informational.
probe_endpoint "endpoint_dictia" "http://localhost:8899" "error"
probe_endpoint "endpoint_whisperx" "http://localhost:9000" "info"
probe_endpoint "endpoint_asr_proxy" "http://localhost:9090" "info"
# --- Disk ---
# Usage percentage of the filesystem holding the project directory.
# `df -P` prints one record per line even for long device names, so the
# percentage is always field 5 of line 2. `|| true` keeps a df failure from
# aborting the whole script through `pipefail`; the check is then skipped.
DISK_USED=$(df -P "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $5}' | tr -d '%' || true)
case "$DISK_USED" in
  '' | *[!0-9]*)
    : # usage could not be determined; no disk entry is recorded
    ;;
  *)
    if [ "$DISK_USED" -gt 90 ]; then
      check "disk" "error" "${DISK_USED}% used"
    elif [ "$DISK_USED" -gt 80 ]; then
      check "disk" "warning" "${DISK_USED}% used"
    else
      check "disk" "ok" "${DISK_USED}% used"
    fi
    ;;
esac

# --- RAM ---
# Used share = (total - available) / total, taken from columns 2 and 7 of
# the "Mem:" row of `free -m`.
if command -v free &>/dev/null; then
  # LC_ALL=C pins the row label to "Mem:" regardless of the user's locale;
  # a single invocation keeps both numbers from the same snapshot.
  MEM_LINE=$(LC_ALL=C free -m 2>/dev/null | awk '/^Mem:/{print $2, $7}' || true)
  MEM_TOTAL=${MEM_LINE%% *}
  MEM_AVAIL=${MEM_LINE##* }
  # Both fields must be plain integers and the total non-zero; otherwise
  # the arithmetic below would abort the script under `set -eu`.
  MEM_VALID=true
  for MEM_FIELD in "$MEM_TOTAL" "$MEM_AVAIL"; do
    case "$MEM_FIELD" in
      '' | *[!0-9]*) MEM_VALID=false ;;
    esac
  done
  if [ "$MEM_VALID" = true ] && [ "$MEM_TOTAL" -gt 0 ]; then
    MEM_USED_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
    if [ "$MEM_USED_PCT" -gt 90 ]; then
      check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    else
      check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    fi
  fi
fi
# --- GPU ---
# Only evaluated when nvidia-smi is installed. A failing query appends the
# sentinel "error" to the captured output.
if command -v nvidia-smi &>/dev/null; then
  GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader 2>/dev/null || echo "error")
  case "$GPU_INFO" in
    error)
      check "gpu" "warning" "nvidia-smi present but query failed"
      ;;
    *)
      check "gpu" "ok" "$GPU_INFO"
      ;;
  esac
fi
# --- Output ---
# Render the collected results in the format selected by OUTPUT:
#   json  - one JSON object, checks in the array's own iteration order
#   quiet - no output; exit status 0 when ISSUES is 0, otherwise 1
#   other - human-readable table in a fixed order, followed by a summary
case "$OUTPUT" in
  json)
    printf '%s\n' "{"
    printf '%s\n' " \"timestamp\": \"$(date -Is)\","
    printf '%s\n' " \"issues\": $ISSUES,"
    printf '%s\n' " \"checks\": {"
    # Empty before the first entry, then a comma plus newline.
    SEPARATOR=""
    for key in "${!CHECKS[@]}"; do
      # Split the stored "status|detail" value at the first '|'.
      IFS='|' read -r state note <<< "${CHECKS[$key]}"
      printf '%s "%s": {"status": "%s", "detail": "%s"}' "$SEPARATOR" "$key" "$state" "$note"
      SEPARATOR=$',\n'
    done
    printf '\n'
    printf '%s\n' " }"
    printf '%s\n' "}"
    ;;
  quiet)
    if [ "$ISSUES" -eq 0 ]; then
      exit 0
    else
      exit 1
    fi
    ;;
  *)
    printf '%s\n' "=== DictIA Health Check ==="
    printf '\n'
    REPORT_ORDER=(docker container_dictia container_whisperx endpoint_dictia
      endpoint_whisperx endpoint_asr_proxy disk memory gpu)
    for key in "${REPORT_ORDER[@]}"; do
      # Checks that were never recorded are left out of the table.
      if [ -z "${CHECKS[$key]+x}" ]; then
        continue
      fi
      IFS='|' read -r state note <<< "${CHECKS[$key]}"
      case "$state" in
        ok) ICON="[OK]" ;;
        warning) ICON="[!!]" ;;
        error) ICON="[ERR]" ;;
        info) ICON="[--]" ;;
      esac
      printf " %-22s %s %s\n" "$key" "$ICON" "$note"
    done
    printf '\n'
    if [ "$ISSUES" -ne 0 ]; then
      printf '%s\n' "$ISSUES issue(s) found."
    else
      printf '%s\n' "All checks passed."
    fi
    ;;
esac

101
deployment/tools/restore.sh Normal file
View File

@@ -0,0 +1,101 @@
#!/usr/bin/env bash
# DictIA — Restore script
#
# Restores a DictIA backup archive created by backup.sh.
#
# Usage: bash restore.sh <ARCHIVE_PATH> [PROJECT_DIR]
set -euo pipefail

# First argument: archive to restore (mandatory).
ARCHIVE="${1:-}"

# Second argument: project directory. When missing or empty it defaults to
# the directory two levels above this script.
PROJECT_DIR="${2:-}"
if [[ -z "$PROJECT_DIR" ]]; then
  PROJECT_DIR="$(cd "$(dirname "$0")/../.." && pwd)"
fi

# Without a readable archive, print usage plus up to five of the most
# recently modified backups under <project>/backups, then exit with 1.
if ! [[ -n "$ARCHIVE" && -f "$ARCHIVE" ]]; then
  printf '%s\n' "Usage: bash restore.sh <backup-archive.tar.gz> [project-dir]"
  printf '\n'
  printf '%s\n' "Available backups:"
  if ! ls -1t "$PROJECT_DIR/backups"/dictia-*.tar.gz 2>/dev/null | head -5; then
    printf '%s\n' " (none found)"
  fi
  exit 1
fi
echo "=== DictIA Restore ==="
echo "Archive: $ARCHIVE"
echo "Target: $PROJECT_DIR"
echo

# Validate archive
echo "Validating archive..."
# NOTE(review): the name TMPDIR shadows the conventional temp-directory
# variable; it is kept because the cleanup step further down refers to it.
TMPDIR=$(mktemp -d)
# Remove the extraction directory on every exit path (failed extraction,
# invalid archive, user abort, or any later error under `set -e`).
trap 'rm -rf -- "$TMPDIR"' EXIT
if ! tar xzf "$ARCHIVE" -C "$TMPDIR"; then
  echo "ERROR: Could not extract $ARCHIVE" >&2
  exit 1
fi

# Locate the top-level dictia-* entry with a glob instead of `ls | head`:
# when nothing matches, `ls` fails and `pipefail` + `set -e` would end the
# script before the explanatory message below could be printed.
BACKUP_DIR=""
for CANDIDATE in "$TMPDIR"/dictia-*; do
  if [ -e "$CANDIDATE" ]; then
    BACKUP_DIR="$CANDIDATE"
    break
  fi
done
if [ -z "$BACKUP_DIR" ] || [ ! -f "$BACKUP_DIR/manifest.json" ]; then
  echo "ERROR: Invalid backup archive (no manifest.json)" >&2
  exit 1
fi

echo "Manifest:"
cat "$BACKUP_DIR/manifest.json"
echo
echo

# Confirmation
# Anything other than "y"/"Y" aborts; end-of-input (for example a
# non-interactive run) is treated the same way instead of killing the
# script silently through `set -e`.
CONFIRM=""
read -rp "This will overwrite current data. Continue? [y/N] " CONFIRM || true
if [ "$CONFIRM" != "y" ] && [ "$CONFIRM" != "Y" ]; then
  echo "Aborted."
  exit 0
fi
# Stop services
echo
echo "Stopping DictIA services..."
# Pick the compose file of the deployed profile. Previously the loop had no
# `break` and no running check, so the last existing file was always used,
# whichever profile was actually deployed. The detection below follows the
# same order as update.sh: a profile with running containers first, then
# DICTIA_PROFILE from .env, then "cloud".
# NOTE(review): this assumes `docker compose ps` tells the profiles apart
# (distinct project names or services) — confirm against the compose files.
COMPOSE_DIR="$PROJECT_DIR/deployment/docker"
COMPOSE_FILE=""
for f in cloud local-cpu local-gpu; do
  CANDIDATE_FILE="$COMPOSE_DIR/docker-compose.$f.yml"
  [ -f "$CANDIDATE_FILE" ] || continue
  RUNNING_IDS="$(docker compose -f "$CANDIDATE_FILE" ps --quiet 2>/dev/null || true)"
  if [ -n "$RUNNING_IDS" ]; then
    COMPOSE_FILE="$CANDIDATE_FILE"
    break
  fi
done

if [ -z "$COMPOSE_FILE" ]; then
  # Nothing is running: fall back to the configured profile. The current
  # (pre-restore) .env is the one consulted here.
  PROFILE=""
  if [ -f "$PROJECT_DIR/.env" ]; then
    PROFILE="$(grep -E '^DICTIA_PROFILE=' "$PROJECT_DIR/.env" 2>/dev/null | tail -n 1 | cut -d= -f2- || true)"
  fi
  PROFILE="${PROFILE:-cloud}"
  if [ -f "$COMPOSE_DIR/docker-compose.$PROFILE.yml" ]; then
    COMPOSE_FILE="$COMPOSE_DIR/docker-compose.$PROFILE.yml"
  fi
fi

if [ -n "$COMPOSE_FILE" ]; then
  # Best effort: a failing `down` does not block the restore, but is reported.
  if ! docker compose -f "$COMPOSE_FILE" down 2>/dev/null; then
    echo "WARNING: could not stop services with $COMPOSE_FILE; continuing." >&2
  fi
else
  echo "WARNING: no compose file found under $COMPOSE_DIR; services are left untouched." >&2
fi
# Restore data
if [ -d "$BACKUP_DIR/data" ]; then
  echo "Restoring data/..."
  # Copy next to the live directory first and swap only after the copy has
  # succeeded, so a failed copy (disk full, permissions) cannot leave the
  # project without any data/ directory.
  DATA_STAGING="$PROJECT_DIR/data.restore-$$"
  rm -rf -- "$DATA_STAGING"
  if ! cp -a "$BACKUP_DIR/data" "$DATA_STAGING"; then
    rm -rf -- "$DATA_STAGING"
    echo "ERROR: copying data/ failed; existing data left untouched." >&2
    exit 1
  fi
  # `:?` aborts instead of expanding to "/data" should PROJECT_DIR be empty.
  rm -rf -- "${PROJECT_DIR:?}/data"
  mv -- "$DATA_STAGING" "$PROJECT_DIR/data"
fi

# Restore .env
if [ -f "$BACKUP_DIR/dot-env" ]; then
  echo "Restoring .env..."
  cp "$BACKUP_DIR/dot-env" "$PROJECT_DIR/.env"
fi

# Restore ASR stats
if [ -f "$BACKUP_DIR/asr-usage-stats.json" ]; then
  echo "Restoring ASR proxy stats..."
  # The target directory may be missing in a fresh checkout; without it the
  # copy would fail and abort the restore half-way.
  mkdir -p "$PROJECT_DIR/deployment/asr-proxy"
  cp "$BACKUP_DIR/asr-usage-stats.json" "$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
fi

# Restore Docker volumes
if [ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ]; then
  echo "Restoring whisperx-cache volume..."
  docker volume create whisperx-cache 2>/dev/null || true
  # Best effort: a failure here does not abort the restore, but it is
  # reported instead of being silently discarded.
  if ! docker run --rm -v whisperx-cache:/target -v "$BACKUP_DIR":/backup:ro \
    alpine sh -c "cd /target && tar xzf /backup/whisperx-cache.tar.gz"; then
    echo "WARNING: whisperx-cache volume could not be restored." >&2
  fi
fi
# Cleanup
# Drop the temporary directory the archive was extracted into.
rm -rf "$TMPDIR"

# Restart services
# Skipped when no compose file was selected earlier in the script.
printf '\n%s\n' "Restarting DictIA..."
[[ -z "$COMPOSE_FILE" ]] || docker compose -f "$COMPOSE_FILE" up -d

printf '\n%s\n' "=== Restore complete ==="

105
deployment/tools/update.sh Normal file
View File

@@ -0,0 +1,105 @@
#!/usr/bin/env bash
# DictIA — Update script
#
# Pulls latest code, rebuilds Docker image, and restarts services.
# Detects the active deployment profile automatically.
#
# Usage: bash update.sh [--no-pull] [--no-build]
set -euo pipefail

# Project root is two directory levels above the directory of this script.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Both steps run unless switched off on the command line.
NO_PULL=false
NO_BUILD=false

# Any argument other than the two known flags is rejected with exit code 1.
for opt; do
  if [[ "$opt" == "--no-pull" ]]; then
    NO_PULL=true
  elif [[ "$opt" == "--no-build" ]]; then
    NO_BUILD=true
  else
    echo "Unknown option: $opt"
    exit 1
  fi
done

printf '%s\n' "=== DictIA Update ===" "Project: $PROJECT_DIR" ""
# 1. Detect active compose file
# Profiles are tried in a fixed order; the first one whose compose file
# exists and reports at least one container is taken as the active one.
COMPOSE_FILE=""
PROFILE=""
for f in cloud local-cpu local-gpu; do
  CF="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
  [ -f "$CF" ] || continue
  # Capture the ID list instead of piping into `grep -q`: grep exiting on
  # its first match can make the pipeline fail under `pipefail`, which
  # would wrongly read as "nothing running".
  RUNNING_IDS="$(docker compose -f "$CF" ps --quiet 2>/dev/null || true)"
  if [ -n "$RUNNING_IDS" ]; then
    COMPOSE_FILE="$CF"
    PROFILE="$f"
    break
  fi
done

if [ -z "$COMPOSE_FILE" ]; then
  # Fallback: check .env for profile (last definition wins), else "cloud".
  if [ -f "$PROJECT_DIR/.env" ]; then
    PROFILE="$(grep -E '^DICTIA_PROFILE=' "$PROJECT_DIR/.env" 2>/dev/null | tail -n 1 | cut -d= -f2- || true)"
    # Tolerate CRLF line endings and a value wrapped in quotes.
    PROFILE=${PROFILE%$'\r'}
    case "$PROFILE" in
      \"*\")
        PROFILE=${PROFILE#\"}
        PROFILE=${PROFILE%\"}
        ;;
      \'*\')
        PROFILE=${PROFILE#\'}
        PROFILE=${PROFILE%\'}
        ;;
    esac
  fi
  PROFILE="${PROFILE:-cloud}"
  COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$PROFILE.yml"
  # Stop here rather than after the pull and build when the profile does
  # not correspond to an existing compose file.
  if [ ! -f "$COMPOSE_FILE" ]; then
    echo "ERROR: Compose file not found for profile '$PROFILE': $COMPOSE_FILE" >&2
    exit 1
  fi
fi

echo "Profile: $PROFILE"
echo "Compose: $COMPOSE_FILE"
echo
# 2. Git pull
# Runs inside the project directory and pulls the dictia-branding branch
# from origin, unless --no-pull was given.
case "$NO_PULL" in
  false)
    printf '%s\n' "[1/5] Pulling latest code..."
    cd "$PROJECT_DIR"
    git pull origin dictia-branding
    ;;
  *)
    printf '%s\n' "[1/5] Skipping git pull (--no-pull)"
    ;;
esac

# 3. Rebuild DictIA image
# Builds from the project root and tags the result innova-ai/dictia:latest.
case "$NO_BUILD" in
  false)
    printf '%s\n' "[2/5] Building DictIA image..."
    cd "$PROJECT_DIR"
    docker build -t innova-ai/dictia:latest .
    ;;
  *)
    printf '%s\n' "[2/5] Skipping build (--no-build)"
    ;;
esac

# 3b. Pull upstream images (WhisperX) if local profile
# Skipped for the cloud profile and whenever --no-build is in effect. The
# pull itself is best effort: its errors are hidden and never fatal.
if [ "$PROFILE" = "cloud" ] || [ "$NO_BUILD" != false ]; then
  printf '%s\n' "[3/5] Skipping upstream pull (cloud profile or --no-build)"
else
  printf '%s\n' "[3/5] Pulling upstream images (WhisperX)..."
  docker compose -f "$COMPOSE_FILE" pull whisperx-asr 2>/dev/null || true
fi
# 4. Restart containers
echo "[4/5] Restarting containers..."
docker compose -f "$COMPOSE_FILE" down
docker compose -f "$COMPOSE_FILE" up -d

# 5. Wait for health
# Polls `docker compose ps` up to RETRIES times, 5 seconds apart.
echo "[5/5] Waiting for health check..."
RETRIES=30
for ((i = 1; i <= RETRIES; i++)); do
  # Capture the listing first so grep exiting early cannot fail the
  # pipeline under `pipefail`; a failing `ps` just counts as "not yet".
  PS_OUTPUT="$(docker compose -f "$COMPOSE_FILE" ps || true)"
  # Match the literal "(healthy)" marker: the bare word "healthy" is also a
  # substring of "unhealthy", which made a failing container look fine.
  # NOTE(review): this is satisfied by any healthy service of the stack,
  # not specifically the DictIA one — confirm that is acceptable.
  if grep -qF "(healthy)" <<<"$PS_OUTPUT"; then
    echo " DictIA is healthy!"
    break
  fi
  if [ "$i" -eq "$RETRIES" ]; then
    echo " WARNING: Health check timeout. Check: docker compose -f $COMPOSE_FILE logs"
    # Last attempt: leave the loop now instead of sleeping once more.
    break
  fi
  sleep 5
done
# Cleanup dangling images
# Best effort: the prune's errors are hidden and its failure is ignored.
printf '\n%s\n' "Cleaning up old images..."
docker image prune -f 2>/dev/null || :

# Closing banner, followed by the current state of the stack's containers.
printf '\n%s\n%s\n' "=== Update complete ===" "DictIA: http://localhost:8899"
docker compose -f "$COMPOSE_FILE" ps