From 39ac76ab03db1e71c4f01a6679a1502672791158 Mon Sep 17 00:00:00 2001 From: InnovA AI Date: Mon, 16 Mar 2026 21:57:40 +0000 Subject: [PATCH] chore: retirer deployment/ (infra interne, pas requis pour AGPL) --- README.md | 8 - deployment/README.md | 105 -- deployment/asr-proxy/.gitignore | 5 - deployment/asr-proxy/asr-proxy.service | 22 - deployment/asr-proxy/dashboard.html | 1534 ----------------- deployment/asr-proxy/proxy.py | 741 -------- deployment/asr-proxy/requirements.txt | 5 - deployment/asr-proxy/setup.sh | 87 - deployment/config/nginx/dictia.conf | 83 - deployment/config/systemd/dictia.service | 15 - deployment/config/tailscale/setup-serve.sh | 52 - deployment/docker/.env.example | 124 -- deployment/docker/docker-compose.cloud.yml | 40 - .../docker/docker-compose.local-cpu.yml | 64 - .../docker/docker-compose.local-gpu.yml | 69 - deployment/docs/LOCAL-SETUP.md | 118 -- deployment/docs/MAINTENANCE.md | 136 -- deployment/docs/QUICKSTART.md | 90 - deployment/docs/TROUBLESHOOTING.md | 177 -- deployment/docs/VPS-SETUP.md | 148 -- .../profiles/docker-compose.dictia16.yml | 101 -- .../profiles/docker-compose.dictia8.yml | 75 - deployment/profiles/env.dictia16.example | 134 -- deployment/profiles/env.dictia8.example | 126 -- deployment/security/docker-daemon.json | 8 - deployment/security/docker-iptables.service | 12 - deployment/security/iptables-rules.sh | 36 - deployment/setup.sh | 300 ---- deployment/tools/backup.sh | 89 - deployment/tools/health-check.sh | 157 -- deployment/tools/restore.sh | 101 -- deployment/tools/update.sh | 105 -- 32 files changed, 4867 deletions(-) delete mode 100644 deployment/README.md delete mode 100644 deployment/asr-proxy/.gitignore delete mode 100644 deployment/asr-proxy/asr-proxy.service delete mode 100644 deployment/asr-proxy/dashboard.html delete mode 100644 deployment/asr-proxy/proxy.py delete mode 100644 deployment/asr-proxy/requirements.txt delete mode 100644 deployment/asr-proxy/setup.sh delete mode 100644 deployment/config/nginx/dictia.conf delete mode 100644 deployment/config/systemd/dictia.service delete mode 100644 deployment/config/tailscale/setup-serve.sh delete mode 100644 deployment/docker/.env.example delete mode 100644 deployment/docker/docker-compose.cloud.yml delete mode 100644 deployment/docker/docker-compose.local-cpu.yml delete mode 100644 deployment/docker/docker-compose.local-gpu.yml delete mode 100644 deployment/docs/LOCAL-SETUP.md delete mode 100644 deployment/docs/MAINTENANCE.md delete mode 100644 deployment/docs/QUICKSTART.md delete mode 100644 deployment/docs/TROUBLESHOOTING.md delete mode 100644 deployment/docs/VPS-SETUP.md delete mode 100644 deployment/profiles/docker-compose.dictia16.yml delete mode 100644 deployment/profiles/docker-compose.dictia8.yml delete mode 100644 deployment/profiles/env.dictia16.example delete mode 100644 deployment/profiles/env.dictia8.example delete mode 100644 deployment/security/docker-daemon.json delete mode 100644 deployment/security/docker-iptables.service delete mode 100644 deployment/security/iptables-rules.sh delete mode 100755 deployment/setup.sh delete mode 100644 deployment/tools/backup.sh delete mode 100644 deployment/tools/health-check.sh delete mode 100644 deployment/tools/restore.sh delete mode 100644 deployment/tools/update.sh diff --git a/README.md b/README.md index e5ae33a..e189276 100644 --- a/README.md +++ b/README.md @@ -13,20 +13,12 @@ Application de transcription audio propulsee par l'intelligence artificielle. Tr - Conformite Loi 25 (Quebec) — journal d'audit integre - 100% auto-heberge — vos donnees restent chez vous -## Demarrage rapide - -Voir le [guide de demarrage](client_docs/guide-utilisateur/premiers-pas.md). - ## Documentation - [Guide utilisateur](client_docs/guide-utilisateur/index.md) - [Guide administrateur](client_docs/guide-admin/index.md) - [Depannage](client_docs/depannage/index.md) -## Deploiement - -Voir le [guide de deploiement](deployment/README.md) et les profils Docker dans `deployment/profiles/`. - ## Licence AGPL-3.0 — voir [LICENSE](LICENSE). diff --git a/deployment/README.md b/deployment/README.md deleted file mode 100644 index 0569f71..0000000 --- a/deployment/README.md +++ /dev/null @@ -1,105 +0,0 @@ -# DictIA — Deployment Infrastructure - -Infrastructure de deploiement reproductible pour DictIA . - -## Choix de profil - -``` -Quel est ton setup? - | - +-- VPS / serveur cloud? - | --> cloud (ASR Proxy GCP GPU on demand) - | - +-- Machine locale avec GPU NVIDIA? - | --> local-gpu (WhisperX sur GPU, le plus rapide) - | - +-- Machine locale sans GPU? - --> local-cpu (WhisperX sur CPU, lent mais fonctionnel) -``` - -## Quickstart - -```bash -git clone https://gitea.innova-ai.ca/Innova-AI/dictia-public.git -cd dictia -git checkout dictia-branding -bash deployment/setup.sh -``` - -Le script detecte le hardware et guide l'installation. - -## Architecture - -``` -deployment/ -├── setup.sh # Installateur principal -├── docker/ -│ ├── docker-compose.cloud.yml -│ ├── docker-compose.local-cpu.yml -│ ├── docker-compose.local-gpu.yml -│ └── .env.example -├── asr-proxy/ # Proxy GCP GPU (cloud seulement) -│ ├── proxy.py -│ ├── dashboard.html -│ ├── requirements.txt -│ ├── setup.sh -│ └── asr-proxy.service -├── security/ # Securite Docker (cloud) -│ ├── docker-daemon.json -│ ├── iptables-rules.sh -│ └── docker-iptables.service -├── config/ -│ ├── nginx/dictia.conf -│ ├── tailscale/setup-serve.sh -│ └── systemd/dictia.service -├── tools/ -│ ├── backup.sh -│ ├── restore.sh -│ ├── update.sh -│ └── health-check.sh -└── docs/ - ├── QUICKSTART.md - ├── VPS-SETUP.md - ├── LOCAL-SETUP.md - ├── MAINTENANCE.md - └── TROUBLESHOOTING.md -``` - -### Profil Cloud - -``` -Internet --> Tailscale --> VPS - | - DictIA :8899 - | - ASR Proxy :9090 - | - GCP GPU (auto start/stop) - | - WhisperX :9000 -``` - -### Profil Local GPU/CPU - -``` -localhost:8899 --> DictIA container - | - WhisperX container :9000 - | - GPU local (ou CPU) -``` - -## Documentation - -- [QUICKSTART.md](docs/QUICKSTART.md) — Demarrage rapide par profil -- [VPS-SETUP.md](docs/VPS-SETUP.md) — Setup VPS complet from scratch -- [LOCAL-SETUP.md](docs/LOCAL-SETUP.md) — Setup local GPU/CPU -- [MAINTENANCE.md](docs/MAINTENANCE.md) — Backup, restore, update, monitoring -- [TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) — Problemes courants + solutions - -## Mise a jour upstream - -Tous les fichiers dans `deployment/` sont specifiques a DictIA. -Aucun conflit lors des merges upstream, sauf `deployment/setup.sh` -(qui remplace le setup.sh original de Speakr). - diff --git a/deployment/asr-proxy/.gitignore b/deployment/asr-proxy/.gitignore deleted file mode 100644 index 8ff2efb..0000000 --- a/deployment/asr-proxy/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -gcp-credentials.json -usage-stats.json -venv/ -__pycache__/ -*.pyc diff --git a/deployment/asr-proxy/asr-proxy.service b/deployment/asr-proxy/asr-proxy.service deleted file mode 100644 index 917ec8a..0000000 --- a/deployment/asr-proxy/asr-proxy.service +++ /dev/null @@ -1,22 +0,0 @@ -# TEMPLATE — Ne pas copier directement dans /etc/systemd/system/. -# Les variables ${ASR_PROXY_USER} et ${ASR_PROXY_DIR} sont des placeholders. -# Le fichier service réel est généré par setup.sh (via heredoc bash) avec les -# valeurs résolues de $SERVICE_USER et $INSTALL_DIR. -# Usage : sudo bash setup.sh (installe et active le service automatiquement) - -[Unit] -Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX -After=network.target - -[Service] -Type=simple -User=${ASR_PROXY_USER} -Restart=always -RestartSec=10 -WorkingDirectory=${ASR_PROXY_DIR} -ExecStart=${ASR_PROXY_DIR}/venv/bin/python proxy.py -Environment=GOOGLE_APPLICATION_CREDENTIALS=${ASR_PROXY_DIR}/gcp-credentials.json -Environment=STATS_FILE=${ASR_PROXY_DIR}/usage-stats.json - -[Install] -WantedBy=multi-user.target diff --git a/deployment/asr-proxy/dashboard.html b/deployment/asr-proxy/dashboard.html deleted file mode 100644 index ba1ca7b..0000000 --- a/deployment/asr-proxy/dashboard.html +++ /dev/null @@ -1,1534 +0,0 @@ - - - - - -DictIA GPU Monitor - - - - - - - -
- Connection error: unable to reach proxy -
- -
- - -
-

DICTIA GPU MONITOR

-
-
- proxy: connecting... -
-
- - -
-
-
-
-
-
-
---
-
---
-
- 0 active requests -
-
- - -
-
-
--
-
GPU Time
-
This Month
-
-
-
--
-
Estimated Cost
-
USD
-
-
-
--
-
Total Requests
-
This Month
-
-
-
--
-
Remaining
-
of --h
-
-
- - -
-
-
- Monthly Budget - --% -
-
-
-
-
--h / --h
-
-
- - -
- - -
- - -
-
-
Instance Details
-
-
-
IP
-
---
-
-
-
Machine
-
---
-
-
-
GPU
-
---
-
-
-
Idle
-
---
-
-
-
OAuth Token
-
---
-
-
-
Cost Rate
-
---
-
-
-
-
- - -
-
-
Zone Fallback Map
-
- -
-
-
- - -
-
-
Request History
-
- - - - - - - - - - - - - -
TimeTypeDurationStatusZone
No requests yet
-
-
-
- - -
-
-
Event Log
-
-
Waiting for data...
-
-
-
- - - - -
- - - - - diff --git a/deployment/asr-proxy/proxy.py b/deployment/asr-proxy/proxy.py deleted file mode 100644 index db20d1a..0000000 --- a/deployment/asr-proxy/proxy.py +++ /dev/null @@ -1,741 +0,0 @@ -"""DictIA ASR Proxy - Auto-start/stop GCP GPU for WhisperX + Ollama. - -Uses Google Cloud Compute REST API directly (no gcloud CLI needed). -Proxies both ASR (WhisperX) and LLM (Ollama) requests. -Multi-zone fallback across Canada (Montreal + Toronto). -""" - -import asyncio -import json -import logging -import os -import time - -import httpx -import jwt as pyjwt -from pathlib import Path - -from fastapi import FastAPI, Request -from fastapi.responses import HTMLResponse, JSONResponse, Response - -logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") -log = logging.getLogger("asr-proxy") - -# Config — paths relative to this script's directory by default -SCRIPT_DIR = Path(__file__).parent -GCP_PROJECT = os.getenv("GCP_PROJECT", "speakr-gpu") -WHISPERX_PORT = int(os.getenv("WHISPERX_PORT", "9000")) -OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434")) -IDLE_TIMEOUT = int(os.getenv("IDLE_TIMEOUT", "300")) -CREDS_FILE = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", str(SCRIPT_DIR / "gcp-credentials.json")) -STATS_FILE = os.getenv("STATS_FILE", str(SCRIPT_DIR / "usage-stats.json")) -MONTHLY_LIMIT_HOURS = float(os.getenv("MONTHLY_LIMIT_HOURS", "30")) -# Real GCP cost per GPU-hour (g2-standard-4 + L4): GPU ($0.837) + vCPU ($0.151) + RAM ($0.069) -GPU_COST_PER_HOUR = float(os.getenv("GPU_COST_PER_HOUR", "1.06")) -# Fixed monthly costs: SSD disks ($5.66) + snapshots ($4.19) ≈ $9.85/month -FIXED_MONTHLY_COST = float(os.getenv("FIXED_MONTHLY_COST", "9.85")) -SNAPSHOT_NAME = "whisperx-gpu-snapshot" -HEALTH_POLL_INTERVAL = 5 -BOOT_TIMEOUT = 300 - -# Zone fallback order — Canada only, Montreal first -ZONE_FALLBACKS = [ - { - "zone": "northamerica-northeast1-b", - "instance": "whisperx-gpu-mtl1", - "machine_type": "g2-standard-4", - "accelerator": "nvidia-l4", - "accel_count": 1, - "label": "Montreal-b (L4)", - }, - { - "zone": "northamerica-northeast1-c", - "instance": "whisperx-gpu-mtl2", - "machine_type": "n1-standard-4", - "accelerator": "nvidia-tesla-t4", - "accel_count": 1, - "label": "Montreal-c (T4)", - }, - { - "zone": "northamerica-northeast2-a", - "instance": "whisperx-gpu-tor1", - "machine_type": "g2-standard-4", - "accelerator": "nvidia-l4", - "accel_count": 1, - "label": "Toronto-a (L4)", - }, - { - "zone": "northamerica-northeast2-b", - "instance": "whisperx-gpu", - "machine_type": "g2-standard-4", - "accelerator": "nvidia-l4", - "accel_count": 1, - "label": "Toronto-b (L4)", - }, -] - -STARTUP_SCRIPT = """#!/bin/bash -systemctl start docker -sleep 5 -docker start whisperx-asr 2>/dev/null || true -systemctl start ollama 2>/dev/null || true -""" - -app = FastAPI(title="DictIA ASR Proxy") - -# State -last_request_time = 0.0 -active_requests = 0 -gpu_ip: str | None = None -active_zone: dict | None = None -shutdown_task: asyncio.Task | None = None - -# Request history tracking (in-memory, last 20 requests) -request_history: list[dict] = [] -MAX_HISTORY = 20 - -# Zone status tracking -zone_status: dict[str, dict] = {} - -# Startup lock and failure cooldown -_startup_lock: asyncio.Lock | None = None -_last_failure_time: float = 0 -FAILURE_COOLDOWN = 180 - -# OAuth2 token cache -_access_token: str | None = None -_token_expiry: float = 0 - - -# --- Usage Stats --- - -def load_stats() -> dict: - try: - with open(STATS_FILE) as f: - return json.load(f) - except (FileNotFoundError, json.JSONDecodeError): - return {"gpu_seconds": 0, "month": time.strftime("%Y-%m"), "requests": 0, "last_start": 0} - - -def save_stats(stats: dict): - with open(STATS_FILE, "w") as f: - json.dump(stats, f, indent=2) - - -def track_gpu_time(): - stats = load_stats() - current_month = time.strftime("%Y-%m") - if stats.get("month") != current_month: - stats = {"gpu_seconds": 0, "month": current_month, "requests": 0, "last_start": 0} - if stats.get("last_start", 0) > 0: - elapsed = time.time() - stats["last_start"] - stats["gpu_seconds"] += elapsed - stats["last_start"] = 0 - save_stats(stats) - - -def check_budget() -> tuple[bool, float]: - stats = load_stats() - current_month = time.strftime("%Y-%m") - if stats.get("month") != current_month: - return True, 0.0 - hours_used = stats.get("gpu_seconds", 0) / 3600 - return hours_used < MONTHLY_LIMIT_HOURS, hours_used - - -# --- GCP Auth --- - -async def get_access_token() -> str: - global _access_token, _token_expiry - if _access_token and time.time() < _token_expiry - 60: - return _access_token - with open(CREDS_FILE) as f: - creds = json.load(f) - cred_type = creds.get("type", "authorized_user") - async with httpx.AsyncClient() as client: - if cred_type == "service_account": - now = int(time.time()) - payload = { - "iss": creds["client_email"], - "scope": "https://www.googleapis.com/auth/compute", - "aud": "https://oauth2.googleapis.com/token", - "iat": now, - "exp": now + 3600, - } - signed = pyjwt.encode(payload, creds["private_key"], algorithm="RS256") - resp = await client.post( - "https://oauth2.googleapis.com/token", - data={ - "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer", - "assertion": signed, - }, - ) - else: - resp = await client.post( - "https://oauth2.googleapis.com/token", - data={ - "client_id": creds["client_id"], - "client_secret": creds["client_secret"], - "refresh_token": creds["refresh_token"], - "grant_type": "refresh_token", - }, - ) - resp.raise_for_status() - data = resp.json() - _access_token = data["access_token"] - _token_expiry = time.time() + data.get("expires_in", 3600) - log.info(f"Refreshed GCP access token ({cred_type})") - return _access_token - - -# --- GCP Compute API --- - -COMPUTE_BASE = "https://compute.googleapis.com/compute/v1" - - -async def gcp_api(method: str, url: str, **kwargs) -> httpx.Response: - token = await get_access_token() - async with httpx.AsyncClient(timeout=60) as client: - resp = await client.request( - method, url, - headers={"Authorization": f"Bearer {token}"}, - **kwargs, - ) - return resp - - -async def get_instance_info(zone: str, instance: str) -> dict | None: - url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}" - resp = await gcp_api("GET", url) - if resp.status_code == 404: - return None - if resp.status_code >= 400: - log.error(f"GCP API error {resp.status_code}: {resp.text}") - return None - return resp.json() - - -def extract_ip(instance_data: dict) -> str: - interfaces = instance_data.get("networkInterfaces", []) - if interfaces: - access = interfaces[0].get("accessConfigs", []) - if access: - return access[0].get("natIP", "") - return "" - - -async def start_instance_in_zone(zone: str, instance: str) -> bool: - url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/start" - resp = await gcp_api("POST", url) - if resp.status_code < 400: - log.info(f"Start requested: {instance} in {zone}") - return True - log.warning(f"Failed to start {instance} in {zone}: {resp.status_code} {resp.text}") - return False - - -async def stop_instance_in_zone(zone: str, instance: str): - url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/stop" - resp = await gcp_api("POST", url) - if resp.status_code < 400: - log.info(f"Stop requested: {instance} in {zone}") - else: - log.error(f"Failed to stop {instance} in {zone}: {resp.status_code}") - - -async def create_instance_from_snapshot(config: dict) -> bool: - zone = config["zone"] - instance = config["instance"] - machine = config["machine_type"] - accel = config["accelerator"] - accel_count = config["accel_count"] - - log.info(f"Creating {instance} in {zone} from snapshot...") - - body = { - "name": instance, - "machineType": f"zones/{zone}/machineTypes/{machine}", - "disks": [{ - "boot": True, - "autoDelete": True, - "initializeParams": { - "diskSizeGb": "50", - "diskType": f"zones/{zone}/diskTypes/pd-ssd", - "sourceSnapshot": f"global/snapshots/{SNAPSHOT_NAME}", - }, - }], - "networkInterfaces": [{ - "network": "global/networks/default", - "accessConfigs": [{"type": "ONE_TO_ONE_NAT", "name": "External NAT"}], - }], - "guestAccelerators": [{ - "acceleratorType": f"zones/{zone}/acceleratorTypes/{accel}", - "acceleratorCount": accel_count, - }], - "scheduling": { - "onHostMaintenance": "TERMINATE", - "automaticRestart": False, - }, - "tags": {"items": ["whisperx-gpu"]}, - "metadata": { - "items": [{"key": "startup-script", "value": STARTUP_SCRIPT}], - }, - } - - url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances" - resp = await gcp_api("POST", url, json=body) - - if resp.status_code < 400: - log.info(f"Created {instance} in {zone}") - return True - - error_text = resp.text - if "ZONE_RESOURCE_POOL_EXHAUSTED" in error_text: - log.warning(f"No capacity in {zone} -- skipping") - elif "QUOTA" in error_text.upper(): - log.warning(f"Quota exceeded for {zone}: {error_text[:200]}") - else: - log.error(f"Failed to create in {zone}: {resp.status_code} {error_text[:200]}") - return False - - -# --- Core Logic --- - -async def wait_for_running(zone: str, instance: str, timeout: int = 120, grace: int = 15) -> bool: - gone_count = 0 - start_time = time.time() - for _ in range(timeout // 5): - info = await get_instance_info(zone, instance) - if info and info.get("status") == "RUNNING": - return True - status = info.get("status", "UNKNOWN") if info else "GONE" - elapsed = time.time() - start_time - if status == "GONE": - gone_count += 1 - if gone_count >= 2: - log.warning(f"{instance} in {zone}: instance disappeared (no capacity)") - return False - if status in ("STOPPING",): - log.warning(f"{instance} in {zone}: status {status} (no capacity)") - return False - if status in ("TERMINATED", "STOPPED") and elapsed > grace: - log.warning(f"{instance} in {zone}: status {status} after {elapsed:.0f}s (no capacity)") - return False - await asyncio.sleep(5) - return False - - -async def delete_instance(zone: str, instance: str): - url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}" - resp = await gcp_api("DELETE", url) - if resp.status_code < 400: - log.info(f"Deleted {instance} in {zone} to free quota") - elif resp.status_code == 404: - pass - else: - log.warning(f"Failed to delete {instance} in {zone}: {resp.status_code}") - - -async def ensure_gpu_running() -> str: - global gpu_ip, active_zone, _last_failure_time - - if _last_failure_time > 0: - remaining = FAILURE_COOLDOWN - (time.time() - _last_failure_time) - if remaining > 0: - log.info(f"GPU cooldown active ({int(remaining)}s remaining), waiting...") - await asyncio.sleep(remaining) - _last_failure_time = 0 - - async with _startup_lock: - ok, hours = check_budget() - if not ok: - raise RuntimeError(f"Monthly GPU limit reached ({hours:.1f}h / {MONTHLY_LIMIT_HOURS}h)") - - if active_zone: - info = await get_instance_info(active_zone["zone"], active_zone["instance"]) - if info and info.get("status") == "RUNNING": - gpu_ip = extract_ip(info) - if gpu_ip: - return gpu_ip - - errors = [] - - for config in ZONE_FALLBACKS: - zone = config["zone"] - instance = config["instance"] - label = config["label"] - - log.info(f"Trying {label}...") - info = await get_instance_info(zone, instance) - - if info is None: - created = await create_instance_from_snapshot(config) - if not created: - zone_status[label] = { - "status": "no_capacity", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": "no capacity", - } - errors.append(f"{label}: no capacity") - continue - if not await wait_for_running(zone, instance, grace=30): - zone_status[label] = { - "status": "error", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": "created but failed to start", - } - errors.append(f"{label}: created but failed to start") - await delete_instance(zone, instance) - await asyncio.sleep(3) - continue - else: - status = info.get("status", "UNKNOWN") - - if status == "RUNNING": - pass - elif status in ("TERMINATED", "STOPPED"): - zone_status[label] = { - "status": "starting", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": None, - } - started = await start_instance_in_zone(zone, instance) - if not started: - zone_status[label] = { - "status": "error", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": "start rejected", - } - errors.append(f"{label}: start rejected") - continue - if not await wait_for_running(zone, instance, grace=20): - zone_status[label] = { - "status": "error", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": "didn't reach RUNNING", - } - errors.append(f"{label}: didn't reach RUNNING") - continue - elif status in ("STAGING", "PROVISIONING"): - zone_status[label] = { - "status": "starting", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": None, - } - if not await wait_for_running(zone, instance): - zone_status[label] = { - "status": "error", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": f"stuck in {status}", - } - errors.append(f"{label}: stuck in {status}") - continue - elif status == "STOPPING": - log.info(f"{label}: STOPPING, deleting to free quota") - await delete_instance(zone, instance) - await asyncio.sleep(3) - zone_status[label] = { - "status": "error", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": "was STOPPING, deleted", - } - errors.append(f"{label}: was STOPPING, deleted") - continue - - info = await get_instance_info(zone, instance) - if info and info.get("status") == "RUNNING": - gpu_ip = extract_ip(info) - if gpu_ip: - active_zone = config - _last_failure_time = 0 - zone_status[label] = { - "status": "running", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": None, - } - stats = load_stats() - stats["last_start"] = time.time() - stats["requests"] = stats.get("requests", 0) + 1 - stats["active_zone"] = label - save_stats(stats) - log.info(f"GPU ready in {label}, IP: {gpu_ip}") - return gpu_ip - - zone_status[label] = { - "status": "error", - "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"), - "last_error": "running but no IP", - } - errors.append(f"{label}: running but no IP") - - _last_failure_time = time.time() - raise RuntimeError( - f"No GPU available in any Canadian zone. Tried: {'; '.join(errors)}" - ) - - -async def ensure_gpu_ready() -> str: - ip = await ensure_gpu_running() - url = f"http://{ip}:{WHISPERX_PORT}/health" - log.info(f"Waiting for WhisperX at {url}...") - async with httpx.AsyncClient(timeout=10) as client: - for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL): - try: - resp = await client.get(url) - if resp.status_code == 200: - log.info("WhisperX is healthy!") - return ip - except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout): - pass - await asyncio.sleep(HEALTH_POLL_INTERVAL) - raise RuntimeError("WhisperX did not become healthy in time") - - -async def ensure_ollama_ready() -> str: - ip = await ensure_gpu_running() - url = f"http://{ip}:{OLLAMA_PORT}/api/tags" - log.info(f"Waiting for Ollama at {url}...") - async with httpx.AsyncClient(timeout=10) as client: - for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL): - try: - resp = await client.get(url) - if resp.status_code == 200: - log.info("Ollama is healthy!") - return ip - except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout): - pass - await asyncio.sleep(HEALTH_POLL_INTERVAL) - raise RuntimeError("Ollama did not become healthy in time") - - -async def idle_shutdown_loop(): - while True: - await asyncio.sleep(60) - if last_request_time == 0 or active_zone is None: - continue - if active_requests > 0: - continue - elapsed = time.time() - last_request_time - if elapsed >= IDLE_TIMEOUT: - try: - zone = active_zone["zone"] - instance = active_zone["instance"] - label = active_zone["label"] - info = await get_instance_info(zone, instance) - if info and info.get("status") == "RUNNING": - log.info(f"Idle {int(elapsed)}s -- stopping {label}") - await stop_instance_in_zone(zone, instance) - track_gpu_time() - except Exception as e: - log.error(f"Error stopping: {e}") - - -# --- Endpoints --- - -@app.on_event("startup") -async def on_startup(): - global shutdown_task, _startup_lock - _startup_lock = asyncio.Lock() - await get_access_token() - shutdown_task = asyncio.create_task(idle_shutdown_loop()) - zones = ", ".join(c["label"] for c in ZONE_FALLBACKS) - log.info(f"DictIA ASR Proxy started. Zones: [{zones}]. Idle: {IDLE_TIMEOUT}s, limit: {MONTHLY_LIMIT_HOURS}h") - - -@app.post("/asr") -async def asr_proxy(request: Request): - global last_request_time, active_requests - - body = await request.body() - headers = { - k: v for k, v in request.headers.items() - if k.lower() not in ("host", "transfer-encoding") - } - - last_request_time = time.time() - active_requests += 1 - start_time = time.time() - result_status = 200 - try: - ip = await ensure_gpu_ready() - target = f"http://{ip}:{WHISPERX_PORT}/asr" - log.info(f"Forwarding {len(body)} bytes to {target}") - async with httpx.AsyncClient(timeout=httpx.Timeout(7200.0)) as client: - resp = await client.post(target, content=body, headers=headers) - last_request_time = time.time() - result_status = resp.status_code - ct = resp.headers.get("content-type", "") - if "application/json" in ct: - return JSONResponse(content=resp.json(), status_code=resp.status_code) - else: - return JSONResponse(content=resp.text, status_code=resp.status_code) - except httpx.ReadTimeout: - result_status = 504 - return JSONResponse({"error": "Transcription timeout (2h)"}, status_code=504) - except Exception as e: - result_status = 502 - log.error(f"Proxy error: {e}") - return JSONResponse({"error": str(e)}, status_code=502) - finally: - active_requests -= 1 - last_request_time = time.time() - request_history.insert(0, { - "time": time.strftime("%Y-%m-%dT%H:%M:%S"), - "type": "ASR", - "duration_sec": round(time.time() - start_time, 1), - "status": result_status, - "zone": active_zone["label"] if active_zone else "none", - }) - if len(request_history) > MAX_HISTORY: - request_history.pop() - - -@app.get("/health") -async def health(): - zone_label = active_zone["label"] if active_zone else "none" - gpu_status = "unknown" - if active_zone: - try: - info = await get_instance_info(active_zone["zone"], active_zone["instance"]) - gpu_status = info.get("status", "unknown") if info else "not_found" - except Exception: - pass - ok, hours = check_budget() - stats = load_stats() - return { - "proxy": "healthy", - "gpu_instance": gpu_status, - "gpu_zone": zone_label, - "active_requests": active_requests, - "idle_timeout": IDLE_TIMEOUT, - "usage": { - "month": stats.get("month"), - "gpu_hours": round(hours, 2), - "gpu_limit_hours": MONTHLY_LIMIT_HOURS, - "requests_count": stats.get("requests", 0), - "budget_ok": ok, - }, - "gpu_ip": gpu_ip, - "machine_type": active_zone.get("machine_type", "unknown") if active_zone else "unknown", - "gpu_model": active_zone.get("accelerator", "unknown") if active_zone else "unknown", - "idle_seconds": round(time.time() - last_request_time) if last_request_time > 0 else 0, - "auto_shutdown_in": max(0, IDLE_TIMEOUT - round(time.time() - last_request_time)) if last_request_time > 0 and active_zone else None, - "token_expires_in": round(_token_expiry - time.time()) if _token_expiry > 0 else None, - } - - -@app.get("/stats") -async def get_stats(): - stats = load_stats() - hours = stats.get("gpu_seconds", 0) / 3600 - gpu_cost = hours * GPU_COST_PER_HOUR - total_cost = gpu_cost + FIXED_MONTHLY_COST - return { - "month": stats.get("month"), - "gpu_hours": round(hours, 2), - "gpu_minutes": round(hours * 60, 1), - "estimated_cost_usd": round(total_cost, 2), - "gpu_cost_usd": round(gpu_cost, 2), - "fixed_cost_usd": FIXED_MONTHLY_COST, - "monthly_limit_hours": MONTHLY_LIMIT_HOURS, - "remaining_hours": round(MONTHLY_LIMIT_HOURS - hours, 2), - "requests_count": stats.get("requests", 0), - "active_zone": stats.get("active_zone", "none"), - "cost_per_hour": GPU_COST_PER_HOUR, - "recent_requests": request_history[:10], - "zone_fallbacks": [ - { - "label": config["label"], - "zone": config["zone"], - "machine": config["machine_type"], - "gpu": config["accelerator"], - **zone_status.get(config["label"], {"status": "unknown", "last_tried": None, "last_error": None}), - } - for config in ZONE_FALLBACKS - ], - } - - -@app.post("/gpu/start") -async def gpu_start(): - try: - ip = await ensure_gpu_ready() - label = active_zone["label"] if active_zone else "unknown" - return {"status": "running", "ip": ip, "zone": label} - except Exception as e: - return JSONResponse({"error": str(e)}, status_code=503) - - -@app.post("/gpu/stop") -async def gpu_stop(): - if not active_zone: - return {"status": "no active instance"} - try: - await stop_instance_in_zone(active_zone["zone"], active_zone["instance"]) - track_gpu_time() - return {"status": "stopped", "zone": active_zone["label"]} - except Exception as e: - return JSONResponse({"error": str(e)}, status_code=500) - - -DASHBOARD_HTML = Path(__file__).parent / "dashboard.html" - - -@app.get("/", response_class=HTMLResponse) -async def dashboard(): - if DASHBOARD_HTML.exists(): - return HTMLResponse(DASHBOARD_HTML.read_text(encoding="utf-8")) - return HTMLResponse("

Dashboard not found

Place dashboard.html next to proxy.py

", status_code=404) - - -@app.api_route("/v1/{path:path}", methods=["POST", "GET"]) -async def llm_proxy(request: Request, path: str): - global last_request_time, active_requests - - body = await request.body() - headers = { - k: v for k, v in request.headers.items() - if k.lower() not in ("host", "transfer-encoding") - } - - last_request_time = time.time() - active_requests += 1 - start_time = time.time() - result_status = 200 - try: - ip = await ensure_ollama_ready() - target = f"http://{ip}:{OLLAMA_PORT}/v1/{path}" - log.info(f"Forwarding LLM request to {target}") - async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client: - resp = await client.request(request.method, target, content=body, headers=headers) - last_request_time = time.time() - result_status = resp.status_code - return Response( - content=resp.content, - status_code=resp.status_code, - media_type=resp.headers.get("content-type"), - ) - except httpx.ReadTimeout: - result_status = 504 - return JSONResponse({"error": "LLM timeout (5min)"}, status_code=504) - except Exception as e: - result_status = 502 - log.error(f"LLM proxy error: {e}") - return JSONResponse({"error": str(e)}, status_code=502) - finally: - active_requests -= 1 - last_request_time = time.time() - request_history.insert(0, { - "time": time.strftime("%Y-%m-%dT%H:%M:%S"), - "type": "LLM", - "duration_sec": round(time.time() - start_time, 1), - "status": result_status, - "zone": active_zone["label"] if active_zone else "none", - }) - if len(request_history) > MAX_HISTORY: - request_history.pop() - - -if __name__ == "__main__": - import uvicorn - uvicorn.run(app, host="0.0.0.0", port=9090) diff --git a/deployment/asr-proxy/requirements.txt b/deployment/asr-proxy/requirements.txt deleted file mode 100644 index f301f93..0000000 --- a/deployment/asr-proxy/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -fastapi==0.115.0 -uvicorn==0.30.0 -httpx==0.27.0 -PyJWT==2.9.0 -cryptography>=43.0.0 diff --git a/deployment/asr-proxy/setup.sh b/deployment/asr-proxy/setup.sh deleted file mode 100644 index f0d88f8..0000000 --- a/deployment/asr-proxy/setup.sh +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env bash -# DictIA ASR Proxy — Setup script -# Installs the GCP GPU proxy for cloud deployments. -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -INSTALL_DIR="${ASR_PROXY_DIR:-$SCRIPT_DIR}" -SERVICE_USER="${ASR_PROXY_USER:-$(whoami)}" - -echo "=== DictIA ASR Proxy Setup ===" -echo "Install directory: $INSTALL_DIR" -echo "Service user: $SERVICE_USER" -echo - -# 1. Create virtual environment -if [ ! -d "$INSTALL_DIR/venv" ]; then - echo "[1/4] Creating Python virtual environment..." - python3 -m venv "$INSTALL_DIR/venv" -else - echo "[1/4] Virtual environment already exists." -fi - -# 2. Install dependencies -echo "[2/4] Installing Python dependencies..." -"$INSTALL_DIR/venv/bin/pip" install --quiet --upgrade pip -"$INSTALL_DIR/venv/bin/pip" install --quiet -r "$INSTALL_DIR/requirements.txt" - -# 3. GCP credentials -if [ ! -f "$INSTALL_DIR/gcp-credentials.json" ]; then - echo "[3/4] GCP credentials not found." - echo " Place your GCP service account or OAuth credentials at:" - echo " $INSTALL_DIR/gcp-credentials.json" - echo - echo " For service account: download JSON from GCP Console > IAM > Service Accounts" - echo " For user credentials: run 'gcloud auth application-default login' and copy the file" - echo - read -rp " Path to credentials file (or press Enter to skip): " CREDS_PATH - if [ -n "$CREDS_PATH" ] && [ -f "$CREDS_PATH" ]; then - cp "$CREDS_PATH" "$INSTALL_DIR/gcp-credentials.json" - chmod 600 "$INSTALL_DIR/gcp-credentials.json" - echo " Credentials copied." - else - echo " Skipped. You must add credentials before starting the proxy." - fi -else - echo "[3/4] GCP credentials found." -fi - -# 4. Install systemd service -echo "[4/4] Installing systemd service..." -SERVICE_FILE="/etc/systemd/system/asr-proxy.service" - -cat > /tmp/asr-proxy.service </dev/null 2>&1; then - echo "ERROR: Tailscale is not running or not connected." - echo " Install: curl -fsSL https://tailscale.com/install.sh | sh" - echo " Connect: sudo tailscale up" - exit 1 -fi - -HOSTNAME=$(tailscale status --json | python3 -c "import sys,json; print(json.load(sys.stdin)['Self']['DNSName'].rstrip('.'))" 2>/dev/null || echo "unknown") -echo "Tailscale hostname: $HOSTNAME" -echo - -# DictIA app on :443 → localhost:8899 -echo "[1/2] Setting up DictIA app (port 443 → 8899)..." -if [ "$MODE" = "funnel" ]; then - tailscale funnel --bg --https=443 http://localhost:8899 -else - tailscale serve --bg --https=443 http://localhost:8899 -fi - -# ASR Proxy dashboard on :9443 → localhost:9090 -echo "[2/2] Setting up ASR Proxy dashboard (port 9443 → 9090)..." -if [ "$MODE" = "funnel" ]; then - tailscale funnel --bg --https=9443 http://localhost:9090 -else - tailscale serve --bg --https=9443 http://localhost:9090 -fi - -echo -echo "=== Setup complete ===" -echo "DictIA: https://$HOSTNAME/" -echo "ASR Dashboard: https://$HOSTNAME:9443/" -echo -echo "Verify with: tailscale serve status" diff --git a/deployment/docker/.env.example b/deployment/docker/.env.example deleted file mode 100644 index fc204f9..0000000 --- a/deployment/docker/.env.example +++ /dev/null @@ -1,124 +0,0 @@ -# ============================================================================= -# DictIA — Unified Environment Configuration -# ============================================================================= -# -# Copy this file to the project root as .env and edit the values. -# cp deployment/docker/.env.example .env -# -# This template combines upstream settings with DictIA deployment vars. -# See: config/env.transcription.example for full upstream documentation. - -# ============================================================================= -# FLASK SECRET KEY (REQUIRED — auto-generated by setup.sh) -# ============================================================================= -SECRET_KEY=change-me-to-a-random-string - -# ============================================================================= -# DEPLOYMENT PROFILE (used by deployment scripts) -# ============================================================================= -# Options: cloud, local-cpu, local-gpu -DICTIA_PROFILE=cloud - -# ============================================================================= -# TEXT GENERATION MODEL (REQUIRED for summaries, titles, chat) -# ============================================================================= -TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1 -TEXT_MODEL_API_KEY=your_openrouter_api_key -TEXT_MODEL_NAME=openai/gpt-4o-mini - -# ============================================================================= -# TRANSCRIPTION CONFIGURATION -# ============================================================================= -# For cloud profile (ASR Proxy → GCP GPU): -# ASR_BASE_URL is set automatically in docker-compose.cloud.yml -# No need to set it here. -# -# For local profiles (WhisperX sidecar): -# ASR_BASE_URL is set automatically in docker-compose.local-*.yml -# No need to set it here. -# -# For OpenAI API instead of self-hosted ASR: -# TRANSCRIPTION_API_KEY=sk-your_openai_api_key -# TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize - -# ASR model (for local WhisperX profiles) -ASR_MODEL=large-v3 - -# HuggingFace token (required for diarization with pyannote) -# Get yours at: https://huggingface.co/settings/tokens -# Must accept: https://huggingface.co/pyannote/speaker-diarization-3.1 -HF_TOKEN= - -# ============================================================================= -# ASR PROXY — CLOUD PROFILE ONLY -# ============================================================================= -# GCP project for GPU instances -# GCP_PROJECT=your-gcp-project - -# Monthly GPU budget limit in hours (default: 50) -# MONTHLY_LIMIT_HOURS=50 - -# Idle timeout before auto-stopping GPU (seconds, default: 300) -# IDLE_TIMEOUT=300 - -# ============================================================================= -# APPLICATION SETTINGS -# ============================================================================= -ADMIN_USERNAME=admin -ADMIN_EMAIL=admin@example.com -ADMIN_PASSWORD=changeme - -ALLOW_REGISTRATION=false -TIMEZONE="America/Toronto" -LOG_LEVEL=ERROR -LOCALE=fr_CA -DEFAULT_LANGUAGE=fr -SHOW_USERNAMES_IN_UI=true -SESSION_COOKIE_HTTPONLY=true -SESSION_COOKIE_SAMESITE=Lax -SESSION_COOKIE_SECURE=true - -# ============================================================================= -# OPTIONAL FEATURES -# ============================================================================= -ENABLE_INQUIRE_MODE=false -ENABLE_AUTO_PROCESSING=false -ENABLE_AUTO_EXPORT=false -ENABLE_AUTO_DELETION=false -ENABLE_INTERNAL_SHARING=true -ENABLE_PUBLIC_SHARING=true -ENABLE_FOLDERS=true -VIDEO_RETENTION=true -USERS_CAN_DELETE=true - -# ============================================================================= -# BACKGROUND PROCESSING -# ============================================================================= -JOB_QUEUE_WORKERS=4 -SUMMARY_QUEUE_WORKERS=4 -JOB_MAX_RETRIES=3 -MAX_CONCURRENT_UPLOADS=3 - -# ============================================================================= -# TRANSCRIPTION SETTINGS -# ============================================================================= -TRANSCRIPTION_CONNECTOR=asr_endpoint -USE_NEW_TRANSCRIPTION_ARCHITECTURE=true -ENABLE_CHUNKING=true -CHUNK_LIMIT=2400s -CHUNK_OVERLAP_SECONDS=5 - -# ============================================================================= -# LLM / SUMMARY SETTINGS -# ============================================================================= -SUMMARY_LANGUAGE=fr -SUMMARY_MAX_TOKENS=16000 -CHAT_MAX_TOKENS=12000 -ENABLE_STREAM_OPTIONS=false -ENABLE_THINKING=false - -# ============================================================================= -# DOCKER/DATABASE -# ============================================================================= -SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db -UPLOAD_FOLDER=/data/uploads diff --git a/deployment/docker/docker-compose.cloud.yml b/deployment/docker/docker-compose.cloud.yml deleted file mode 100644 index d4ae233..0000000 --- a/deployment/docker/docker-compose.cloud.yml +++ /dev/null @@ -1,40 +0,0 @@ -# DictIA — Cloud deployment (VPS + ASR Proxy GCP GPU) -# -# Usage: -# docker compose -f deployment/docker/docker-compose.cloud.yml up -d -# -# ASR is handled by the external asr-proxy (port 9090) which auto-starts -# a GCP GPU instance on demand. DictIA connects via host.docker.internal. - -services: - dictia: - build: - context: ../.. - dockerfile: Dockerfile - image: innova-ai/dictia:latest - container_name: dictia - restart: unless-stopped - ports: - - "8899:8899" - env_file: - - ../../.env - environment: - - LOG_LEVEL=${LOG_LEVEL:-ERROR} - - ASR_BASE_URL=http://host.docker.internal:9090 - volumes: - - ../../data/uploads:/data/uploads - - ../../data/instance:/data/instance - extra_hosts: - - "host.docker.internal:host-gateway" - healthcheck: - test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 30s - networks: - - dictia-network - -networks: - dictia-network: - driver: bridge diff --git a/deployment/docker/docker-compose.local-cpu.yml b/deployment/docker/docker-compose.local-cpu.yml deleted file mode 100644 index 0a0f060..0000000 --- a/deployment/docker/docker-compose.local-cpu.yml +++ /dev/null @@ -1,64 +0,0 @@ -# DictIA — Local CPU deployment (WhisperX on CPU + DictIA) -# -# Usage: -# docker compose -f deployment/docker/docker-compose.local-cpu.yml up -d -# -# Warning: CPU transcription is significantly slower than GPU. -# Expect ~10x real-time for large files (e.g., 1h audio = ~10h processing). - -services: - whisperx-asr: - image: ghcr.io/jim60105/whisperx-asr:latest - container_name: whisperx-asr - restart: unless-stopped - ports: - - "9000:9000" - environment: - - ASR_MODEL=${ASR_MODEL:-large-v3} - - ASR_ENGINE=whisperx - - DEVICE=cpu - - COMPUTE_TYPE=float32 - - HF_TOKEN=${HF_TOKEN:-} - volumes: - - whisperx-cache:/root/.cache - deploy: - resources: - limits: - memory: 18G - networks: - - dictia-network - - dictia: - build: - context: ../.. - dockerfile: Dockerfile - image: innova-ai/dictia:latest - container_name: dictia - restart: unless-stopped - ports: - - "8899:8899" - env_file: - - ../../.env - environment: - - LOG_LEVEL=${LOG_LEVEL:-ERROR} - - ASR_BASE_URL=http://whisperx-asr:9000 - volumes: - - ../../data/uploads:/data/uploads - - ../../data/instance:/data/instance - depends_on: - - whisperx-asr - healthcheck: - test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 30s - networks: - - dictia-network - -volumes: - whisperx-cache: - -networks: - dictia-network: - driver: bridge diff --git a/deployment/docker/docker-compose.local-gpu.yml b/deployment/docker/docker-compose.local-gpu.yml deleted file mode 100644 index 488fd74..0000000 --- a/deployment/docker/docker-compose.local-gpu.yml +++ /dev/null @@ -1,69 +0,0 @@ -# DictIA — Local GPU deployment (WhisperX on NVIDIA GPU + DictIA) -# -# Usage: -# docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d -# -# Prerequisites: -# - NVIDIA GPU with CUDA support -# - nvidia-container-toolkit installed -# - Docker configured with nvidia runtime - -services: - whisperx-asr: - image: ghcr.io/jim60105/whisperx-asr:latest-cuda - container_name: whisperx-asr - restart: unless-stopped - ports: - - "9000:9000" - environment: - - ASR_MODEL=${ASR_MODEL:-large-v3} - - ASR_ENGINE=whisperx - - DEVICE=cuda - - COMPUTE_TYPE=float16 - - HF_TOKEN=${HF_TOKEN:-} - volumes: - - whisperx-cache:/root/.cache - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - networks: - - dictia-network - - dictia: - build: - context: ../.. - dockerfile: Dockerfile - image: innova-ai/dictia:latest - container_name: dictia - restart: unless-stopped - ports: - - "8899:8899" - env_file: - - ../../.env - environment: - - LOG_LEVEL=${LOG_LEVEL:-ERROR} - - ASR_BASE_URL=http://whisperx-asr:9000 - volumes: - - ../../data/uploads:/data/uploads - - ../../data/instance:/data/instance - depends_on: - - whisperx-asr - healthcheck: - test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"] - interval: 30s - timeout: 10s - retries: 3 - start_period: 30s - networks: - - dictia-network - -volumes: - whisperx-cache: - -networks: - dictia-network: - driver: bridge diff --git a/deployment/docs/LOCAL-SETUP.md b/deployment/docs/LOCAL-SETUP.md deleted file mode 100644 index f534972..0000000 --- a/deployment/docs/LOCAL-SETUP.md +++ /dev/null @@ -1,118 +0,0 @@ -# Setup Local — DictIA - -Guide pour deployer DictIA localement avec GPU NVIDIA ou CPU. - -## Profil local-gpu - -### Prerequis - -- NVIDIA GPU avec support CUDA -- [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) -- Docker + Docker Compose V2 -- 8GB+ RAM (16GB recommande) -- Token HuggingFace (pour la diarisation) - -### Installation nvidia-container-toolkit - -```bash -# Ubuntu/Debian -curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \ - sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg -curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \ - sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \ - sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list -sudo apt-get update -sudo apt-get install -y nvidia-container-toolkit -sudo nvidia-ctk runtime configure --runtime=docker -sudo systemctl restart docker - -# Verifier -docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi -``` - -### Setup DictIA - -```bash -cd dictia -bash deployment/setup.sh --profile local-gpu -``` - -Le setup va verifier: -- nvidia-container-toolkit installe -- GPU accessible depuis Docker -- Assez de RAM disponible - -### Configuration du modele - -Par defaut, WhisperX utilise `large-v3`. Pour changer: - -```bash -# Editer .env -ASR_MODEL=large-v3 # Meilleure qualite -# ASR_MODEL=medium # Plus rapide, qualite correcte -# ASR_MODEL=small # Tres rapide, qualite reduite -``` - ---- - -## Profil local-cpu - -### Prerequis - -- Docker + Docker Compose V2 -- 18GB+ RAM (WhisperX CPU est gourmand) -- Patience (transcription ~10x temps reel) - -### Setup - -```bash -cd dictia -bash deployment/setup.sh --profile local-cpu -``` - -### Limitations - -- Transcription lente: 1h d'audio prend ~10h -- Utilise float32 (pas de GPU acceleration) -- Limite memoire a 18GB par defaut -- Recommande pour: tests, petits fichiers, demos - -Pour reduire l'utilisation memoire, utiliser un modele plus petit: - -```bash -# Editer .env -ASR_MODEL=small # ou medium, base, tiny -``` - ---- - -## Verification - -```bash -# Health check -bash deployment/tools/health-check.sh - -# Test rapide: ouvrir le navigateur -open http://localhost:8899 - -# Verifier WhisperX -curl http://localhost:9000/health -``` - -## Gestion des containers - -```bash -COMPOSE_FILE=deployment/docker/docker-compose.local-gpu.yml # ou local-cpu - -# Logs -docker compose -f $COMPOSE_FILE logs -f - -# Redemarrer -docker compose -f $COMPOSE_FILE restart - -# Arreter -docker compose -f $COMPOSE_FILE down - -# Voir l'utilisation GPU -nvidia-smi # (profil GPU seulement) -``` diff --git a/deployment/docs/MAINTENANCE.md b/deployment/docs/MAINTENANCE.md deleted file mode 100644 index f43b963..0000000 --- a/deployment/docs/MAINTENANCE.md +++ /dev/null @@ -1,136 +0,0 @@ -# Maintenance — DictIA - -## Backup - -```bash -# Backup complet (data, .env, volumes, stats ASR) -bash deployment/tools/backup.sh - -# Backup dans un repertoire specifique -bash deployment/tools/backup.sh /mnt/backups -``` - -Les backups sont sauvegardes dans `backups/` avec rotation automatique (garde les 5 derniers). - -Contenu d'un backup: -- `data/` — uploads et base de donnees SQLite -- `dot-env` — fichier de configuration -- `asr-usage-stats.json` — stats d'utilisation GPU -- `whisperx-cache.tar.gz` — cache modeles (si volume Docker) -- `manifest.json` — metadonnees du backup - -### Schedule recommande - -| Frequence | Action | -|-----------|--------| -| Quotidien | `bash deployment/tools/backup.sh` | -| Hebdomadaire | Copier le backup sur un stockage externe | -| Mensuel | Verifier la restauration sur un environnement de test | - -Pour automatiser avec cron: - -```bash -# Backup quotidien a 3h du matin -0 3 * * * /opt/dictia/deployment/tools/backup.sh >> /var/log/dictia-backup.log 2>&1 -``` - -## Restore - -```bash -# Lister les backups disponibles -ls -la backups/ - -# Restaurer un backup -bash deployment/tools/restore.sh backups/dictia-20260211-030000.tar.gz -``` - -Le script: -1. Valide l'archive (presence du manifest) -2. Demande confirmation -3. Arrete les containers -4. Restaure les fichiers -5. Redemarre les containers - -## Mise a jour - -```bash -# Mise a jour complete (git pull + rebuild + restart) -bash deployment/tools/update.sh - -# Rebuild seulement (sans git pull) -bash deployment/tools/update.sh --no-pull - -# Git pull seulement (sans rebuild) -bash deployment/tools/update.sh --no-build -``` - -Le script: -1. Detecte le profil actif automatiquement -2. `git pull origin dictia-branding` -3. `docker build -t innova-ai/dictia:latest .` -4. Pull WhisperX upstream (profils locaux) -5. `docker compose down && up -d` -6. Attend le health check -7. Nettoie les images dangling - -## Monitoring - -### Health check - -```bash -# Diagnostic complet (humain) -bash deployment/tools/health-check.sh - -# JSON (pour alertes/scripts) -bash deployment/tools/health-check.sh --json - -# Code de sortie seulement (0=ok, 1=probleme) -bash deployment/tools/health-check.sh --quiet -``` - -### Logs - -```bash -# DictIA -docker logs dictia -f --tail 100 - -# WhisperX (profils locaux) -docker logs whisperx-asr -f --tail 100 - -# ASR Proxy (profil cloud) -journalctl -u asr-proxy -f -``` - -### Dashboard GPU (profil cloud) - -Le dashboard de monitoring GPU est accessible a: -- `http://localhost:9090` (local) -- `https://votre-hostname.tailnet.ts.net:9443` (Tailscale) - -Affiche: statut GPU, cout mensuel, historique des requetes, zones de fallback. - -### Metriques cles - -```bash -# Espace disque (les transcriptions grossissent) -df -h /opt/dictia/data/ - -# Utilisation memoire (WhisperX est gourmand) -docker stats --no-stream - -# Stats GPU (profil cloud) -curl -s http://localhost:9090/stats | python3 -m json.tool -``` - -## Maintenance Docker - -```bash -# Nettoyer les images orphelines -docker image prune -f - -# Nettoyer tout (attention: supprime les volumes non utilises) -# docker system prune -a --volumes - -# Verifier l'espace Docker -docker system df -``` diff --git a/deployment/docs/QUICKSTART.md b/deployment/docs/QUICKSTART.md deleted file mode 100644 index f057175..0000000 --- a/deployment/docs/QUICKSTART.md +++ /dev/null @@ -1,90 +0,0 @@ -# Quickstart — DictIA - -## Prerequis communs - -- Docker + Docker Compose V2 -- Git -- 2GB+ RAM disponible - -```bash -git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git -cd dictia -git checkout dictia-branding -``` - ---- - -## Profil Cloud (VPS + GCP GPU) - -Le GPU demarre automatiquement quand quelqu'un transcrit, et s'arrete apres 5 min d'inactivite. - -```bash -# 1. Setup interactif -bash deployment/setup.sh --profile cloud - -# 2. Setup ASR Proxy (GCP credentials requises) -bash deployment/asr-proxy/setup.sh - -# 3. Optionnel: Tailscale Serve pour HTTPS -bash deployment/config/tailscale/setup-serve.sh -``` - -**Requis**: credentials GCP (service account ou OAuth) dans `deployment/asr-proxy/gcp-credentials.json`. - ---- - -## Profil Local GPU - -Transcription locale sur GPU NVIDIA. Le plus rapide. - -```bash -# Prerequis: nvidia-container-toolkit -# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html - -# Setup -bash deployment/setup.sh --profile local-gpu -``` - -**Requis**: token HuggingFace pour la diarisation (pyannote). - ---- - -## Profil Local CPU - -Transcription sur CPU. Lent mais fonctionnel pour tester. - -```bash -bash deployment/setup.sh --profile local-cpu -``` - -Prevoir ~10x le temps reel (1h audio = ~10h de traitement). - ---- - -## Apres l'installation - -```bash -# Verifier que tout fonctionne -bash deployment/tools/health-check.sh - -# Ouvrir DictIA -open http://localhost:8899 -``` - -Se connecter avec les identifiants admin configures pendant le setup. - -## Commandes utiles - -```bash -# Logs en temps reel -docker compose -f deployment/docker/docker-compose..yml logs -f - -# Redemarrer -docker compose -f deployment/docker/docker-compose..yml restart - -# Mise a jour -bash deployment/tools/update.sh - -# Backup -bash deployment/tools/backup.sh -``` diff --git a/deployment/docs/TROUBLESHOOTING.md b/deployment/docs/TROUBLESHOOTING.md deleted file mode 100644 index d733a7c..0000000 --- a/deployment/docs/TROUBLESHOOTING.md +++ /dev/null @@ -1,177 +0,0 @@ -# Troubleshooting — DictIA - -## WhisperX OOM (Out of Memory) - -**Symptome**: Container `whisperx-asr` crash ou restart en boucle. - -**Cause**: Modele trop gros pour la RAM/VRAM disponible. - -**Solutions**: -```bash -# Utiliser un modele plus petit dans .env -ASR_MODEL=medium # au lieu de large-v3 - -# Augmenter la limite memoire (local-cpu) -# Editer docker-compose.local-cpu.yml -deploy: - resources: - limits: - memory: 24G # au lieu de 18G -``` - -## Diarisation 403 Forbidden - -**Symptome**: Erreur 403 lors de la transcription avec diarisation. - -**Cause**: Token HuggingFace manquant ou conditions non acceptees. - -**Solution**: -1. Creer un token: https://huggingface.co/settings/tokens -2. Accepter les conditions: https://huggingface.co/pyannote/speaker-diarization-3.1 -3. Ajouter dans `.env`: -```bash -HF_TOKEN=hf_votre_token -``` -4. Redemarrer: `docker compose -f deployment/docker/docker-compose..yml restart` - -## GPU non detecte (local-gpu) - -**Symptome**: `nvidia-smi` fonctionne mais Docker ne voit pas le GPU. - -**Solution**: -```bash -# Installer nvidia-container-toolkit -sudo apt install -y nvidia-container-toolkit -sudo nvidia-ctk runtime configure --runtime=docker -sudo systemctl restart docker - -# Verifier -docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi -``` - -## Upload echoue (fichiers volumineux) - -**Symptome**: Upload de gros fichiers (>100MB) echoue. - -**Causes possibles**: -- Timeout Nginx/reverse proxy -- Limite upload trop basse - -**Solutions**: -```bash -# Si Nginx: verifier client_max_body_size dans dictia.conf -client_max_body_size 500M; - -# Si Tailscale Serve: pas de limite cote Tailscale - -# Timeout gunicorn (dans le Dockerfile, deja a 600s) -# Pour des fichiers tres longs, augmenter dans docker-compose: -environment: - - GUNICORN_TIMEOUT=1200 -``` - -## Container dictia "unhealthy" - -**Symptome**: `docker ps` montre "unhealthy" pour le container dictia. - -**Diagnostic**: -```bash -# Voir les logs -docker logs dictia --tail 50 - -# Tester manuellement -docker exec dictia python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')" -``` - -**Causes courantes**: -- `.env` mal configure (SECRET_KEY manquant) -- Base de donnees corrompue (restaurer backup) -- Port 8899 deja utilise - -## ASR Proxy: "No GPU available" - -**Symptome**: Transcription echoue avec "No GPU available in any Canadian zone". - -**Causes**: -- GCP n'a pas de GPU disponible (capacite epuisee) -- Credentials GCP expirees -- Budget mensuel atteint - -**Diagnostic**: -```bash -# Verifier le statut du proxy -curl -s http://localhost:9090/health | python3 -m json.tool - -# Verifier les stats (budget) -curl -s http://localhost:9090/stats | python3 -m json.tool - -# Voir les logs -journalctl -u asr-proxy --since "1 hour ago" -``` - -**Solutions**: -- Attendre (GCP libere des GPUs regulierement) -- Le proxy reessaie automatiquement apres un cooldown de 3 minutes -- Verifier le dashboard: http://localhost:9090 - -## Build Docker lent/echoue - -**Symptome**: `docker build` prend trop de temps ou echoue. - -**Solutions**: -```bash -# Limiter les ressources si le VPS est petit -docker build --memory=2g --cpus=2 -t innova-ai/dictia:latest . - -# Nettoyer le cache Docker si le disque est plein -docker builder prune -f -docker image prune -f -``` - -## Base de donnees corrompue - -**Symptome**: Erreur SQLite au demarrage. - -**Solution**: -```bash -# Restaurer le dernier backup -bash deployment/tools/restore.sh backups/dictia-LATEST.tar.gz - -# Ou recreer la base (perd les donnees) -rm data/instance/transcriptions.db -docker compose -f deployment/docker/docker-compose..yml restart -``` - -## Port 8899 deja utilise - -```bash -# Trouver qui utilise le port -sudo lsof -i :8899 -# ou -sudo ss -tlnp | grep 8899 - -# Arreter le processus ou changer le port dans docker-compose -ports: - - "8900:8899" # utiliser 8900 a la place -``` - -## Mise a jour qui casse tout - -```bash -# Rollback: revenir au commit precedent -cd dictia -git log --oneline -5 # trouver le bon commit -git checkout - -# Rebuild et redemarrer -docker build -t innova-ai/dictia:latest . -docker compose -f deployment/docker/docker-compose..yml down -docker compose -f deployment/docker/docker-compose..yml up -d -``` - -## Commande de diagnostic rapide - -```bash -# Tout verifier d'un coup -bash deployment/tools/health-check.sh --json | python3 -m json.tool -``` diff --git a/deployment/docs/VPS-SETUP.md b/deployment/docs/VPS-SETUP.md deleted file mode 100644 index deff17d..0000000 --- a/deployment/docs/VPS-SETUP.md +++ /dev/null @@ -1,148 +0,0 @@ -# Setup VPS from scratch — DictIA - -Guide complet pour deployer DictIA sur un VPS Ubuntu. -Teste sur OVH VPS avec Ubuntu 22.04/24.04. - -## 1. Preparation du VPS - -```bash -# Mise a jour systeme -sudo apt update && sudo apt upgrade -y - -# Installer les essentiels -sudo apt install -y curl git -``` - -## 2. Docker - -```bash -# Installer Docker (methode officielle) -curl -fsSL https://get.docker.com | sh - -# Ajouter l'utilisateur au groupe docker -sudo usermod -aG docker $USER - -# Se reconnecter pour appliquer le groupe -exit -# (reconnecter via SSH) - -# Verifier -docker --version -docker compose version -``` - -## 3. Tailscale (recommande) - -Tailscale fournit un VPN mesh pour acceder au VPS sans exposer de ports publics. - -```bash -# Installer Tailscale -curl -fsSL https://tailscale.com/install.sh | sh - -# Connecter au tailnet -sudo tailscale up - -# Verifier -tailscale status -``` - -## 4. DictIA - -```bash -# Cloner le repo -cd ~ -git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git -cd dictia -git checkout dictia-branding - -# Lancer le setup -bash deployment/setup.sh --profile cloud -``` - -Le setup va: -- Generer le `.env` avec vos identifiants -- Creer les repertoires de donnees -- Builder l'image Docker -- Demarrer les containers - -## 5. ASR Proxy (GCP GPU) - -```bash -# Installer le proxy -bash deployment/asr-proxy/setup.sh - -# Ajouter les credentials GCP -# Copier votre fichier de credentials dans: -cp ~/gcp-credentials.json deployment/asr-proxy/gcp-credentials.json - -# Demarrer le service -sudo systemctl start asr-proxy -sudo systemctl status asr-proxy -``` - -## 6. Securite - -```bash -# Docker daemon config (log rotation) -sudo cp deployment/security/docker-daemon.json /etc/docker/daemon.json -sudo systemctl restart docker - -# Firewall iptables (bloque trafic non-Tailscale) -sudo bash deployment/security/iptables-rules.sh - -# Service systemd pour les regles au boot -sudo cp deployment/security/docker-iptables.service /etc/systemd/system/ -sudo systemctl daemon-reload -sudo systemctl enable docker-iptables -``` - -## 7. Tailscale Serve (HTTPS) - -```bash -# Expose DictIA et le dashboard ASR via Tailscale HTTPS -bash deployment/config/tailscale/setup-serve.sh - -# Verifier -tailscale serve status -``` - -DictIA sera accessible a `https://votre-hostname.tailnet.ts.net/`. - -## 8. Service systemd (auto-start) - -```bash -# Adapter le chemin dans le fichier si necessaire -sudo cp deployment/config/systemd/dictia.service /etc/systemd/system/ -sudo systemctl daemon-reload -sudo systemctl enable dictia -``` - -## 9. Verification - -```bash -# Health check complet -bash deployment/tools/health-check.sh - -# Verifier les endpoints -curl -s http://localhost:8899/health -curl -s http://localhost:9090/health -``` - -## 10. Premier backup - -```bash -bash deployment/tools/backup.sh -``` - ---- - -## Checklist post-installation - -- [ ] DictIA repond sur :8899 -- [ ] ASR Proxy repond sur :9090 -- [ ] Tailscale Serve configure -- [ ] iptables: seul Tailscale peut acceder -- [ ] Docker: log rotation configuree -- [ ] Service systemd enable (auto-start au boot) -- [ ] Premier backup effectue -- [ ] Identifiants admin testes diff --git a/deployment/profiles/docker-compose.dictia16.yml b/deployment/profiles/docker-compose.dictia16.yml deleted file mode 100644 index a553bb7..0000000 --- a/deployment/profiles/docker-compose.dictia16.yml +++ /dev/null @@ -1,101 +0,0 @@ -# ============================================================================= -# DictIA 16 — Docker Compose -# GPU : RTX 5070 Ti (16 Go VRAM) -# ============================================================================= -# -# Services : -# - dictia : Application principale DictIA -# - whisperx-asr : Service de transcription WhisperX Large-v3 -# - ollama : LLM local Mistral 7B (résumés, chat, Q&A) -# -# Démarrage : -# 1. cp config/env.dictia16.example .env -# 2. docker compose -f config/docker-compose.dictia16.yml up -d -# 3. Télécharger Mistral : docker exec ollama ollama pull mistral -# -# Note : Aucune clé API nécessaire — tout tourne en local (100% privé). -# ============================================================================= - -services: - - # --------------------------------------------------------------------------- - # Application DictIA - # --------------------------------------------------------------------------- - dictia: - image: dictia:latest - container_name: dictia - restart: unless-stopped - ports: - - "8899:8899" - env_file: - - ../.env - environment: - - LOG_LEVEL=ERROR - volumes: - - ../uploads:/data/uploads - - ../instance:/data/instance - # Décommenter pour l'export automatique : - # - ../exports:/data/exports - # Décommenter pour le traitement automatique : - # - ../auto-process:/data/auto-process - depends_on: - - whisperx-asr - - ollama - networks: - - dictia-net - - # --------------------------------------------------------------------------- - # WhisperX ASR — Transcription locale (WhisperX Large-v3) - # RTX 5070 Ti : BATCH_SIZE=32, COMPUTE_TYPE=float16 - # --------------------------------------------------------------------------- - whisperx-asr: - image: murtazanasir/whisperx-asr-service:latest - container_name: whisperx-asr - restart: unless-stopped - environment: - - HF_TOKEN=${HF_TOKEN} - - DEVICE=cuda - - COMPUTE_TYPE=float16 - - BATCH_SIZE=32 - - DEFAULT_MODEL=large-v3 - volumes: - - whisperx-models:/root/.cache - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - networks: - - dictia-net - - # --------------------------------------------------------------------------- - # Ollama — LLM local Mistral 7B - # Résumés, points d'action, Q&A — 100% local, aucune donnée externe - # --------------------------------------------------------------------------- - ollama: - image: ollama/ollama:latest - container_name: ollama - restart: unless-stopped - volumes: - - ollama-models:/root/.ollama - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - networks: - - dictia-net - -networks: - dictia-net: - driver: bridge - -volumes: - whisperx-models: - driver: local - ollama-models: - driver: local diff --git a/deployment/profiles/docker-compose.dictia8.yml b/deployment/profiles/docker-compose.dictia8.yml deleted file mode 100644 index dc4c6ed..0000000 --- a/deployment/profiles/docker-compose.dictia8.yml +++ /dev/null @@ -1,75 +0,0 @@ -# ============================================================================= -# DictIA 8 — Docker Compose -# GPU : RTX 5060 (8 Go VRAM) -# ============================================================================= -# -# Services : -# - dictia : Application principale DictIA -# - whisperx-asr : Service de transcription WhisperX Large-v3 -# -# Démarrage : -# 1. cp config/env.dictia8.example .env -# 2. Remplir TEXT_MODEL_API_KEY dans .env -# 3. docker compose -f config/docker-compose.dictia8.yml up -d -# ============================================================================= - -services: - - # --------------------------------------------------------------------------- - # Application DictIA - # --------------------------------------------------------------------------- - dictia: - image: dictia:latest - container_name: dictia - restart: unless-stopped - ports: - - "8899:8899" - env_file: - - ../.env - environment: - - LOG_LEVEL=ERROR - volumes: - - ../uploads:/data/uploads - - ../instance:/data/instance - # Décommenter pour l'export automatique : - # - ../exports:/data/exports - # Décommenter pour le traitement automatique : - # - ../auto-process:/data/auto-process - depends_on: - - whisperx-asr - networks: - - dictia-net - - # --------------------------------------------------------------------------- - # WhisperX ASR — Transcription locale (WhisperX Large-v3) - # RTX 5060 : BATCH_SIZE=16, COMPUTE_TYPE=float16 - # --------------------------------------------------------------------------- - whisperx-asr: - image: murtazanasir/whisperx-asr-service:latest - container_name: whisperx-asr - restart: unless-stopped - environment: - - HF_TOKEN=${HF_TOKEN} - - DEVICE=cuda - - COMPUTE_TYPE=float16 - - BATCH_SIZE=16 - - DEFAULT_MODEL=large-v3 - volumes: - - whisperx-models:/root/.cache - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - networks: - - dictia-net - -networks: - dictia-net: - driver: bridge - -volumes: - whisperx-models: - driver: local diff --git a/deployment/profiles/env.dictia16.example b/deployment/profiles/env.dictia16.example deleted file mode 100644 index 8335fe1..0000000 --- a/deployment/profiles/env.dictia16.example +++ /dev/null @@ -1,134 +0,0 @@ -# ============================================================================= -# DictIA 16 — Configuration (.env) -# GPU : RTX 5070 Ti (16 Go VRAM) -# ============================================================================= -# -# Architecture : -# - Transcription : WhisperX Large-v3 (local, ~5,5 Go VRAM) -# - LLM (résumés) : Mistral 7B local via Ollama (~6,4 Go VRAM) -# - Mode : Séquentiel (transcription puis résumé) -# - Total VRAM : ~11,9 Go / 16 Go (marge ~4,1 Go) -# -# Démarrage rapide : -# 1. cp config/env.dictia16.example .env -# 2. Aucune clé API nécessaire — tout tourne en local -# 3. docker compose -f config/docker-compose.dictia16.yml up -d -# ============================================================================= - -# ============================================================================= -# MODÈLE DE TEXTE — Résumés, titres, chat (LLM LOCAL) -# ============================================================================= -# DictIA 16 utilise Mistral 7B en local via Ollama. -# Aucune donnée ne quitte le serveur — 100% privé. - -TEXT_MODEL_BASE_URL=http://ollama:11434/v1 -TEXT_MODEL_API_KEY=not-required -TEXT_MODEL_NAME=mistral - -# --- Modèle de chat séparé (optionnel) --- -# Même modèle par défaut, mais peut être changé pour un modèle plus rapide. -# CHAT_MODEL_API_KEY=not-required -# CHAT_MODEL_BASE_URL=http://ollama:11434/v1 -# CHAT_MODEL_NAME=mistral - -# ============================================================================= -# TRANSCRIPTION — WhisperX ASR local (REQUIS) -# ============================================================================= -# WhisperX tourne en local dans un conteneur Docker séparé. -# Le service ASR est défini dans docker-compose.dictia16.yml. - -ASR_BASE_URL=http://whisperx-asr:9000 - -# Diarisation (identification automatique des locuteurs) — recommandé -ASR_DIARIZE=true -ASR_RETURN_SPEAKER_EMBEDDINGS=true - -# Nombre de locuteurs attendus (optionnel — aide la précision) -# ASR_MIN_SPEAKERS=1 -# ASR_MAX_SPEAKERS=6 - -# ============================================================================= -# PARAMÈTRES ADMINISTRATEUR -# ============================================================================= -ADMIN_USERNAME=admin -ADMIN_EMAIL=admin@votreentreprise.com -ADMIN_PASSWORD=changeme - -# ============================================================================= -# ACCÈS ET INSCRIPTION -# ============================================================================= -# Désactiver l'inscription publique (accès sur invitation uniquement) -ALLOW_REGISTRATION=false - -# Restreindre l'inscription aux domaines autorisés -# Exemple : REGISTRATION_ALLOWED_DOMAINS=votreentreprise.com -REGISTRATION_ALLOWED_DOMAINS= - -# ============================================================================= -# FUSEAU HORAIRE -# ============================================================================= -# Exemples : America/Toronto, America/Montreal, America/New_York, UTC -TIMEZONE="America/Toronto" - -# ============================================================================= -# LIMITES DE TOKENS -# ============================================================================= -SUMMARY_MAX_TOKENS=8000 -CHAT_MAX_TOKENS=5000 - -# ============================================================================= -# COMPRESSION AUDIO -# ============================================================================= -AUDIO_COMPRESS_UPLOADS=true -AUDIO_CODEC=mp3 -AUDIO_BITRATE=128k - -# ============================================================================= -# FONCTIONNALITÉS OPTIONNELLES -# ============================================================================= - -# Inquire Mode — recherche IA sur tous les enregistrements -# Peut être activé sur DictIA 16 (plus de VRAM disponible) -ENABLE_INQUIRE_MODE=false - -# Traitement automatique de fichiers (dossier surveillé) -ENABLE_AUTO_PROCESSING=false -# AUTO_PROCESS_MODE=admin_only -# AUTO_PROCESS_WATCH_DIR=/data/auto-process - -# Export automatique -ENABLE_AUTO_EXPORT=false -# AUTO_EXPORT_DIR=/data/exports -# AUTO_EXPORT_TRANSCRIPTION=true -# AUTO_EXPORT_SUMMARY=true - -# Suppression automatique / rétention -ENABLE_AUTO_DELETION=false -# GLOBAL_RETENTION_DAYS=90 -# DELETION_MODE=audio_only - -# ============================================================================= -# PARTAGE -# ============================================================================= -ENABLE_INTERNAL_SHARING=false -ENABLE_PUBLIC_SHARING=true -USERS_CAN_DELETE=true - -# ============================================================================= -# FILES D'ATTENTE DE TRAITEMENT -# ============================================================================= -JOB_QUEUE_WORKERS=2 -SUMMARY_QUEUE_WORKERS=2 -JOB_MAX_RETRIES=3 - -# ============================================================================= -# BASE DE DONNÉES ET STOCKAGE -# ============================================================================= -SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db -UPLOAD_FOLDER=/data/uploads - -# ============================================================================= -# JOURNALISATION -# ============================================================================= -# ERROR = production (minimal), INFO = débogage, DEBUG = développement -LOG_LEVEL=ERROR diff --git a/deployment/profiles/env.dictia8.example b/deployment/profiles/env.dictia8.example deleted file mode 100644 index 3efbbe5..0000000 --- a/deployment/profiles/env.dictia8.example +++ /dev/null @@ -1,126 +0,0 @@ -# ============================================================================= -# DictIA 8 — Configuration (.env) -# GPU : RTX 5060 (8 Go VRAM) -# ============================================================================= -# -# Architecture : -# - Transcription : WhisperX Large-v3 (local, ~5,5 Go VRAM) -# - LLM (résumés) : API cloud via OpenRouter (VRAM insuffisante pour LLM local) -# -# Démarrage rapide : -# 1. cp config/env.dictia8.example .env -# 2. Remplir TRANSCRIPTION_API_KEY et TEXT_MODEL_API_KEY -# 3. docker compose -f config/docker-compose.dictia8.yml up -d -# ============================================================================= - -# ============================================================================= -# MODÈLE DE TEXTE — Résumés, titres, chat (REQUIS) -# ============================================================================= -# DictIA 8 utilise un LLM cloud via OpenRouter (VRAM insuffisante pour LLM local). -# Inscrivez-vous sur https://openrouter.ai pour obtenir une clé API. - -TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1 -TEXT_MODEL_API_KEY=votre_cle_openrouter -TEXT_MODEL_NAME=openai/gpt-4o-mini - -# ============================================================================= -# TRANSCRIPTION — WhisperX ASR local (REQUIS) -# ============================================================================= -# WhisperX tourne en local dans un conteneur Docker séparé. -# Le service ASR est défini dans docker-compose.dictia8.yml. - -ASR_BASE_URL=http://whisperx-asr:9000 - -# Diarisation (identification automatique des locuteurs) — recommandé -ASR_DIARIZE=true -ASR_RETURN_SPEAKER_EMBEDDINGS=true - -# Nombre de locuteurs attendus (optionnel — aide la précision) -# ASR_MIN_SPEAKERS=1 -# ASR_MAX_SPEAKERS=6 - -# ============================================================================= -# PARAMÈTRES ADMINISTRATEUR -# ============================================================================= -ADMIN_USERNAME=admin -ADMIN_EMAIL=admin@votreentreprise.com -ADMIN_PASSWORD=changeme - -# ============================================================================= -# ACCÈS ET INSCRIPTION -# ============================================================================= -# Désactiver l'inscription publique (accès sur invitation uniquement) -ALLOW_REGISTRATION=false - -# Restreindre l'inscription aux domaines autorisés -# Exemple : REGISTRATION_ALLOWED_DOMAINS=votreentreprise.com -REGISTRATION_ALLOWED_DOMAINS= - -# ============================================================================= -# FUSEAU HORAIRE -# ============================================================================= -# Exemples : America/Toronto, America/Montreal, America/New_York, UTC -TIMEZONE="America/Toronto" - -# ============================================================================= -# LIMITES DE TOKENS -# ============================================================================= -SUMMARY_MAX_TOKENS=8000 -CHAT_MAX_TOKENS=5000 - -# ============================================================================= -# COMPRESSION AUDIO -# ============================================================================= -AUDIO_COMPRESS_UPLOADS=true -AUDIO_CODEC=mp3 -AUDIO_BITRATE=128k - -# ============================================================================= -# FONCTIONNALITÉS OPTIONNELLES -# ============================================================================= - -# Inquire Mode — recherche IA sur tous les enregistrements -# Désactivé sur DictIA 8 (VRAM insuffisante pour embeddings locaux) -ENABLE_INQUIRE_MODE=false - -# Traitement automatique de fichiers (dossier surveillé) -ENABLE_AUTO_PROCESSING=false -# AUTO_PROCESS_MODE=admin_only -# AUTO_PROCESS_WATCH_DIR=/data/auto-process - -# Export automatique -ENABLE_AUTO_EXPORT=false -# AUTO_EXPORT_DIR=/data/exports -# AUTO_EXPORT_TRANSCRIPTION=true -# AUTO_EXPORT_SUMMARY=true - -# Suppression automatique / rétention -ENABLE_AUTO_DELETION=false -# GLOBAL_RETENTION_DAYS=90 -# DELETION_MODE=audio_only - -# ============================================================================= -# PARTAGE -# ============================================================================= -ENABLE_INTERNAL_SHARING=false -ENABLE_PUBLIC_SHARING=true -USERS_CAN_DELETE=true - -# ============================================================================= -# FILES D'ATTENTE DE TRAITEMENT -# ============================================================================= -JOB_QUEUE_WORKERS=2 -SUMMARY_QUEUE_WORKERS=2 -JOB_MAX_RETRIES=3 - -# ============================================================================= -# BASE DE DONNÉES ET STOCKAGE -# ============================================================================= -SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db -UPLOAD_FOLDER=/data/uploads - -# ============================================================================= -# JOURNALISATION -# ============================================================================= -# ERROR = production (minimal), INFO = débogage, DEBUG = développement -LOG_LEVEL=ERROR diff --git a/deployment/security/docker-daemon.json b/deployment/security/docker-daemon.json deleted file mode 100644 index 217a460..0000000 --- a/deployment/security/docker-daemon.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "log-driver": "json-file", - "log-opts": { - "max-size": "10m", - "max-file": "3" - }, - "storage-driver": "overlay2" -} diff --git a/deployment/security/docker-iptables.service b/deployment/security/docker-iptables.service deleted file mode 100644 index 5a78b28..0000000 --- a/deployment/security/docker-iptables.service +++ /dev/null @@ -1,12 +0,0 @@ -[Unit] -Description=DictIA Docker iptables rules -After=docker.service tailscaled.service -Requires=docker.service - -[Service] -Type=oneshot -RemainAfterExit=yes -ExecStart=/bin/bash /opt/dictia/deployment/security/iptables-rules.sh - -[Install] -WantedBy=multi-user.target diff --git a/deployment/security/iptables-rules.sh b/deployment/security/iptables-rules.sh deleted file mode 100644 index 376cd7c..0000000 --- a/deployment/security/iptables-rules.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env bash -# DictIA — iptables rules for cloud VPS -# -# Allows Docker internal traffic to reach the ASR proxy on port 9090. -# Blocks direct external access to Docker container IPs. -# Tailscale + UFW handle the main firewall — this script adds Docker-specific rules. -# -# Usage: sudo bash iptables-rules.sh -set -euo pipefail - -echo "=== DictIA iptables rules ===" - -# Allow Docker containers (172.16.0.0/12) to reach ASR proxy on port 9090 -# This rule goes BEFORE the default DROP policy so containers can talk to the proxy -iptables -C INPUT -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT 2>/dev/null \ - || iptables -I INPUT 1 -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT - -# Block direct external access to Docker container IPs (raw table, before conntrack) -# Protects containers on non-default bridge networks (e.g., dictia-network) -for NETWORK_ID in $(docker network ls --filter driver=bridge --format '{{.ID}}' 2>/dev/null); do - BRIDGE=$(docker network inspect "$NETWORK_ID" --format '{{.Options.com.docker.network.bridge.name}}' 2>/dev/null || echo "") - [ -z "$BRIDGE" ] && continue - [ "$BRIDGE" = "docker0" ] && continue - - for CONTAINER_IP in $(docker network inspect "$NETWORK_ID" \ - --format '{{range .Containers}}{{.IPv4Address}} {{end}}' 2>/dev/null); do - IP="${CONTAINER_IP%/*}" - [ -z "$IP" ] && continue - iptables -t raw -C PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP 2>/dev/null \ - || iptables -t raw -A PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP - echo " Protected $IP on $BRIDGE" - done -done - -echo "Rules applied. Tailscale + Docker internal traffic allowed." -echo "Verify with: sudo iptables -L -n -t raw" diff --git a/deployment/setup.sh b/deployment/setup.sh deleted file mode 100755 index dbf7fe3..0000000 --- a/deployment/setup.sh +++ /dev/null @@ -1,300 +0,0 @@ -#!/usr/bin/env bash -# DictIA — Main setup script -# -# Interactive installer that detects hardware and configures the appropriate -# deployment profile (cloud, local-cpu, local-gpu). -# -# Usage: -# bash deployment/setup.sh # Interactive mode -# bash deployment/setup.sh --profile cloud # Non-interactive -# bash deployment/setup.sh --profile local-gpu -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" -PROFILE="" - -for arg in "$@"; do - case "$arg" in - --profile=*) PROFILE="${arg#*=}" ;; - --profile) shift_next=true ;; - *) - if [ "${shift_next:-false}" = true ]; then - PROFILE="$arg" - shift_next=false - fi - ;; - esac -done - -# --- Colors --- -RED='\033[0;31m' -GREEN='\033[0;32m' -YELLOW='\033[1;33m' -CYAN='\033[0;36m' -NC='\033[0m' - -info() { echo -e "${CYAN}[INFO]${NC} $*"; } -ok() { echo -e "${GREEN}[OK]${NC} $*"; } -warn() { echo -e "${YELLOW}[WARN]${NC} $*"; } -err() { echo -e "${RED}[ERROR]${NC} $*"; } - -echo -echo -e "${CYAN}========================================${NC}" -echo -e "${CYAN} DictIA — Setup${NC}" -echo -e "${CYAN}========================================${NC}" -echo - -# ========================================================================== -# 1. Hardware Detection -# ========================================================================== -info "Detecting hardware..." - -# Docker -if command -v docker &>/dev/null && docker info &>/dev/null; then - DOCKER_VERSION=$(docker --version | grep -oP '\d+\.\d+\.\d+' | head -1) - ok "Docker $DOCKER_VERSION" -else - err "Docker not found or not running." - echo " Install Docker: https://docs.docker.com/engine/install/" - exit 1 -fi - -# Docker Compose -if docker compose version &>/dev/null; then - COMPOSE_VERSION=$(docker compose version --short 2>/dev/null || echo "unknown") - ok "Docker Compose $COMPOSE_VERSION" -else - err "Docker Compose not found." - echo " Docker Compose V2 is required (comes with Docker Desktop or docker-compose-plugin)" - exit 1 -fi - -# GPU -HAS_GPU=false -if command -v nvidia-smi &>/dev/null; then - GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 || echo "") - if [ -n "$GPU_NAME" ]; then - HAS_GPU=true - ok "NVIDIA GPU: $GPU_NAME" - # Check nvidia-container-toolkit - if docker info 2>/dev/null | grep -qi nvidia; then - ok "nvidia-container-toolkit detected" - else - warn "nvidia-container-toolkit not detected. Required for local-gpu profile." - echo " Install: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html" - fi - fi -else - info "No NVIDIA GPU detected" -fi - -# RAM -if command -v free &>/dev/null; then - RAM_GB=$(free -g | awk '/Mem:/{print $2}') - info "RAM: ${RAM_GB}GB" -fi - -# Disk -DISK_AVAIL=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $4}') -info "Disk available: $DISK_AVAIL" - -echo - -# ========================================================================== -# 2. Profile Selection -# ========================================================================== -if [ -z "$PROFILE" ]; then - echo -e "${CYAN}Select deployment profile:${NC}" - echo - echo " 1) cloud — VPS with ASR Proxy (GCP GPU on demand)" - echo " Best for: remote servers, pay-per-use GPU" - echo - echo " 2) local-gpu — Local NVIDIA GPU for transcription" - echo " Best for: dedicated GPU server, fastest" - if [ "$HAS_GPU" = false ]; then - echo -e " ${YELLOW}(No GPU detected on this machine)${NC}" - fi - echo - echo " 3) local-cpu — CPU-only transcription (slow)" - echo " Best for: testing, low-volume usage" - echo - read -rp "Choice [1-3]: " CHOICE - case "$CHOICE" in - 1) PROFILE="cloud" ;; - 2) PROFILE="local-gpu" ;; - 3) PROFILE="local-cpu" ;; - *) err "Invalid choice"; exit 1 ;; - esac -fi - -COMPOSE_FILE="$SCRIPT_DIR/docker/docker-compose.$PROFILE.yml" -if [ ! -f "$COMPOSE_FILE" ]; then - err "Compose file not found: $COMPOSE_FILE" - exit 1 -fi - -ok "Profile: $PROFILE" -echo - -# ========================================================================== -# 3. Generate .env -# ========================================================================== -ENV_FILE="$PROJECT_DIR/.env" - -if [ -f "$ENV_FILE" ]; then - warn ".env already exists. Keeping existing configuration." - echo " To reconfigure, delete .env and re-run setup." -else - info "Generating .env..." - - # Generate secret key - SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_hex(32))" 2>/dev/null \ - || openssl rand -hex 32 2>/dev/null \ - || head -c 64 /dev/urandom | xxd -p | head -c 64) - - # Prompt for admin credentials - read -rp "Admin username [admin]: " ADMIN_USER - ADMIN_USER="${ADMIN_USER:-admin}" - read -rp "Admin email [admin@example.com]: " ADMIN_EMAIL - ADMIN_EMAIL="${ADMIN_EMAIL:-admin@example.com}" - read -rsp "Admin password: " ADMIN_PASS - echo - ADMIN_PASS="${ADMIN_PASS:-changeme}" - - # Prompt for text model API key - echo - info "DictIA needs a text/LLM API key for summaries, titles, and chat." - echo " Recommended: OpenRouter (https://openrouter.ai) — access to many models" - read -rp "Text model API key (or press Enter to skip): " TEXT_API_KEY - TEXT_API_KEY="${TEXT_API_KEY:-your_openrouter_api_key}" - - # HuggingFace token for diarization - if [ "$PROFILE" != "cloud" ]; then - echo - info "For speaker diarization, a HuggingFace token is needed." - echo " Get one at: https://huggingface.co/settings/tokens" - echo " Accept model: https://huggingface.co/pyannote/speaker-diarization-3.1" - read -rp "HuggingFace token (or press Enter to skip): " HF_TOKEN - HF_TOKEN="${HF_TOKEN:-}" - else - HF_TOKEN="" - fi - - # Write .env - cp "$SCRIPT_DIR/docker/.env.example" "$ENV_FILE" - sed -i "s|SECRET_KEY=.*|SECRET_KEY=$SECRET_KEY|" "$ENV_FILE" - sed -i "s|DICTIA_PROFILE=.*|DICTIA_PROFILE=$PROFILE|" "$ENV_FILE" - sed -i "s|ADMIN_USERNAME=.*|ADMIN_USERNAME=$ADMIN_USER|" "$ENV_FILE" - sed -i "s|ADMIN_EMAIL=.*|ADMIN_EMAIL=$ADMIN_EMAIL|" "$ENV_FILE" - sed -i "s|ADMIN_PASSWORD=.*|ADMIN_PASSWORD=$ADMIN_PASS|" "$ENV_FILE" - sed -i "s|TEXT_MODEL_API_KEY=.*|TEXT_MODEL_API_KEY=$TEXT_API_KEY|" "$ENV_FILE" - sed -i "s|HF_TOKEN=.*|HF_TOKEN=$HF_TOKEN|" "$ENV_FILE" - - ok ".env generated" -fi -echo - -# ========================================================================== -# 4. Create data directories -# ========================================================================== -info "Creating data directories..." -mkdir -p "$PROJECT_DIR/data/uploads" "$PROJECT_DIR/data/instance" -ok "data/uploads and data/instance created" -echo - -# ========================================================================== -# 5. Profile-specific setup -# ========================================================================== -case "$PROFILE" in - cloud) - info "Cloud profile — setting up ASR Proxy..." - if [ -f "$SCRIPT_DIR/asr-proxy/setup.sh" ]; then - echo " Run the ASR proxy setup separately:" - echo " bash $SCRIPT_DIR/asr-proxy/setup.sh" - fi - echo - info "Setting up iptables rules..." - if [ -f "$SCRIPT_DIR/security/iptables-rules.sh" ] && [ "$(id -u)" -eq 0 ]; then - bash "$SCRIPT_DIR/security/iptables-rules.sh" - else - echo " Run as root: sudo bash $SCRIPT_DIR/security/iptables-rules.sh" - fi - echo - info "Setting up Tailscale Serve..." - if command -v tailscale &>/dev/null; then - echo " Run: bash $SCRIPT_DIR/config/tailscale/setup-serve.sh" - else - warn "Tailscale not installed." - echo " Install: curl -fsSL https://tailscale.com/install.sh | sh" - fi - ;; - local-gpu) - info "Local GPU profile — verifying NVIDIA runtime..." - if docker info 2>/dev/null | grep -qi nvidia; then - ok "NVIDIA Docker runtime available" - # Quick GPU test - if docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi &>/dev/null; then - ok "GPU test passed" - else - warn "GPU test failed. Check nvidia-container-toolkit installation." - fi - else - err "NVIDIA Docker runtime not found." - echo " Install nvidia-container-toolkit and restart Docker." - echo " https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html" - fi - ;; - local-cpu) - warn "CPU-only transcription is significantly slower than GPU." - echo " Expect ~10x real-time (1h audio = ~10h processing)." - echo " Consider local-gpu or cloud profile for better performance." - ;; -esac - -echo - -# ========================================================================== -# 6. Build and start -# ========================================================================== -info "Building DictIA Docker image..." -cd "$PROJECT_DIR" -docker build -t innova-ai/dictia:latest . -ok "Image built" - -echo -info "Starting DictIA ($PROFILE profile)..." -docker compose -f "$COMPOSE_FILE" up -d -ok "Containers started" - -# ========================================================================== -# 7. Health check -# ========================================================================== -echo -info "Waiting for DictIA to become healthy..." -RETRIES=30 -for i in $(seq 1 $RETRIES); do - if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then - ok "DictIA is healthy!" - break - fi - if [ "$i" -eq "$RETRIES" ]; then - warn "Health check timeout. Check logs: docker compose -f $COMPOSE_FILE logs" - fi - sleep 5 -done - -echo -echo -e "${GREEN}========================================${NC}" -echo -e "${GREEN} DictIA is ready!${NC}" -echo -e "${GREEN}========================================${NC}" -echo -echo " App: http://localhost:8899" -echo " Profile: $PROFILE" -echo " Compose: $COMPOSE_FILE" -echo -echo " Tools:" -echo " Update: bash deployment/tools/update.sh" -echo " Backup: bash deployment/tools/backup.sh" -echo " Health check: bash deployment/tools/health-check.sh" -echo diff --git a/deployment/tools/backup.sh b/deployment/tools/backup.sh deleted file mode 100644 index 17ee50a..0000000 --- a/deployment/tools/backup.sh +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env bash -# DictIA — Backup script -# -# Creates a timestamped backup of data, env, and Docker volumes. -# Keeps the last N backups (default: 5). -# -# Usage: bash backup.sh [BACKUP_DIR] -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)" -BACKUP_BASE="${1:-$PROJECT_DIR/backups}" -KEEP_COUNT=5 -TIMESTAMP=$(date +%Y%m%d-%H%M%S) -BACKUP_DIR="$BACKUP_BASE/dictia-$TIMESTAMP" - -echo "=== DictIA Backup ===" -echo "Project: $PROJECT_DIR" -echo "Backup: $BACKUP_DIR" -echo - -mkdir -p "$BACKUP_DIR" - -# 1. Data directory -if [ -d "$PROJECT_DIR/data" ]; then - echo "[1/4] Backing up data/..." - cp -a "$PROJECT_DIR/data" "$BACKUP_DIR/data" -else - echo "[1/4] No data/ directory found, skipping." -fi - -# 2. Environment file -if [ -f "$PROJECT_DIR/.env" ]; then - echo "[2/4] Backing up .env..." - cp "$PROJECT_DIR/.env" "$BACKUP_DIR/dot-env" -else - echo "[2/4] No .env found, skipping." -fi - -# 3. ASR Proxy stats -ASR_STATS="$PROJECT_DIR/deployment/asr-proxy/usage-stats.json" -if [ -f "$ASR_STATS" ]; then - echo "[3/4] Backing up ASR proxy stats..." - cp "$ASR_STATS" "$BACKUP_DIR/asr-usage-stats.json" -else - echo "[3/4] No ASR proxy stats, skipping." -fi - -# 4. Docker volumes (if using managed volumes) -echo "[4/4] Checking Docker volumes..." -if docker volume ls --format '{{.Name}}' 2>/dev/null | grep -q "whisperx-cache"; then - echo " Exporting whisperx-cache volume..." - docker run --rm -v whisperx-cache:/source -v "$BACKUP_DIR":/backup \ - alpine tar czf /backup/whisperx-cache.tar.gz -C /source . 2>/dev/null || true -fi - -# Write manifest -cat > "$BACKUP_DIR/manifest.json" </dev/null | wc -l) -if [ "$BACKUP_COUNT" -gt "$KEEP_COUNT" ]; then - echo - echo "Rotating backups (keeping last $KEEP_COUNT)..." - ls -1t "$BACKUP_BASE"/dictia-*.tar.gz | tail -n +"$((KEEP_COUNT + 1))" | xargs rm -f -fi - -echo -echo "=== Backup complete ===" diff --git a/deployment/tools/health-check.sh b/deployment/tools/health-check.sh deleted file mode 100644 index 8075289..0000000 --- a/deployment/tools/health-check.sh +++ /dev/null @@ -1,157 +0,0 @@ -#!/usr/bin/env bash -# DictIA — Health check diagnostic -# -# Checks Docker, containers, endpoints, disk, RAM, and GPU. -# -# Usage: -# bash health-check.sh # Human-readable output -# bash health-check.sh --json # JSON output -# bash health-check.sh --quiet # Exit code only (0=ok, 1=issue) -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)" -OUTPUT="human" -ISSUES=0 - -for arg in "$@"; do - case "$arg" in - --json) OUTPUT="json" ;; - --quiet) OUTPUT="quiet" ;; - esac -done - -declare -A CHECKS - -check() { - local name="$1" - local status="$2" - local detail="${3:-}" - CHECKS["$name"]="$status|$detail" - if [ "$status" = "error" ] || [ "$status" = "warning" ]; then - ISSUES=$((ISSUES + 1)) - fi -} - -# --- Docker --- -if command -v docker &>/dev/null && docker info &>/dev/null; then - check "docker" "ok" "Docker daemon running" -else - check "docker" "error" "Docker not available" -fi - -# --- Containers --- -DICTIA_STATUS=$(docker inspect --format='{{.State.Health.Status}}' dictia 2>/dev/null || echo "not_found") -if [ "$DICTIA_STATUS" = "healthy" ]; then - check "container_dictia" "ok" "healthy" -elif [ "$DICTIA_STATUS" = "not_found" ]; then - check "container_dictia" "error" "container not found" -else - check "container_dictia" "warning" "$DICTIA_STATUS" -fi - -WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null || echo "not_found") -if [ "$WHISPERX_STATUS" = "running" ]; then - check "container_whisperx" "ok" "running" -elif [ "$WHISPERX_STATUS" = "not_found" ]; then - check "container_whisperx" "info" "not present (cloud profile?)" -else - check "container_whisperx" "warning" "$WHISPERX_STATUS" -fi - -# --- Endpoints --- -if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then - check "endpoint_dictia" "ok" "http://localhost:8899 responding" -else - check "endpoint_dictia" "error" "http://localhost:8899 not responding" -fi - -if curl -sf -o /dev/null -m 5 http://localhost:9000/health 2>/dev/null; then - check "endpoint_whisperx" "ok" "http://localhost:9000 responding" -else - check "endpoint_whisperx" "info" "http://localhost:9000 not responding" -fi - -if curl -sf -o /dev/null -m 5 http://localhost:9090/health 2>/dev/null; then - check "endpoint_asr_proxy" "ok" "http://localhost:9090 responding" -else - check "endpoint_asr_proxy" "info" "http://localhost:9090 not responding" -fi - -# --- Disk --- -DISK_USED=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $5}' | tr -d '%') -if [ -n "$DISK_USED" ]; then - if [ "$DISK_USED" -gt 90 ]; then - check "disk" "error" "${DISK_USED}% used" - elif [ "$DISK_USED" -gt 80 ]; then - check "disk" "warning" "${DISK_USED}% used" - else - check "disk" "ok" "${DISK_USED}% used" - fi -fi - -# --- RAM --- -if command -v free &>/dev/null; then - MEM_TOTAL=$(free -m | awk '/Mem:/{print $2}') - MEM_AVAIL=$(free -m | awk '/Mem:/{print $7}') - MEM_USED_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL )) - if [ "$MEM_USED_PCT" -gt 90 ]; then - check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)" - else - check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)" - fi -fi - -# --- GPU --- -if command -v nvidia-smi &>/dev/null; then - GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader 2>/dev/null || echo "error") - if [ "$GPU_INFO" != "error" ]; then - check "gpu" "ok" "$GPU_INFO" - else - check "gpu" "warning" "nvidia-smi present but query failed" - fi -fi - -# --- Output --- -if [ "$OUTPUT" = "json" ]; then - echo "{" - echo " \"timestamp\": \"$(date -Is)\"," - echo " \"issues\": $ISSUES," - echo " \"checks\": {" - FIRST=true - for name in "${!CHECKS[@]}"; do - IFS='|' read -r status detail <<< "${CHECKS[$name]}" - if [ "$FIRST" = true ]; then - FIRST=false - else - echo "," - fi - printf ' "%s": {"status": "%s", "detail": "%s"}' "$name" "$status" "$detail" - done - echo - echo " }" - echo "}" -elif [ "$OUTPUT" = "quiet" ]; then - exit $( [ "$ISSUES" -eq 0 ] && echo 0 || echo 1 ) -else - echo "=== DictIA Health Check ===" - echo - for name in docker container_dictia container_whisperx endpoint_dictia endpoint_whisperx endpoint_asr_proxy disk memory gpu; do - if [ -n "${CHECKS[$name]+x}" ]; then - IFS='|' read -r status detail <<< "${CHECKS[$name]}" - case "$status" in - ok) ICON="[OK]" ;; - warning) ICON="[!!]" ;; - error) ICON="[ERR]" ;; - info) ICON="[--]" ;; - esac - printf " %-22s %s %s\n" "$name" "$ICON" "$detail" - fi - done - echo - if [ "$ISSUES" -eq 0 ]; then - echo "All checks passed." - else - echo "$ISSUES issue(s) found." - fi -fi diff --git a/deployment/tools/restore.sh b/deployment/tools/restore.sh deleted file mode 100644 index 4c9d46a..0000000 --- a/deployment/tools/restore.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash -# DictIA — Restore script -# -# Restores a DictIA backup archive created by backup.sh. -# -# Usage: bash restore.sh [PROJECT_DIR] -set -euo pipefail - -ARCHIVE="${1:-}" -PROJECT_DIR="${2:-$(cd "$(dirname "$0")/../.." && pwd)}" - -if [ -z "$ARCHIVE" ] || [ ! -f "$ARCHIVE" ]; then - echo "Usage: bash restore.sh [project-dir]" - echo - echo "Available backups:" - ls -1t "$PROJECT_DIR/backups"/dictia-*.tar.gz 2>/dev/null | head -5 || echo " (none found)" - exit 1 -fi - -echo "=== DictIA Restore ===" -echo "Archive: $ARCHIVE" -echo "Target: $PROJECT_DIR" -echo - -# Validate archive -echo "Validating archive..." -TMPDIR=$(mktemp -d) -tar xzf "$ARCHIVE" -C "$TMPDIR" -BACKUP_DIR=$(ls -1d "$TMPDIR"/dictia-* | head -1) - -if [ ! -f "$BACKUP_DIR/manifest.json" ]; then - echo "ERROR: Invalid backup archive (no manifest.json)" - rm -rf "$TMPDIR" - exit 1 -fi - -echo "Manifest:" -cat "$BACKUP_DIR/manifest.json" -echo -echo - -# Confirmation -read -rp "This will overwrite current data. Continue? [y/N] " CONFIRM -if [ "$CONFIRM" != "y" ] && [ "$CONFIRM" != "Y" ]; then - echo "Aborted." - rm -rf "$TMPDIR" - exit 0 -fi - -# Stop services -echo -echo "Stopping DictIA services..." -COMPOSE_FILE="" -for f in cloud local-cpu local-gpu; do - if [ -f "$PROJECT_DIR/deployment/docker/docker-compose.$f.yml" ]; then - COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml" - fi -done -if [ -n "$COMPOSE_FILE" ]; then - docker compose -f "$COMPOSE_FILE" down 2>/dev/null || true -fi - -# Restore data -if [ -d "$BACKUP_DIR/data" ]; then - echo "Restoring data/..." - rm -rf "$PROJECT_DIR/data" - cp -a "$BACKUP_DIR/data" "$PROJECT_DIR/data" -fi - -# Restore .env -if [ -f "$BACKUP_DIR/dot-env" ]; then - echo "Restoring .env..." - cp "$BACKUP_DIR/dot-env" "$PROJECT_DIR/.env" -fi - -# Restore ASR stats -if [ -f "$BACKUP_DIR/asr-usage-stats.json" ]; then - echo "Restoring ASR proxy stats..." - cp "$BACKUP_DIR/asr-usage-stats.json" "$PROJECT_DIR/deployment/asr-proxy/usage-stats.json" -fi - -# Restore Docker volumes -if [ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ]; then - echo "Restoring whisperx-cache volume..." - docker volume create whisperx-cache 2>/dev/null || true - docker run --rm -v whisperx-cache:/target -v "$BACKUP_DIR":/backup \ - alpine sh -c "cd /target && tar xzf /backup/whisperx-cache.tar.gz" 2>/dev/null || true -fi - -# Cleanup -rm -rf "$TMPDIR" - -# Restart services -echo -echo "Restarting DictIA..." -if [ -n "$COMPOSE_FILE" ]; then - docker compose -f "$COMPOSE_FILE" up -d -fi - -echo -echo "=== Restore complete ===" diff --git a/deployment/tools/update.sh b/deployment/tools/update.sh deleted file mode 100644 index 54be4b7..0000000 --- a/deployment/tools/update.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env bash -# DictIA — Update script -# -# Pulls latest code, rebuilds Docker image, and restarts services. -# Detects the active deployment profile automatically. -# -# Usage: bash update.sh [--no-pull] [--no-build] -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)" -NO_PULL=false -NO_BUILD=false - -for arg in "$@"; do - case "$arg" in - --no-pull) NO_PULL=true ;; - --no-build) NO_BUILD=true ;; - *) echo "Unknown option: $arg"; exit 1 ;; - esac -done - -echo "=== DictIA Update ===" -echo "Project: $PROJECT_DIR" -echo - -# 1. Detect active compose file -COMPOSE_FILE="" -PROFILE="" -for f in cloud local-cpu local-gpu; do - CF="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml" - if [ -f "$CF" ] && docker compose -f "$CF" ps --quiet 2>/dev/null | grep -q .; then - COMPOSE_FILE="$CF" - PROFILE="$f" - break - fi -done - -if [ -z "$COMPOSE_FILE" ]; then - # Fallback: check .env for profile - if [ -f "$PROJECT_DIR/.env" ]; then - PROFILE=$(grep -E '^DICTIA_PROFILE=' "$PROJECT_DIR/.env" 2>/dev/null | cut -d= -f2 || echo "cloud") - fi - PROFILE="${PROFILE:-cloud}" - COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$PROFILE.yml" -fi - -echo "Profile: $PROFILE" -echo "Compose: $COMPOSE_FILE" -echo - -# 2. Git pull -if [ "$NO_PULL" = false ]; then - echo "[1/5] Pulling latest code..." - cd "$PROJECT_DIR" - git pull origin dictia-branding -else - echo "[1/5] Skipping git pull (--no-pull)" -fi - -# 3. Rebuild DictIA image -if [ "$NO_BUILD" = false ]; then - echo "[2/5] Building DictIA image..." - cd "$PROJECT_DIR" - docker build -t innova-ai/dictia:latest . -else - echo "[2/5] Skipping build (--no-build)" -fi - -# 3b. Pull upstream images (WhisperX) if local profile -if [ "$PROFILE" != "cloud" ] && [ "$NO_BUILD" = false ]; then - echo "[3/5] Pulling upstream images (WhisperX)..." - docker compose -f "$COMPOSE_FILE" pull whisperx-asr 2>/dev/null || true -else - echo "[3/5] Skipping upstream pull (cloud profile or --no-build)" -fi - -# 4. Restart containers -echo "[4/5] Restarting containers..." -docker compose -f "$COMPOSE_FILE" down -docker compose -f "$COMPOSE_FILE" up -d - -# 5. Wait for health -echo "[5/5] Waiting for health check..." -RETRIES=30 -for i in $(seq 1 $RETRIES); do - if docker compose -f "$COMPOSE_FILE" ps | grep -q "healthy"; then - echo " DictIA is healthy!" - break - fi - if [ "$i" -eq "$RETRIES" ]; then - echo " WARNING: Health check timeout. Check: docker compose -f $COMPOSE_FILE logs" - fi - sleep 5 -done - -# Cleanup dangling images -echo -echo "Cleaning up old images..." -docker image prune -f 2>/dev/null || true - -echo -echo "=== Update complete ===" -echo "DictIA: http://localhost:8899" -docker compose -f "$COMPOSE_FILE" ps