Initial release: DictIA v0.8.14-alpha (fork of Speakr, AGPL-3.0)

InnovA AI
2026-03-16 21:47:37 +00:00
commit 42772a31ed
365 changed files with 103572 additions and 0 deletions

deployment/README.md Normal file

@@ -0,0 +1,105 @@
# DictIA — Deployment Infrastructure
Reproducible deployment infrastructure for DictIA.
## Choosing a profile
```
What is your setup?
|
+-- VPS / cloud server?
|     --> cloud (ASR Proxy, GCP GPU on demand)
|
+-- Local machine with an NVIDIA GPU?
|     --> local-gpu (WhisperX on GPU, fastest)
|
+-- Local machine without a GPU?
      --> local-cpu (WhisperX on CPU, slow but functional)
```
## Quickstart
```bash
git clone https://gitea.innova-ai.ca/Innova-AI/dictia-public.git dictia
cd dictia
git checkout dictia-branding
bash deployment/setup.sh
```
The script detects your hardware and guides you through the installation.
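If you already know your target, you can skip the interactive prompt and pass the profile directly, as the guides under `docs/` do:
```bash
bash deployment/setup.sh --profile cloud      # VPS + GCP GPU
bash deployment/setup.sh --profile local-gpu  # local NVIDIA GPU
bash deployment/setup.sh --profile local-cpu  # CPU only
```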
## Architecture
```
deployment/
├── setup.sh # Main installer
├── docker/
│ ├── docker-compose.cloud.yml
│ ├── docker-compose.local-cpu.yml
│ ├── docker-compose.local-gpu.yml
│ └── .env.example
├── asr-proxy/ # GCP GPU proxy (cloud only)
│ ├── proxy.py
│ ├── dashboard.html
│ ├── requirements.txt
│ ├── setup.sh
│ └── asr-proxy.service
├── security/ # Docker hardening (cloud)
│ ├── docker-daemon.json
│ ├── iptables-rules.sh
│ └── docker-iptables.service
├── config/
│ ├── nginx/dictia.conf
│ ├── tailscale/setup-serve.sh
│ └── systemd/dictia.service
├── tools/
│ ├── backup.sh
│ ├── restore.sh
│ ├── update.sh
│ └── health-check.sh
└── docs/
├── QUICKSTART.md
├── VPS-SETUP.md
├── LOCAL-SETUP.md
├── MAINTENANCE.md
└── TROUBLESHOOTING.md
```
### Cloud profile
```
Internet --> Tailscale --> VPS
|
DictIA :8899
|
ASR Proxy :9090
|
GCP GPU (auto start/stop)
|
WhisperX :9000
```
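Once the cloud profile is running, the proxy's health endpoint (served by `asr-proxy/proxy.py`) reports the GPU state, active zone, and monthly budget usage:
```bash
curl -s http://localhost:9090/health | python3 -m json.tool
```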
### Local GPU/CPU profile
```
localhost:8899 --> DictIA container
|
WhisperX container :9000
|
Local GPU (or CPU)
```
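For the local profiles, the WhisperX sidecar exposes its own health endpoint on port 9000:
```bash
curl http://localhost:9000/health
```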
## Documentation
- [QUICKSTART.md](docs/QUICKSTART.md) — Quick start for each profile
- [VPS-SETUP.md](docs/VPS-SETUP.md) — Complete VPS setup from scratch
- [LOCAL-SETUP.md](docs/LOCAL-SETUP.md) — Local GPU/CPU setup
- [MAINTENANCE.md](docs/MAINTENANCE.md) — Backup, restore, update, monitoring
- [TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) — Common problems and solutions
## Upstream updates
All files under `deployment/` are specific to DictIA.
Upstream merges produce no conflicts, except for `deployment/setup.sh`
(which replaces Speakr's original setup.sh).

deployment/asr-proxy/.gitignore vendored Normal file

@@ -0,0 +1,5 @@
gcp-credentials.json
usage-stats.json
venv/
__pycache__/
*.pyc

deployment/asr-proxy/asr-proxy.service Normal file

@@ -0,0 +1,22 @@
# TEMPLATE — Do not copy this directly into /etc/systemd/system/.
# The ${ASR_PROXY_USER} and ${ASR_PROXY_DIR} variables are placeholders.
# The real service file is generated by setup.sh (via a bash heredoc) with the
# resolved values of $SERVICE_USER and $INSTALL_DIR.
# Usage: sudo bash setup.sh (installs and enables the service automatically)
[Unit]
Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
After=network.target
[Service]
Type=simple
User=${ASR_PROXY_USER}
Restart=always
RestartSec=10
WorkingDirectory=${ASR_PROXY_DIR}
ExecStart=${ASR_PROXY_DIR}/venv/bin/python proxy.py
Environment=GOOGLE_APPLICATION_CREDENTIALS=${ASR_PROXY_DIR}/gcp-credentials.json
Environment=STATS_FILE=${ASR_PROXY_DIR}/usage-stats.json
[Install]
WantedBy=multi-user.target

deployment/asr-proxy/dashboard.html Normal file (diff suppressed because it is too large)

deployment/asr-proxy/proxy.py Normal file

@@ -0,0 +1,741 @@
"""DictIA ASR Proxy - Auto-start/stop GCP GPU for WhisperX + Ollama.
Uses Google Cloud Compute REST API directly (no gcloud CLI needed).
Proxies both ASR (WhisperX) and LLM (Ollama) requests.
Multi-zone fallback across Canada (Montreal + Toronto).
"""
import asyncio
import json
import logging
import os
import time
import httpx
import jwt as pyjwt
from pathlib import Path
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, JSONResponse, Response
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger("asr-proxy")
# Config — paths relative to this script's directory by default
SCRIPT_DIR = Path(__file__).parent
GCP_PROJECT = os.getenv("GCP_PROJECT", "speakr-gpu")
WHISPERX_PORT = int(os.getenv("WHISPERX_PORT", "9000"))
OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434"))
IDLE_TIMEOUT = int(os.getenv("IDLE_TIMEOUT", "300"))
CREDS_FILE = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", str(SCRIPT_DIR / "gcp-credentials.json"))
STATS_FILE = os.getenv("STATS_FILE", str(SCRIPT_DIR / "usage-stats.json"))
MONTHLY_LIMIT_HOURS = float(os.getenv("MONTHLY_LIMIT_HOURS", "30"))
# Real GCP cost per GPU-hour (g2-standard-4 + L4): GPU ($0.837) + vCPU ($0.151) + RAM ($0.069)
GPU_COST_PER_HOUR = float(os.getenv("GPU_COST_PER_HOUR", "1.06"))
# Fixed monthly costs: SSD disks ($5.66) + snapshots ($4.19) ≈ $9.85/month
FIXED_MONTHLY_COST = float(os.getenv("FIXED_MONTHLY_COST", "9.85"))
SNAPSHOT_NAME = "whisperx-gpu-snapshot"
HEALTH_POLL_INTERVAL = 5
BOOT_TIMEOUT = 300
# Zone fallback order — Canada only, Montreal first
ZONE_FALLBACKS = [
{
"zone": "northamerica-northeast1-b",
"instance": "whisperx-gpu-mtl1",
"machine_type": "g2-standard-4",
"accelerator": "nvidia-l4",
"accel_count": 1,
"label": "Montreal-b (L4)",
},
{
"zone": "northamerica-northeast1-c",
"instance": "whisperx-gpu-mtl2",
"machine_type": "n1-standard-4",
"accelerator": "nvidia-tesla-t4",
"accel_count": 1,
"label": "Montreal-c (T4)",
},
{
"zone": "northamerica-northeast2-a",
"instance": "whisperx-gpu-tor1",
"machine_type": "g2-standard-4",
"accelerator": "nvidia-l4",
"accel_count": 1,
"label": "Toronto-a (L4)",
},
{
"zone": "northamerica-northeast2-b",
"instance": "whisperx-gpu",
"machine_type": "g2-standard-4",
"accelerator": "nvidia-l4",
"accel_count": 1,
"label": "Toronto-b (L4)",
},
]
STARTUP_SCRIPT = """#!/bin/bash
systemctl start docker
sleep 5
docker start whisperx-asr 2>/dev/null || true
systemctl start ollama 2>/dev/null || true
"""
app = FastAPI(title="DictIA ASR Proxy")
# State
last_request_time = 0.0
active_requests = 0
gpu_ip: str | None = None
active_zone: dict | None = None
shutdown_task: asyncio.Task | None = None
# Request history tracking (in-memory, last 20 requests)
request_history: list[dict] = []
MAX_HISTORY = 20
# Zone status tracking
zone_status: dict[str, dict] = {}
# Startup lock and failure cooldown
_startup_lock: asyncio.Lock | None = None
_last_failure_time: float = 0
FAILURE_COOLDOWN = 180
# OAuth2 token cache
_access_token: str | None = None
_token_expiry: float = 0
# --- Usage Stats ---
def load_stats() -> dict:
try:
with open(STATS_FILE) as f:
return json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
return {"gpu_seconds": 0, "month": time.strftime("%Y-%m"), "requests": 0, "last_start": 0}
def save_stats(stats: dict):
with open(STATS_FILE, "w") as f:
json.dump(stats, f, indent=2)
def track_gpu_time():
stats = load_stats()
current_month = time.strftime("%Y-%m")
if stats.get("month") != current_month:
stats = {"gpu_seconds": 0, "month": current_month, "requests": 0, "last_start": 0}
if stats.get("last_start", 0) > 0:
elapsed = time.time() - stats["last_start"]
stats["gpu_seconds"] += elapsed
stats["last_start"] = 0
save_stats(stats)
def check_budget() -> tuple[bool, float]:
stats = load_stats()
current_month = time.strftime("%Y-%m")
if stats.get("month") != current_month:
return True, 0.0
hours_used = stats.get("gpu_seconds", 0) / 3600
return hours_used < MONTHLY_LIMIT_HOURS, hours_used
# --- GCP Auth ---
async def get_access_token() -> str:
global _access_token, _token_expiry
if _access_token and time.time() < _token_expiry - 60:
return _access_token
with open(CREDS_FILE) as f:
creds = json.load(f)
cred_type = creds.get("type", "authorized_user")
async with httpx.AsyncClient() as client:
if cred_type == "service_account":
now = int(time.time())
payload = {
"iss": creds["client_email"],
"scope": "https://www.googleapis.com/auth/compute",
"aud": "https://oauth2.googleapis.com/token",
"iat": now,
"exp": now + 3600,
}
signed = pyjwt.encode(payload, creds["private_key"], algorithm="RS256")
resp = await client.post(
"https://oauth2.googleapis.com/token",
data={
"grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
"assertion": signed,
},
)
else:
resp = await client.post(
"https://oauth2.googleapis.com/token",
data={
"client_id": creds["client_id"],
"client_secret": creds["client_secret"],
"refresh_token": creds["refresh_token"],
"grant_type": "refresh_token",
},
)
resp.raise_for_status()
data = resp.json()
_access_token = data["access_token"]
_token_expiry = time.time() + data.get("expires_in", 3600)
log.info(f"Refreshed GCP access token ({cred_type})")
return _access_token
# --- GCP Compute API ---
COMPUTE_BASE = "https://compute.googleapis.com/compute/v1"
async def gcp_api(method: str, url: str, **kwargs) -> httpx.Response:
token = await get_access_token()
async with httpx.AsyncClient(timeout=60) as client:
resp = await client.request(
method, url,
headers={"Authorization": f"Bearer {token}"},
**kwargs,
)
return resp
async def get_instance_info(zone: str, instance: str) -> dict | None:
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
resp = await gcp_api("GET", url)
if resp.status_code == 404:
return None
if resp.status_code >= 400:
log.error(f"GCP API error {resp.status_code}: {resp.text}")
return None
return resp.json()
def extract_ip(instance_data: dict) -> str:
interfaces = instance_data.get("networkInterfaces", [])
if interfaces:
access = interfaces[0].get("accessConfigs", [])
if access:
return access[0].get("natIP", "")
return ""
async def start_instance_in_zone(zone: str, instance: str) -> bool:
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/start"
resp = await gcp_api("POST", url)
if resp.status_code < 400:
log.info(f"Start requested: {instance} in {zone}")
return True
log.warning(f"Failed to start {instance} in {zone}: {resp.status_code} {resp.text}")
return False
async def stop_instance_in_zone(zone: str, instance: str):
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/stop"
resp = await gcp_api("POST", url)
if resp.status_code < 400:
log.info(f"Stop requested: {instance} in {zone}")
else:
log.error(f"Failed to stop {instance} in {zone}: {resp.status_code}")
async def create_instance_from_snapshot(config: dict) -> bool:
zone = config["zone"]
instance = config["instance"]
machine = config["machine_type"]
accel = config["accelerator"]
accel_count = config["accel_count"]
log.info(f"Creating {instance} in {zone} from snapshot...")
body = {
"name": instance,
"machineType": f"zones/{zone}/machineTypes/{machine}",
"disks": [{
"boot": True,
"autoDelete": True,
"initializeParams": {
"diskSizeGb": "50",
"diskType": f"zones/{zone}/diskTypes/pd-ssd",
"sourceSnapshot": f"global/snapshots/{SNAPSHOT_NAME}",
},
}],
"networkInterfaces": [{
"network": "global/networks/default",
"accessConfigs": [{"type": "ONE_TO_ONE_NAT", "name": "External NAT"}],
}],
"guestAccelerators": [{
"acceleratorType": f"zones/{zone}/acceleratorTypes/{accel}",
"acceleratorCount": accel_count,
}],
"scheduling": {
"onHostMaintenance": "TERMINATE",
"automaticRestart": False,
},
"tags": {"items": ["whisperx-gpu"]},
"metadata": {
"items": [{"key": "startup-script", "value": STARTUP_SCRIPT}],
},
}
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances"
resp = await gcp_api("POST", url, json=body)
if resp.status_code < 400:
log.info(f"Created {instance} in {zone}")
return True
error_text = resp.text
if "ZONE_RESOURCE_POOL_EXHAUSTED" in error_text:
log.warning(f"No capacity in {zone} -- skipping")
elif "QUOTA" in error_text.upper():
log.warning(f"Quota exceeded for {zone}: {error_text[:200]}")
else:
log.error(f"Failed to create in {zone}: {resp.status_code} {error_text[:200]}")
return False
# --- Core Logic ---
async def wait_for_running(zone: str, instance: str, timeout: int = 120, grace: int = 15) -> bool:
gone_count = 0
start_time = time.time()
for _ in range(timeout // 5):
info = await get_instance_info(zone, instance)
if info and info.get("status") == "RUNNING":
return True
status = info.get("status", "UNKNOWN") if info else "GONE"
elapsed = time.time() - start_time
if status == "GONE":
gone_count += 1
if gone_count >= 2:
log.warning(f"{instance} in {zone}: instance disappeared (no capacity)")
return False
if status in ("STOPPING",):
log.warning(f"{instance} in {zone}: status {status} (no capacity)")
return False
if status in ("TERMINATED", "STOPPED") and elapsed > grace:
log.warning(f"{instance} in {zone}: status {status} after {elapsed:.0f}s (no capacity)")
return False
await asyncio.sleep(5)
return False
async def delete_instance(zone: str, instance: str):
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
resp = await gcp_api("DELETE", url)
if resp.status_code < 400:
log.info(f"Deleted {instance} in {zone} to free quota")
elif resp.status_code == 404:
pass
else:
log.warning(f"Failed to delete {instance} in {zone}: {resp.status_code}")
async def ensure_gpu_running() -> str:
global gpu_ip, active_zone, _last_failure_time
if _last_failure_time > 0:
remaining = FAILURE_COOLDOWN - (time.time() - _last_failure_time)
if remaining > 0:
log.info(f"GPU cooldown active ({int(remaining)}s remaining), waiting...")
await asyncio.sleep(remaining)
_last_failure_time = 0
async with _startup_lock:
ok, hours = check_budget()
if not ok:
raise RuntimeError(f"Monthly GPU limit reached ({hours:.1f}h / {MONTHLY_LIMIT_HOURS}h)")
if active_zone:
info = await get_instance_info(active_zone["zone"], active_zone["instance"])
if info and info.get("status") == "RUNNING":
gpu_ip = extract_ip(info)
if gpu_ip:
return gpu_ip
errors = []
for config in ZONE_FALLBACKS:
zone = config["zone"]
instance = config["instance"]
label = config["label"]
log.info(f"Trying {label}...")
info = await get_instance_info(zone, instance)
if info is None:
created = await create_instance_from_snapshot(config)
if not created:
zone_status[label] = {
"status": "no_capacity",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "no capacity",
}
errors.append(f"{label}: no capacity")
continue
if not await wait_for_running(zone, instance, grace=30):
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "created but failed to start",
}
errors.append(f"{label}: created but failed to start")
await delete_instance(zone, instance)
await asyncio.sleep(3)
continue
else:
status = info.get("status", "UNKNOWN")
if status == "RUNNING":
pass
elif status in ("TERMINATED", "STOPPED"):
zone_status[label] = {
"status": "starting",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": None,
}
started = await start_instance_in_zone(zone, instance)
if not started:
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "start rejected",
}
errors.append(f"{label}: start rejected")
continue
if not await wait_for_running(zone, instance, grace=20):
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "didn't reach RUNNING",
}
errors.append(f"{label}: didn't reach RUNNING")
continue
elif status in ("STAGING", "PROVISIONING"):
zone_status[label] = {
"status": "starting",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": None,
}
if not await wait_for_running(zone, instance):
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": f"stuck in {status}",
}
errors.append(f"{label}: stuck in {status}")
continue
elif status == "STOPPING":
log.info(f"{label}: STOPPING, deleting to free quota")
await delete_instance(zone, instance)
await asyncio.sleep(3)
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "was STOPPING, deleted",
}
errors.append(f"{label}: was STOPPING, deleted")
continue
info = await get_instance_info(zone, instance)
if info and info.get("status") == "RUNNING":
gpu_ip = extract_ip(info)
if gpu_ip:
active_zone = config
_last_failure_time = 0
zone_status[label] = {
"status": "running",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": None,
}
stats = load_stats()
stats["last_start"] = time.time()
stats["requests"] = stats.get("requests", 0) + 1
stats["active_zone"] = label
save_stats(stats)
log.info(f"GPU ready in {label}, IP: {gpu_ip}")
return gpu_ip
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "running but no IP",
}
errors.append(f"{label}: running but no IP")
_last_failure_time = time.time()
raise RuntimeError(
f"No GPU available in any Canadian zone. Tried: {'; '.join(errors)}"
)
async def ensure_gpu_ready() -> str:
ip = await ensure_gpu_running()
url = f"http://{ip}:{WHISPERX_PORT}/health"
log.info(f"Waiting for WhisperX at {url}...")
async with httpx.AsyncClient(timeout=10) as client:
for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):
try:
resp = await client.get(url)
if resp.status_code == 200:
log.info("WhisperX is healthy!")
return ip
except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):
pass
await asyncio.sleep(HEALTH_POLL_INTERVAL)
raise RuntimeError("WhisperX did not become healthy in time")
async def ensure_ollama_ready() -> str:
ip = await ensure_gpu_running()
url = f"http://{ip}:{OLLAMA_PORT}/api/tags"
log.info(f"Waiting for Ollama at {url}...")
async with httpx.AsyncClient(timeout=10) as client:
for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):
try:
resp = await client.get(url)
if resp.status_code == 200:
log.info("Ollama is healthy!")
return ip
except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):
pass
await asyncio.sleep(HEALTH_POLL_INTERVAL)
raise RuntimeError("Ollama did not become healthy in time")
async def idle_shutdown_loop():
while True:
await asyncio.sleep(60)
if last_request_time == 0 or active_zone is None:
continue
if active_requests > 0:
continue
elapsed = time.time() - last_request_time
if elapsed >= IDLE_TIMEOUT:
try:
zone = active_zone["zone"]
instance = active_zone["instance"]
label = active_zone["label"]
info = await get_instance_info(zone, instance)
if info and info.get("status") == "RUNNING":
log.info(f"Idle {int(elapsed)}s -- stopping {label}")
await stop_instance_in_zone(zone, instance)
track_gpu_time()
except Exception as e:
log.error(f"Error stopping: {e}")
# --- Endpoints ---
@app.on_event("startup")
async def on_startup():
global shutdown_task, _startup_lock
_startup_lock = asyncio.Lock()
await get_access_token()
shutdown_task = asyncio.create_task(idle_shutdown_loop())
zones = ", ".join(c["label"] for c in ZONE_FALLBACKS)
log.info(f"DictIA ASR Proxy started. Zones: [{zones}]. Idle: {IDLE_TIMEOUT}s, limit: {MONTHLY_LIMIT_HOURS}h")
@app.post("/asr")
async def asr_proxy(request: Request):
global last_request_time, active_requests
body = await request.body()
headers = {
k: v for k, v in request.headers.items()
if k.lower() not in ("host", "transfer-encoding")
}
last_request_time = time.time()
active_requests += 1
start_time = time.time()
result_status = 200
try:
ip = await ensure_gpu_ready()
target = f"http://{ip}:{WHISPERX_PORT}/asr"
log.info(f"Forwarding {len(body)} bytes to {target}")
async with httpx.AsyncClient(timeout=httpx.Timeout(7200.0)) as client:
resp = await client.post(target, content=body, headers=headers)
last_request_time = time.time()
result_status = resp.status_code
        ct = resp.headers.get("content-type", "")
        if "application/json" in ct:
            return JSONResponse(content=resp.json(), status_code=resp.status_code)
        else:
            # Pass non-JSON bodies through unchanged instead of re-encoding them as a JSON string
            return Response(content=resp.content, status_code=resp.status_code, media_type=ct or None)
except httpx.ReadTimeout:
result_status = 504
return JSONResponse({"error": "Transcription timeout (2h)"}, status_code=504)
except Exception as e:
result_status = 502
log.error(f"Proxy error: {e}")
return JSONResponse({"error": str(e)}, status_code=502)
finally:
active_requests -= 1
last_request_time = time.time()
request_history.insert(0, {
"time": time.strftime("%Y-%m-%dT%H:%M:%S"),
"type": "ASR",
"duration_sec": round(time.time() - start_time, 1),
"status": result_status,
"zone": active_zone["label"] if active_zone else "none",
})
if len(request_history) > MAX_HISTORY:
request_history.pop()
@app.get("/health")
async def health():
zone_label = active_zone["label"] if active_zone else "none"
gpu_status = "unknown"
if active_zone:
try:
info = await get_instance_info(active_zone["zone"], active_zone["instance"])
gpu_status = info.get("status", "unknown") if info else "not_found"
except Exception:
pass
ok, hours = check_budget()
stats = load_stats()
return {
"proxy": "healthy",
"gpu_instance": gpu_status,
"gpu_zone": zone_label,
"active_requests": active_requests,
"idle_timeout": IDLE_TIMEOUT,
"usage": {
"month": stats.get("month"),
"gpu_hours": round(hours, 2),
"gpu_limit_hours": MONTHLY_LIMIT_HOURS,
"requests_count": stats.get("requests", 0),
"budget_ok": ok,
},
"gpu_ip": gpu_ip,
"machine_type": active_zone.get("machine_type", "unknown") if active_zone else "unknown",
"gpu_model": active_zone.get("accelerator", "unknown") if active_zone else "unknown",
"idle_seconds": round(time.time() - last_request_time) if last_request_time > 0 else 0,
"auto_shutdown_in": max(0, IDLE_TIMEOUT - round(time.time() - last_request_time)) if last_request_time > 0 and active_zone else None,
"token_expires_in": round(_token_expiry - time.time()) if _token_expiry > 0 else None,
}
@app.get("/stats")
async def get_stats():
stats = load_stats()
hours = stats.get("gpu_seconds", 0) / 3600
gpu_cost = hours * GPU_COST_PER_HOUR
total_cost = gpu_cost + FIXED_MONTHLY_COST
return {
"month": stats.get("month"),
"gpu_hours": round(hours, 2),
"gpu_minutes": round(hours * 60, 1),
"estimated_cost_usd": round(total_cost, 2),
"gpu_cost_usd": round(gpu_cost, 2),
"fixed_cost_usd": FIXED_MONTHLY_COST,
"monthly_limit_hours": MONTHLY_LIMIT_HOURS,
"remaining_hours": round(MONTHLY_LIMIT_HOURS - hours, 2),
"requests_count": stats.get("requests", 0),
"active_zone": stats.get("active_zone", "none"),
"cost_per_hour": GPU_COST_PER_HOUR,
"recent_requests": request_history[:10],
"zone_fallbacks": [
{
"label": config["label"],
"zone": config["zone"],
"machine": config["machine_type"],
"gpu": config["accelerator"],
**zone_status.get(config["label"], {"status": "unknown", "last_tried": None, "last_error": None}),
}
for config in ZONE_FALLBACKS
],
}
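# Manual override endpoints, e.g. curl -X POST http://localhost:9090/gpu/start
# to pre-warm the GPU, or /gpu/stop to cut costs immediately.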
@app.post("/gpu/start")
async def gpu_start():
try:
ip = await ensure_gpu_ready()
label = active_zone["label"] if active_zone else "unknown"
return {"status": "running", "ip": ip, "zone": label}
except Exception as e:
return JSONResponse({"error": str(e)}, status_code=503)
@app.post("/gpu/stop")
async def gpu_stop():
if not active_zone:
return {"status": "no active instance"}
try:
await stop_instance_in_zone(active_zone["zone"], active_zone["instance"])
track_gpu_time()
return {"status": "stopped", "zone": active_zone["label"]}
except Exception as e:
return JSONResponse({"error": str(e)}, status_code=500)
DASHBOARD_HTML = Path(__file__).parent / "dashboard.html"
@app.get("/", response_class=HTMLResponse)
async def dashboard():
if DASHBOARD_HTML.exists():
return HTMLResponse(DASHBOARD_HTML.read_text(encoding="utf-8"))
return HTMLResponse("<h1>Dashboard not found</h1><p>Place dashboard.html next to proxy.py</p>", status_code=404)
@app.api_route("/v1/{path:path}", methods=["POST", "GET"])
async def llm_proxy(request: Request, path: str):
global last_request_time, active_requests
body = await request.body()
headers = {
k: v for k, v in request.headers.items()
if k.lower() not in ("host", "transfer-encoding")
}
last_request_time = time.time()
active_requests += 1
start_time = time.time()
result_status = 200
try:
ip = await ensure_ollama_ready()
target = f"http://{ip}:{OLLAMA_PORT}/v1/{path}"
log.info(f"Forwarding LLM request to {target}")
async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
resp = await client.request(request.method, target, content=body, headers=headers)
last_request_time = time.time()
result_status = resp.status_code
return Response(
content=resp.content,
status_code=resp.status_code,
media_type=resp.headers.get("content-type"),
)
except httpx.ReadTimeout:
result_status = 504
return JSONResponse({"error": "LLM timeout (5min)"}, status_code=504)
except Exception as e:
result_status = 502
log.error(f"LLM proxy error: {e}")
return JSONResponse({"error": str(e)}, status_code=502)
finally:
active_requests -= 1
last_request_time = time.time()
request_history.insert(0, {
"time": time.strftime("%Y-%m-%dT%H:%M:%S"),
"type": "LLM",
"duration_sec": round(time.time() - start_time, 1),
"status": result_status,
"zone": active_zone["label"] if active_zone else "none",
})
if len(request_history) > MAX_HISTORY:
request_history.pop()
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=9090)

deployment/asr-proxy/requirements.txt Normal file

@@ -0,0 +1,5 @@
fastapi==0.115.0
uvicorn==0.30.0
httpx==0.27.0
PyJWT==2.9.0
cryptography>=43.0.0

deployment/asr-proxy/setup.sh Normal file

@@ -0,0 +1,87 @@
#!/usr/bin/env bash
# DictIA ASR Proxy — Setup script
# Installs the GCP GPU proxy for cloud deployments.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
INSTALL_DIR="${ASR_PROXY_DIR:-$SCRIPT_DIR}"
SERVICE_USER="${ASR_PROXY_USER:-$(whoami)}"
echo "=== DictIA ASR Proxy Setup ==="
echo "Install directory: $INSTALL_DIR"
echo "Service user: $SERVICE_USER"
echo
# 1. Create virtual environment
if [ ! -d "$INSTALL_DIR/venv" ]; then
echo "[1/4] Creating Python virtual environment..."
python3 -m venv "$INSTALL_DIR/venv"
else
echo "[1/4] Virtual environment already exists."
fi
# 2. Install dependencies
echo "[2/4] Installing Python dependencies..."
"$INSTALL_DIR/venv/bin/pip" install --quiet --upgrade pip
"$INSTALL_DIR/venv/bin/pip" install --quiet -r "$INSTALL_DIR/requirements.txt"
# 3. GCP credentials
if [ ! -f "$INSTALL_DIR/gcp-credentials.json" ]; then
echo "[3/4] GCP credentials not found."
echo " Place your GCP service account or OAuth credentials at:"
echo " $INSTALL_DIR/gcp-credentials.json"
echo
echo " For service account: download JSON from GCP Console > IAM > Service Accounts"
echo " For user credentials: run 'gcloud auth application-default login' and copy the file"
echo
read -rp " Path to credentials file (or press Enter to skip): " CREDS_PATH
if [ -n "$CREDS_PATH" ] && [ -f "$CREDS_PATH" ]; then
cp "$CREDS_PATH" "$INSTALL_DIR/gcp-credentials.json"
chmod 600 "$INSTALL_DIR/gcp-credentials.json"
echo " Credentials copied."
else
echo " Skipped. You must add credentials before starting the proxy."
fi
else
echo "[3/4] GCP credentials found."
fi
# 4. Install systemd service
echo "[4/4] Installing systemd service..."
SERVICE_FILE="/etc/systemd/system/asr-proxy.service"
cat > /tmp/asr-proxy.service <<UNIT
[Unit]
Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
After=network.target
[Service]
Type=simple
User=$SERVICE_USER
Restart=always
RestartSec=10
WorkingDirectory=$INSTALL_DIR
ExecStart=$INSTALL_DIR/venv/bin/python proxy.py
Environment=GOOGLE_APPLICATION_CREDENTIALS=$INSTALL_DIR/gcp-credentials.json
Environment=STATS_FILE=$INSTALL_DIR/usage-stats.json
[Install]
WantedBy=multi-user.target
UNIT
if [ "$(id -u)" -eq 0 ]; then
cp /tmp/asr-proxy.service "$SERVICE_FILE"
systemctl daemon-reload
systemctl enable asr-proxy.service
echo " Service installed and enabled."
echo " Start with: systemctl start asr-proxy"
else
echo " Run as root to install systemd service, or copy manually:"
echo " sudo cp /tmp/asr-proxy.service $SERVICE_FILE"
echo " sudo systemctl daemon-reload && sudo systemctl enable asr-proxy"
fi
echo
echo "=== Setup complete ==="
echo "Dashboard: http://localhost:9090"
echo "Health: http://localhost:9090/health"

deployment/config/nginx/dictia.conf Normal file

@@ -0,0 +1,83 @@
# DictIA — Nginx reverse proxy configuration
#
# Alternative to Tailscale Serve for exposing DictIA over HTTPS.
# Replace YOUR_DOMAIN with your actual domain name.
#
# Install: sudo cp dictia.conf /etc/nginx/sites-available/dictia
# sudo ln -s /etc/nginx/sites-available/dictia /etc/nginx/sites-enabled/
# sudo nginx -t && sudo systemctl reload nginx
#
# For HTTPS with Let's Encrypt:
# sudo certbot --nginx -d YOUR_DOMAIN
upstream dictia_app {
server 127.0.0.1:8899;
}
upstream asr_proxy {
server 127.0.0.1:9090;
}
server {
listen 80;
server_name YOUR_DOMAIN;
# Redirect HTTP to HTTPS (uncomment after certbot setup)
# return 301 https://$host$request_uri;
client_max_body_size 500M;
# DictIA app
location / {
proxy_pass http://dictia_app;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# WebSocket support (for real-time features)
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
# Long timeouts for transcription uploads
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
proxy_connect_timeout 60s;
}
# ASR Proxy dashboard (optional, restrict access)
location /asr-proxy/ {
proxy_pass http://asr_proxy/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
# HTTPS server block (managed by certbot, uncomment after setup)
# server {
# listen 443 ssl;
# server_name YOUR_DOMAIN;
#
# ssl_certificate /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem;
# ssl_certificate_key /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem;
# include /etc/letsencrypt/options-ssl-nginx.conf;
# ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
#
# client_max_body_size 500M;
#
# location / {
# proxy_pass http://dictia_app;
# proxy_set_header Host $host;
# proxy_set_header X-Real-IP $remote_addr;
# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# proxy_set_header X-Forwarded-Proto $scheme;
# proxy_http_version 1.1;
# proxy_set_header Upgrade $http_upgrade;
# proxy_set_header Connection "upgrade";
# proxy_read_timeout 3600s;
# proxy_send_timeout 3600s;
# }
# }

deployment/config/systemd/dictia.service Normal file

@@ -0,0 +1,15 @@
[Unit]
Description=DictIA - Docker Compose Application
After=docker.service
Requires=docker.service
[Service]
Type=oneshot
RemainAfterExit=yes
WorkingDirectory=/opt/dictia
ExecStart=/usr/bin/docker compose -f deployment/docker/docker-compose.cloud.yml up -d
ExecStop=/usr/bin/docker compose -f deployment/docker/docker-compose.cloud.yml down
TimeoutStartSec=120
[Install]
WantedBy=multi-user.target

deployment/config/tailscale/setup-serve.sh Normal file

@@ -0,0 +1,52 @@
#!/usr/bin/env bash
# DictIA — Tailscale Serve/Funnel setup
#
# Exposes DictIA and ASR Proxy dashboard via Tailscale HTTPS.
# Based on the VPS production configuration.
#
# Usage:
# bash setup-serve.sh [serve|funnel]
# serve — accessible only within your tailnet (default)
# funnel — accessible from the public internet
set -euo pipefail
MODE="${1:-serve}"
echo "=== DictIA Tailscale Setup ==="
echo "Mode: $MODE"
echo
# Verify Tailscale is connected
if ! tailscale status >/dev/null 2>&1; then
echo "ERROR: Tailscale is not running or not connected."
echo " Install: curl -fsSL https://tailscale.com/install.sh | sh"
echo " Connect: sudo tailscale up"
exit 1
fi
HOSTNAME=$(tailscale status --json | python3 -c "import sys,json; print(json.load(sys.stdin)['Self']['DNSName'].rstrip('.'))" 2>/dev/null || echo "unknown")
echo "Tailscale hostname: $HOSTNAME"
echo
# DictIA app on :443 → localhost:8899
echo "[1/2] Setting up DictIA app (port 443 → 8899)..."
if [ "$MODE" = "funnel" ]; then
tailscale funnel --bg --https=443 http://localhost:8899
else
tailscale serve --bg --https=443 http://localhost:8899
fi
# ASR Proxy dashboard → localhost:9090
# (Tailscale Funnel only supports ports 443, 8443 and 10000, so funnel mode uses 8443)
echo "[2/2] Setting up ASR Proxy dashboard..."
if [ "$MODE" = "funnel" ]; then
    tailscale funnel --bg --https=8443 http://localhost:9090
else
    tailscale serve --bg --https=9443 http://localhost:9090
fi
echo
echo "=== Setup complete ==="
echo "DictIA: https://$HOSTNAME/"
echo "ASR Dashboard: https://$HOSTNAME:9443/"
echo
echo "Verify with: tailscale serve status"

deployment/docker/.env.example Normal file

@@ -0,0 +1,124 @@
# =============================================================================
# DictIA — Unified Environment Configuration
# =============================================================================
#
# Copy this file to the project root as .env and edit the values.
# cp deployment/docker/.env.example .env
#
# This template combines upstream settings with DictIA deployment vars.
# See: config/env.transcription.example for full upstream documentation.
# =============================================================================
# FLASK SECRET KEY (REQUIRED — auto-generated by setup.sh)
# =============================================================================
SECRET_KEY=change-me-to-a-random-string
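# Example generator (any long random string works):
#   python3 -c "import secrets; print(secrets.token_hex(32))"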
# =============================================================================
# DEPLOYMENT PROFILE (used by deployment scripts)
# =============================================================================
# Options: cloud, local-cpu, local-gpu
DICTIA_PROFILE=cloud
# =============================================================================
# TEXT GENERATION MODEL (REQUIRED for summaries, titles, chat)
# =============================================================================
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini
# =============================================================================
# TRANSCRIPTION CONFIGURATION
# =============================================================================
# For cloud profile (ASR Proxy → GCP GPU):
# ASR_BASE_URL is set automatically in docker-compose.cloud.yml
# No need to set it here.
#
# For local profiles (WhisperX sidecar):
# ASR_BASE_URL is set automatically in docker-compose.local-*.yml
# No need to set it here.
#
# For OpenAI API instead of self-hosted ASR:
# TRANSCRIPTION_API_KEY=sk-your_openai_api_key
# TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize
# ASR model (for local WhisperX profiles)
ASR_MODEL=large-v3
# HuggingFace token (required for diarization with pyannote)
# Get yours at: https://huggingface.co/settings/tokens
# Must accept: https://huggingface.co/pyannote/speaker-diarization-3.1
HF_TOKEN=
# =============================================================================
# ASR PROXY — CLOUD PROFILE ONLY
# =============================================================================
# GCP project for GPU instances
# GCP_PROJECT=your-gcp-project
# Monthly GPU budget limit in hours (default: 30, see proxy.py)
# MONTHLY_LIMIT_HOURS=50
# Idle timeout before auto-stopping GPU (seconds, default: 300)
# IDLE_TIMEOUT=300
# =============================================================================
# APPLICATION SETTINGS
# =============================================================================
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=changeme
ALLOW_REGISTRATION=false
TIMEZONE="America/Toronto"
LOG_LEVEL=ERROR
LOCALE=fr_CA
DEFAULT_LANGUAGE=fr
SHOW_USERNAMES_IN_UI=true
SESSION_COOKIE_HTTPONLY=true
SESSION_COOKIE_SAMESITE=Lax
SESSION_COOKIE_SECURE=true
# =============================================================================
# OPTIONAL FEATURES
# =============================================================================
ENABLE_INQUIRE_MODE=false
ENABLE_AUTO_PROCESSING=false
ENABLE_AUTO_EXPORT=false
ENABLE_AUTO_DELETION=false
ENABLE_INTERNAL_SHARING=true
ENABLE_PUBLIC_SHARING=true
ENABLE_FOLDERS=true
VIDEO_RETENTION=true
USERS_CAN_DELETE=true
# =============================================================================
# BACKGROUND PROCESSING
# =============================================================================
JOB_QUEUE_WORKERS=4
SUMMARY_QUEUE_WORKERS=4
JOB_MAX_RETRIES=3
MAX_CONCURRENT_UPLOADS=3
# =============================================================================
# TRANSCRIPTION SETTINGS
# =============================================================================
TRANSCRIPTION_CONNECTOR=asr_endpoint
USE_NEW_TRANSCRIPTION_ARCHITECTURE=true
ENABLE_CHUNKING=true
CHUNK_LIMIT=2400s
CHUNK_OVERLAP_SECONDS=5
# =============================================================================
# LLM / SUMMARY SETTINGS
# =============================================================================
SUMMARY_LANGUAGE=fr
SUMMARY_MAX_TOKENS=16000
CHAT_MAX_TOKENS=12000
ENABLE_STREAM_OPTIONS=false
ENABLE_THINKING=false
# =============================================================================
# DOCKER/DATABASE
# =============================================================================
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
UPLOAD_FOLDER=/data/uploads

deployment/docker/docker-compose.cloud.yml Normal file

@@ -0,0 +1,40 @@
# DictIA — Cloud deployment (VPS + ASR Proxy GCP GPU)
#
# Usage:
# docker compose -f deployment/docker/docker-compose.cloud.yml up -d
#
# ASR is handled by the external asr-proxy (port 9090) which auto-starts
# a GCP GPU instance on demand. DictIA connects via host.docker.internal.
services:
dictia:
build:
context: ../..
dockerfile: Dockerfile
image: innova-ai/dictia:latest
container_name: dictia
restart: unless-stopped
ports:
- "8899:8899"
env_file:
- ../../.env
environment:
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
- ASR_BASE_URL=http://host.docker.internal:9090
volumes:
- ../../data/uploads:/data/uploads
- ../../data/instance:/data/instance
extra_hosts:
- "host.docker.internal:host-gateway"
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
networks:
- dictia-network
networks:
dictia-network:
driver: bridge

deployment/docker/docker-compose.local-cpu.yml Normal file

@@ -0,0 +1,64 @@
# DictIA — Local CPU deployment (WhisperX on CPU + DictIA)
#
# Usage:
# docker compose -f deployment/docker/docker-compose.local-cpu.yml up -d
#
# Warning: CPU transcription is significantly slower than GPU.
# Expect ~10x real-time for large files (e.g., 1h audio = ~10h processing).
services:
whisperx-asr:
image: ghcr.io/jim60105/whisperx-asr:latest
container_name: whisperx-asr
restart: unless-stopped
ports:
- "9000:9000"
environment:
- ASR_MODEL=${ASR_MODEL:-large-v3}
- ASR_ENGINE=whisperx
- DEVICE=cpu
- COMPUTE_TYPE=float32
- HF_TOKEN=${HF_TOKEN:-}
volumes:
- whisperx-cache:/root/.cache
deploy:
resources:
limits:
memory: 18G
networks:
- dictia-network
dictia:
build:
context: ../..
dockerfile: Dockerfile
image: innova-ai/dictia:latest
container_name: dictia
restart: unless-stopped
ports:
- "8899:8899"
env_file:
- ../../.env
environment:
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
- ASR_BASE_URL=http://whisperx-asr:9000
volumes:
- ../../data/uploads:/data/uploads
- ../../data/instance:/data/instance
depends_on:
- whisperx-asr
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
networks:
- dictia-network
volumes:
whisperx-cache:
networks:
dictia-network:
driver: bridge

deployment/docker/docker-compose.local-gpu.yml Normal file

@@ -0,0 +1,69 @@
# DictIA — Local GPU deployment (WhisperX on NVIDIA GPU + DictIA)
#
# Usage:
# docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d
#
# Prerequisites:
# - NVIDIA GPU with CUDA support
# - nvidia-container-toolkit installed
# - Docker configured with nvidia runtime
services:
whisperx-asr:
image: ghcr.io/jim60105/whisperx-asr:latest-cuda
container_name: whisperx-asr
restart: unless-stopped
ports:
- "9000:9000"
environment:
- ASR_MODEL=${ASR_MODEL:-large-v3}
- ASR_ENGINE=whisperx
- DEVICE=cuda
- COMPUTE_TYPE=float16
- HF_TOKEN=${HF_TOKEN:-}
volumes:
- whisperx-cache:/root/.cache
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
networks:
- dictia-network
dictia:
build:
context: ../..
dockerfile: Dockerfile
image: innova-ai/dictia:latest
container_name: dictia
restart: unless-stopped
ports:
- "8899:8899"
env_file:
- ../../.env
environment:
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
- ASR_BASE_URL=http://whisperx-asr:9000
volumes:
- ../../data/uploads:/data/uploads
- ../../data/instance:/data/instance
depends_on:
- whisperx-asr
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
networks:
- dictia-network
volumes:
whisperx-cache:
networks:
dictia-network:
driver: bridge

deployment/docs/LOCAL-SETUP.md Normal file

@@ -0,0 +1,118 @@
# Local Setup — DictIA
Guide to deploying DictIA locally with an NVIDIA GPU or on CPU.
## local-gpu profile
### Prerequisites
- NVIDIA GPU with CUDA support
- [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
- Docker + Docker Compose V2
- 8GB+ RAM (16GB recommended)
- HuggingFace token (for diarization)
### Installing nvidia-container-toolkit
```bash
# Ubuntu/Debian
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
# Verify
docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi
```
### DictIA setup
```bash
cd dictia
bash deployment/setup.sh --profile local-gpu
```
The setup script checks that:
- nvidia-container-toolkit is installed
- the GPU is reachable from Docker
- enough RAM is available
### Model configuration
By default, WhisperX uses `large-v3`. To change it (then recreate the container, as shown below):
```bash
# Edit .env
ASR_MODEL=large-v3 # Best quality
# ASR_MODEL=medium # Faster, decent quality
# ASR_MODEL=small # Very fast, reduced quality
```
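A plain `restart` keeps the container's old environment; recreate the WhisperX service so the new model takes effect:
```bash
docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d --force-recreate whisperx-asr
```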
---
## local-cpu profile
### Prerequisites
- Docker + Docker Compose V2
- 18GB+ RAM (WhisperX on CPU is memory-hungry)
- Patience (transcription runs at ~10x real time)
### Setup
```bash
cd dictia
bash deployment/setup.sh --profile local-cpu
```
### Limitations
- Slow transcription: 1h of audio takes ~10h
- Uses float32 (no GPU acceleration)
- Memory capped at 18GB by default
- Recommended for: tests, small files, demos
To reduce memory usage, pick a smaller model:
```bash
# Edit .env
ASR_MODEL=small # or medium, base, tiny
```
---
## Verification
```bash
# Health check
bash deployment/tools/health-check.sh
# Quick test: open in the browser
open http://localhost:8899
# Check WhisperX
curl http://localhost:9000/health
```
## Managing the containers
```bash
COMPOSE_FILE=deployment/docker/docker-compose.local-gpu.yml # or local-cpu
# Logs
docker compose -f $COMPOSE_FILE logs -f
# Restart
docker compose -f $COMPOSE_FILE restart
# Stop
docker compose -f $COMPOSE_FILE down
# Watch GPU usage (GPU profile only)
nvidia-smi
```

deployment/docs/MAINTENANCE.md Normal file

@@ -0,0 +1,136 @@
# Maintenance — DictIA
## Backup
```bash
# Full backup (data, .env, volumes, ASR stats)
bash deployment/tools/backup.sh
# Backup to a specific directory
bash deployment/tools/backup.sh /mnt/backups
```
Backups are written to `backups/` with automatic rotation (the 5 most recent are kept).
Contents of a backup:
- `data/` — uploads and the SQLite database
- `dot-env` — configuration file
- `asr-usage-stats.json` — GPU usage stats
- `whisperx-cache.tar.gz` — model cache (if a Docker volume is used)
- `manifest.json` — backup metadata
### Recommended schedule
| Frequency | Action |
|-----------|--------|
| Daily | `bash deployment/tools/backup.sh` |
| Weekly | Copy the backup to external storage |
| Monthly | Verify a restore in a test environment |
To automate with cron:
```bash
# Daily backup at 3 a.m.
0 3 * * * /opt/dictia/deployment/tools/backup.sh >> /var/log/dictia-backup.log 2>&1
```
## Restore
```bash
# List available backups
ls -la backups/
# Restore a backup
bash deployment/tools/restore.sh backups/dictia-20260211-030000.tar.gz
```
The script:
1. Validates the archive (manifest present; see the quick check below)
2. Asks for confirmation
3. Stops the containers
4. Restores the files
5. Restarts the containers
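Before running a restore, a plain-tar sanity check confirms the archive is readable and that the manifest is present:
```bash
tar -tzf backups/dictia-20260211-030000.tar.gz | head
```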
## Updating
```bash
# Full update (git pull + rebuild + restart)
bash deployment/tools/update.sh
# Rebuild only (no git pull)
bash deployment/tools/update.sh --no-pull
# Git pull only (no rebuild)
bash deployment/tools/update.sh --no-build
```
The script:
1. Detects the active profile automatically
2. `git pull origin dictia-branding`
3. `docker build -t innova-ai/dictia:latest .`
4. Pulls the upstream WhisperX image (local profiles)
5. `docker compose down && up -d`
6. Waits for the health check
7. Prunes dangling images
## Monitoring
### Health check
```bash
# Full diagnostic (human-readable)
bash deployment/tools/health-check.sh
# JSON (for alerting/scripts)
bash deployment/tools/health-check.sh --json
# Exit code only (0=ok, 1=problem)
bash deployment/tools/health-check.sh --quiet
```
### Logs
```bash
# DictIA
docker logs dictia -f --tail 100
# WhisperX (local profiles)
docker logs whisperx-asr -f --tail 100
# ASR Proxy (cloud profile)
journalctl -u asr-proxy -f
```
### GPU dashboard (cloud profile)
The GPU monitoring dashboard is available at:
- `http://localhost:9090` (local)
- `https://your-hostname.tailnet.ts.net:9443` (Tailscale)
It shows: GPU status, monthly cost, request history, fallback zones.
### Key metrics
```bash
# Disk space (transcriptions grow over time)
df -h /opt/dictia/data/
# Memory usage (WhisperX is memory-hungry)
docker stats --no-stream
# GPU stats (cloud profile)
curl -s http://localhost:9090/stats | python3 -m json.tool
```
## Docker maintenance
```bash
# Prune orphaned images
docker image prune -f
# Clean everything (careful: removes unused volumes)
# docker system prune -a --volumes
# Check Docker disk usage
docker system df
```

deployment/docs/QUICKSTART.md Normal file

@@ -0,0 +1,90 @@
# Quickstart — DictIA
## Common prerequisites
- Docker + Docker Compose V2
- Git
- 2GB+ free RAM
```bash
git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
cd dictia
git checkout dictia-branding
```
---
## Cloud profile (VPS + GCP GPU)
The GPU starts automatically when someone transcribes, and stops after 5 minutes of inactivity.
```bash
# 1. Interactive setup
bash deployment/setup.sh --profile cloud
# 2. ASR Proxy setup (GCP credentials required)
bash deployment/asr-proxy/setup.sh
# 3. Optional: Tailscale Serve for HTTPS
bash deployment/config/tailscale/setup-serve.sh
```
**Required**: GCP credentials (service account or OAuth) in `deployment/asr-proxy/gcp-credentials.json`.
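To watch the auto start/stop cycle, poll the proxy's health endpoint; the `auto_shutdown_in` field counts down to the idle stop:
```bash
watch -n 10 "curl -s http://localhost:9090/health | python3 -m json.tool"
```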
---
## Local GPU profile
Local transcription on an NVIDIA GPU. The fastest option.
```bash
# Prerequisite: nvidia-container-toolkit
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
# Setup
bash deployment/setup.sh --profile local-gpu
```
**Required**: a HuggingFace token for diarization (pyannote).
---
## Local CPU profile
Transcription on CPU. Slow, but fine for testing.
```bash
bash deployment/setup.sh --profile local-cpu
```
Plan for ~10x real time (1h of audio = ~10h of processing).
---
## After installation
```bash
# Check that everything works
bash deployment/tools/health-check.sh
# Open DictIA
open http://localhost:8899
```
Log in with the admin credentials configured during setup.
## Useful commands
```bash
# Live logs
docker compose -f deployment/docker/docker-compose.<profile>.yml logs -f
# Restart
docker compose -f deployment/docker/docker-compose.<profile>.yml restart
# Update
bash deployment/tools/update.sh
# Backup
bash deployment/tools/backup.sh
```

deployment/docs/TROUBLESHOOTING.md Normal file

@@ -0,0 +1,177 @@
# Troubleshooting — DictIA
## WhisperX OOM (Out of Memory)
**Symptom**: The `whisperx-asr` container crashes or restarts in a loop.
**Cause**: The model is too large for the available RAM/VRAM.
**Solutions**:
```bash
# Use a smaller model in .env
ASR_MODEL=medium # instead of large-v3
# Raise the memory limit (local-cpu)
# Edit docker-compose.local-cpu.yml
deploy:
  resources:
    limits:
      memory: 24G # instead of 18G
```
## Diarization 403 Forbidden
**Symptom**: 403 error during transcription with diarization.
**Cause**: Missing HuggingFace token, or the model's terms were not accepted.
**Solution**:
1. Create a token: https://huggingface.co/settings/tokens
2. Accept the terms: https://huggingface.co/pyannote/speaker-diarization-3.1
3. Add it to `.env`:
```bash
HF_TOKEN=hf_your_token
```
4. Restart: `docker compose -f deployment/docker/docker-compose.<profile>.yml restart`
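To confirm the token itself is valid, a quick check against HuggingFace's whoami endpoint (a sketch; adjust if the API changes):
```bash
curl -s -H "Authorization: Bearer $HF_TOKEN" https://huggingface.co/api/whoami-v2
```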
## GPU not detected (local-gpu)
**Symptom**: `nvidia-smi` works, but Docker does not see the GPU.
**Solution**:
```bash
# Install nvidia-container-toolkit
sudo apt install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
# Verify
docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi
```
## Upload fails (large files)
**Symptom**: Uploads of large files (>100MB) fail.
**Possible causes**:
- Nginx/reverse proxy timeout
- Upload limit set too low
**Solutions**:
```bash
# With Nginx: check client_max_body_size in dictia.conf
client_max_body_size 500M;
# With Tailscale Serve: no limit on the Tailscale side
# Gunicorn timeout (already 600s in the Dockerfile)
# For very long files, raise it in docker-compose:
environment:
  - GUNICORN_TIMEOUT=1200
```
## dictia container "unhealthy"
**Symptom**: `docker ps` shows "unhealthy" for the dictia container.
**Diagnosis**:
```bash
# Check the logs
docker logs dictia --tail 50
# Test manually
docker exec dictia python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"
```
**Common causes**:
- Misconfigured `.env` (missing SECRET_KEY)
- Corrupted database (restore a backup)
- Port 8899 already in use
## ASR Proxy: "No GPU available"
**Symptom**: Transcription fails with "No GPU available in any Canadian zone".
**Causes**:
- GCP has no GPU available (capacity exhausted)
- Expired GCP credentials
- Monthly budget reached
**Diagnosis**:
```bash
# Check the proxy status
curl -s http://localhost:9090/health | python3 -m json.tool
# Check the stats (budget)
curl -s http://localhost:9090/stats | python3 -m json.tool
# Check the logs
journalctl -u asr-proxy --since "1 hour ago"
```
**Solutions**:
- Wait (GCP frees up GPUs regularly)
- The proxy retries automatically after a 3-minute cooldown
- Check the dashboard (http://localhost:9090), or force a retry as shown below
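The proxy also exposes manual override endpoints (defined in `proxy.py`) to force a start attempt or an immediate stop:
```bash
curl -X POST http://localhost:9090/gpu/start
curl -X POST http://localhost:9090/gpu/stop
```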
## Docker build slow or failing
**Symptom**: `docker build` takes too long or fails.
**Solutions**:
```bash
# Limit resources if the VPS is small
docker build --memory=2g --cpus=2 -t innova-ai/dictia:latest .
# Clean the Docker cache if the disk is full
docker builder prune -f
docker image prune -f
```
## Corrupted database
**Symptom**: SQLite error at startup.
**Solution**:
```bash
# Restore the most recent backup
bash deployment/tools/restore.sh backups/dictia-LATEST.tar.gz
# Or recreate the database (loses all data)
rm data/instance/transcriptions.db
docker compose -f deployment/docker/docker-compose.<profile>.yml restart
```
## Port 8899 already in use
```bash
# Find what is using the port
sudo lsof -i :8899
# or
sudo ss -tlnp | grep 8899
# Stop that process, or change the port in docker-compose
ports:
  - "8900:8899" # use 8900 instead
```
## An update broke everything
```bash
# Rollback: return to the previous commit
cd dictia
git log --oneline -5 # find the right commit
git checkout <commit-hash>
# Rebuild and restart
docker build -t innova-ai/dictia:latest .
docker compose -f deployment/docker/docker-compose.<profile>.yml down
docker compose -f deployment/docker/docker-compose.<profile>.yml up -d
```
## Quick diagnostic command
```bash
# Check everything at once
bash deployment/tools/health-check.sh --json | python3 -m json.tool
```

deployment/docs/VPS-SETUP.md Normal file

@@ -0,0 +1,148 @@
# VPS setup from scratch — DictIA
Complete guide to deploying DictIA on an Ubuntu VPS.
Tested on an OVH VPS with Ubuntu 22.04/24.04.
## 1. VPS preparation
```bash
# System update
sudo apt update && sudo apt upgrade -y
# Install the essentials
sudo apt install -y curl git
```
## 2. Docker
```bash
# Install Docker (official method)
curl -fsSL https://get.docker.com | sh
# Add the user to the docker group
sudo usermod -aG docker $USER
# Log out and back in to apply the group change
exit
# (reconnect over SSH)
# Verify
docker --version
docker compose version
```
## 3. Tailscale (recommended)
Tailscale provides a mesh VPN for reaching the VPS without exposing any public ports.
```bash
# Install Tailscale
curl -fsSL https://tailscale.com/install.sh | sh
# Join your tailnet
sudo tailscale up
# Verify
tailscale status
```
## 4. DictIA
```bash
# Clone the repo
cd ~
git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
cd dictia
git checkout dictia-branding
# Run the setup
bash deployment/setup.sh --profile cloud
```
The setup will:
- Generate the `.env` with your credentials
- Create the data directories
- Build the Docker image
- Start the containers
## 5. ASR Proxy (GCP GPU)
```bash
# Install the proxy
bash deployment/asr-proxy/setup.sh
# Add the GCP credentials
# Copy your credentials file to:
cp ~/gcp-credentials.json deployment/asr-proxy/gcp-credentials.json
# Start the service
sudo systemctl start asr-proxy
sudo systemctl status asr-proxy
```
## 6. Security
```bash
# Docker daemon config (log rotation)
sudo cp deployment/security/docker-daemon.json /etc/docker/daemon.json
sudo systemctl restart docker
# iptables firewall (blocks non-Tailscale traffic)
sudo bash deployment/security/iptables-rules.sh
# systemd service to re-apply the rules at boot
sudo cp deployment/security/docker-iptables.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable docker-iptables
```
## 7. Tailscale Serve (HTTPS)
```bash
# Expose DictIA and the ASR dashboard over Tailscale HTTPS
bash deployment/config/tailscale/setup-serve.sh
# Verify
tailscale serve status
```
DictIA will be reachable at `https://your-hostname.tailnet.ts.net/`.
## 8. systemd service (auto-start)
```bash
# Adjust the path in the file if needed
sudo cp deployment/config/systemd/dictia.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable dictia
```
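Enabling only registers the unit for the next boot; start it once now and confirm it is active:
```bash
sudo systemctl start dictia
systemctl status dictia
```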
## 9. Verification
```bash
# Full health check
bash deployment/tools/health-check.sh
# Check the endpoints
curl -s http://localhost:8899/health
curl -s http://localhost:9090/health
```
## 10. First backup
```bash
bash deployment/tools/backup.sh
```
---
## Post-install checklist
- [ ] DictIA responds on :8899
- [ ] ASR Proxy responds on :9090
- [ ] Tailscale Serve configured
- [ ] iptables: only Tailscale can connect
- [ ] Docker: log rotation configured
- [ ] systemd service enabled (auto-start at boot)
- [ ] First backup taken
- [ ] Admin credentials tested

config/docker-compose.dictia16.yml Normal file

@@ -0,0 +1,101 @@
# =============================================================================
# DictIA 16 — Docker Compose
# GPU: RTX 5070 Ti (16 GB VRAM)
# =============================================================================
#
# Services:
#   - dictia       : Main DictIA application
#   - whisperx-asr : WhisperX Large-v3 transcription service
#   - ollama       : Local Mistral 7B LLM (summaries, chat, Q&A)
#
# Startup:
#   1. cp config/env.dictia16.example .env
#   2. docker compose -f config/docker-compose.dictia16.yml up -d
#   3. Download Mistral: docker exec ollama ollama pull mistral
#
# Note: No API key needed — everything runs locally (100% private).
# =============================================================================
services:
# ---------------------------------------------------------------------------
# DictIA application
# ---------------------------------------------------------------------------
dictia:
image: dictia:latest
container_name: dictia
restart: unless-stopped
ports:
- "8899:8899"
env_file:
- ../.env
environment:
- LOG_LEVEL=ERROR
volumes:
- ../uploads:/data/uploads
- ../instance:/data/instance
# Uncomment for automatic export:
# - ../exports:/data/exports
# Uncomment for automatic processing:
# - ../auto-process:/data/auto-process
depends_on:
- whisperx-asr
- ollama
networks:
- dictia-net
# ---------------------------------------------------------------------------
# WhisperX ASR — Local transcription (WhisperX Large-v3)
# RTX 5070 Ti: BATCH_SIZE=32, COMPUTE_TYPE=float16
# ---------------------------------------------------------------------------
whisperx-asr:
image: murtazanasir/whisperx-asr-service:latest
container_name: whisperx-asr
restart: unless-stopped
environment:
- HF_TOKEN=${HF_TOKEN}
- DEVICE=cuda
- COMPUTE_TYPE=float16
- BATCH_SIZE=32
- DEFAULT_MODEL=large-v3
volumes:
- whisperx-models:/root/.cache
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
networks:
- dictia-net
# ---------------------------------------------------------------------------
# Ollama — LLM local Mistral 7B
# Résumés, points d'action, Q&A — 100% local, aucune donnée externe
# ---------------------------------------------------------------------------
ollama:
image: ollama/ollama:latest
container_name: ollama
restart: unless-stopped
volumes:
- ollama-models:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
networks:
- dictia-net
networks:
dictia-net:
driver: bridge
volumes:
whisperx-models:
driver: local
ollama-models:
driver: local

@@ -0,0 +1,75 @@
# =============================================================================
# DictIA 8 — Docker Compose
# GPU: RTX 5060 (8 GB VRAM)
# =============================================================================
#
# Services:
#   - dictia       : Main DictIA application
#   - whisperx-asr : WhisperX Large-v3 transcription service
#
# Startup:
#   1. cp config/env.dictia8.example .env
#   2. Fill in TEXT_MODEL_API_KEY in .env
#   3. docker compose -f config/docker-compose.dictia8.yml up -d
# =============================================================================
services:
  # ---------------------------------------------------------------------------
  # DictIA application
  # ---------------------------------------------------------------------------
  dictia:
    image: dictia:latest
    container_name: dictia
    restart: unless-stopped
    ports:
      - "8899:8899"
    env_file:
      - ../.env
    environment:
      - LOG_LEVEL=ERROR
    volumes:
      - ../uploads:/data/uploads
      - ../instance:/data/instance
      # Uncomment for automatic export:
      # - ../exports:/data/exports
      # Uncomment for automatic file processing:
      # - ../auto-process:/data/auto-process
    depends_on:
      - whisperx-asr
    networks:
      - dictia-net

  # ---------------------------------------------------------------------------
  # WhisperX ASR — local transcription (WhisperX Large-v3)
  # RTX 5060: BATCH_SIZE=16, COMPUTE_TYPE=float16
  # ---------------------------------------------------------------------------
  whisperx-asr:
    image: murtazanasir/whisperx-asr-service:latest
    container_name: whisperx-asr
    restart: unless-stopped
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - DEVICE=cuda
      - COMPUTE_TYPE=float16
      - BATCH_SIZE=16
      - DEFAULT_MODEL=large-v3
    volumes:
      - whisperx-models:/root/.cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    networks:
      - dictia-net

networks:
  dictia-net:
    driver: bridge

volumes:
  whisperx-models:
    driver: local

@@ -0,0 +1,134 @@
# =============================================================================
# DictIA 16 — Configuration (.env)
# GPU: RTX 5070 Ti (16 GB VRAM)
# =============================================================================
#
# Architecture:
#   - Transcription  : WhisperX Large-v3 (local, ~5.5 GB VRAM)
#   - LLM (summaries): Local Mistral 7B via Ollama (~6.4 GB VRAM)
#   - Mode           : Sequential (transcription, then summary)
#   - Total VRAM     : ~11.9 GB / 16 GB (~4.1 GB headroom)
#
# Quickstart:
#   1. cp config/env.dictia16.example .env
#   2. No API key needed — everything runs locally
#   3. docker compose -f config/docker-compose.dictia16.yml up -d
# =============================================================================
# =============================================================================
# TEXT MODEL — Summaries, titles, chat (LOCAL LLM)
# =============================================================================
# DictIA 16 uses Mistral 7B locally via Ollama.
# No data leaves the server — 100% private.
TEXT_MODEL_BASE_URL=http://ollama:11434/v1
TEXT_MODEL_API_KEY=not-required
TEXT_MODEL_NAME=mistral
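# Smoke test once the stack is up (run on the host; the model must be pulled first):
#   docker exec ollama ollama run mistral "Say hello"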
# --- Separate chat model (optional) ---
# Same model by default; can be swapped for a faster one.
# CHAT_MODEL_API_KEY=not-required
# CHAT_MODEL_BASE_URL=http://ollama:11434/v1
# CHAT_MODEL_NAME=mistral

# =============================================================================
# TRANSCRIPTION — Local WhisperX ASR (REQUIRED)
# =============================================================================
# WhisperX runs locally in a separate Docker container.
# The ASR service is defined in docker-compose.dictia16.yml.
ASR_BASE_URL=http://whisperx-asr:9000

# Diarization (automatic speaker identification) — recommended
ASR_DIARIZE=true
ASR_RETURN_SPEAKER_EMBEDDINGS=true

# Expected number of speakers (optional; helps accuracy)
# ASR_MIN_SPEAKERS=1
# ASR_MAX_SPEAKERS=6
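# Startup check (host side): docker logs --tail 20 whisperx-asr
# The first run downloads the large-v3 model into the cache volume, which can take a while.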
# =============================================================================
# ADMIN SETTINGS
# =============================================================================
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@yourcompany.com
ADMIN_PASSWORD=changeme

# =============================================================================
# ACCESS AND REGISTRATION
# =============================================================================
# Disable public registration (invitation-only access)
ALLOW_REGISTRATION=false

# Restrict registration to allowed domains
# Example: REGISTRATION_ALLOWED_DOMAINS=yourcompany.com
REGISTRATION_ALLOWED_DOMAINS=

# =============================================================================
# TIMEZONE
# =============================================================================
# Examples: America/Toronto, America/Montreal, America/New_York, UTC
TIMEZONE="America/Toronto"

# =============================================================================
# TOKEN LIMITS
# =============================================================================
SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000

# =============================================================================
# AUDIO COMPRESSION
# =============================================================================
AUDIO_COMPRESS_UPLOADS=true
AUDIO_CODEC=mp3
AUDIO_BITRATE=128k

# =============================================================================
# OPTIONAL FEATURES
# =============================================================================
# Inquire Mode — AI search across all recordings
# Can be enabled on DictIA 16 (more VRAM available)
ENABLE_INQUIRE_MODE=false

# Automatic file processing (watched folder)
ENABLE_AUTO_PROCESSING=false
# AUTO_PROCESS_MODE=admin_only
# AUTO_PROCESS_WATCH_DIR=/data/auto-process

# Automatic export
ENABLE_AUTO_EXPORT=false
# AUTO_EXPORT_DIR=/data/exports
# AUTO_EXPORT_TRANSCRIPTION=true
# AUTO_EXPORT_SUMMARY=true

# Automatic deletion / retention
ENABLE_AUTO_DELETION=false
# GLOBAL_RETENTION_DAYS=90
# DELETION_MODE=audio_only

# =============================================================================
# SHARING
# =============================================================================
ENABLE_INTERNAL_SHARING=false
ENABLE_PUBLIC_SHARING=true
USERS_CAN_DELETE=true

# =============================================================================
# PROCESSING QUEUES
# =============================================================================
JOB_QUEUE_WORKERS=2
SUMMARY_QUEUE_WORKERS=2
JOB_MAX_RETRIES=3

# =============================================================================
# DATABASE AND STORAGE
# =============================================================================
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
UPLOAD_FOLDER=/data/uploads

# =============================================================================
# LOGGING
# =============================================================================
# ERROR = production (minimal), INFO = debugging, DEBUG = development
LOG_LEVEL=ERROR

@@ -0,0 +1,126 @@
# =============================================================================
# DictIA 8 — Configuration (.env)
# GPU: RTX 5060 (8 GB VRAM)
# =============================================================================
#
# Architecture:
#   - Transcription  : WhisperX Large-v3 (local, ~5.5 GB VRAM)
#   - LLM (summaries): Cloud API via OpenRouter (not enough VRAM for a local LLM)
#
# Quickstart:
#   1. cp config/env.dictia8.example .env
#   2. Fill in TEXT_MODEL_API_KEY (and set HF_TOKEN if you use diarization)
#   3. docker compose -f config/docker-compose.dictia8.yml up -d
# =============================================================================
# =============================================================================
# TEXT MODEL — Summaries, titles, chat (REQUIRED)
# =============================================================================
# DictIA 8 uses a cloud LLM via OpenRouter (not enough VRAM for a local LLM).
# Sign up at https://openrouter.ai to get an API key.
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini
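# Sanity-check the key (OpenRouter exposes an OpenAI-compatible API; illustrative request):
#   curl -s https://openrouter.ai/api/v1/chat/completions \
#     -H "Authorization: Bearer $TEXT_MODEL_API_KEY" -H "Content-Type: application/json" \
#     -d '{"model":"openai/gpt-4o-mini","messages":[{"role":"user","content":"ping"}]}'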
# =============================================================================
# TRANSCRIPTION — Local WhisperX ASR (REQUIRED)
# =============================================================================
# WhisperX runs locally in a separate Docker container.
# The ASR service is defined in docker-compose.dictia8.yml.
ASR_BASE_URL=http://whisperx-asr:9000

# Diarization (automatic speaker identification) — recommended
ASR_DIARIZE=true
ASR_RETURN_SPEAKER_EMBEDDINGS=true

# Expected number of speakers (optional; helps accuracy)
# ASR_MIN_SPEAKERS=1
# ASR_MAX_SPEAKERS=6

# =============================================================================
# ADMIN SETTINGS
# =============================================================================
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@yourcompany.com
ADMIN_PASSWORD=changeme

# =============================================================================
# ACCESS AND REGISTRATION
# =============================================================================
# Disable public registration (invitation-only access)
ALLOW_REGISTRATION=false

# Restrict registration to allowed domains
# Example: REGISTRATION_ALLOWED_DOMAINS=yourcompany.com
REGISTRATION_ALLOWED_DOMAINS=

# =============================================================================
# TIMEZONE
# =============================================================================
# Examples: America/Toronto, America/Montreal, America/New_York, UTC
TIMEZONE="America/Toronto"

# =============================================================================
# TOKEN LIMITS
# =============================================================================
SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000

# =============================================================================
# AUDIO COMPRESSION
# =============================================================================
AUDIO_COMPRESS_UPLOADS=true
AUDIO_CODEC=mp3
AUDIO_BITRATE=128k

# =============================================================================
# OPTIONAL FEATURES
# =============================================================================
# Inquire Mode — AI search across all recordings
# Disabled on DictIA 8 (not enough VRAM for local embeddings)
ENABLE_INQUIRE_MODE=false

# Automatic file processing (watched folder)
ENABLE_AUTO_PROCESSING=false
# AUTO_PROCESS_MODE=admin_only
# AUTO_PROCESS_WATCH_DIR=/data/auto-process

# Automatic export
ENABLE_AUTO_EXPORT=false
# AUTO_EXPORT_DIR=/data/exports
# AUTO_EXPORT_TRANSCRIPTION=true
# AUTO_EXPORT_SUMMARY=true

# Automatic deletion / retention
ENABLE_AUTO_DELETION=false
# GLOBAL_RETENTION_DAYS=90
# DELETION_MODE=audio_only

# =============================================================================
# SHARING
# =============================================================================
ENABLE_INTERNAL_SHARING=false
ENABLE_PUBLIC_SHARING=true
USERS_CAN_DELETE=true

# =============================================================================
# PROCESSING QUEUES
# =============================================================================
JOB_QUEUE_WORKERS=2
SUMMARY_QUEUE_WORKERS=2
JOB_MAX_RETRIES=3

# =============================================================================
# DATABASE AND STORAGE
# =============================================================================
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
UPLOAD_FOLDER=/data/uploads

# =============================================================================
# LOGGING
# =============================================================================
# ERROR = production (minimal), INFO = debugging, DEBUG = development
LOG_LEVEL=ERROR

@@ -0,0 +1,8 @@
{
"log-driver": "json-file",
"log-opts": {
"max-size": "10m",
"max-file": "3"
},
"storage-driver": "overlay2"
}

@@ -0,0 +1,12 @@
[Unit]
Description=DictIA Docker iptables rules
After=docker.service tailscaled.service
Requires=docker.service
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/bash /opt/dictia/deployment/security/iptables-rules.sh
[Install]
WantedBy=multi-user.target

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# DictIA — iptables rules for cloud VPS
#
# Allows Docker internal traffic to reach the ASR proxy on port 9090.
# Blocks direct external access to Docker container IPs.
# Tailscale + UFW handle the main firewall — this script adds Docker-specific rules.
#
# Usage: sudo bash iptables-rules.sh
set -euo pipefail
echo "=== DictIA iptables rules ==="
# Allow Docker containers (172.16.0.0/12) to reach ASR proxy on port 9090
# This rule goes BEFORE the default DROP policy so containers can talk to the proxy
iptables -C INPUT -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT 2>/dev/null \
|| iptables -I INPUT 1 -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT
# Block direct external access to Docker container IPs (raw table, before conntrack)
# Protects containers on non-default bridge networks (e.g., dictia-network)
for NETWORK_ID in $(docker network ls --filter driver=bridge --format '{{.ID}}' 2>/dev/null); do
  # Keys containing dots need the "index" template function in Go templates
  BRIDGE=$(docker network inspect "$NETWORK_ID" \
    --format '{{index .Options "com.docker.network.bridge.name"}}' 2>/dev/null || echo "")
  # User-defined bridges usually have no explicit name option; Docker names them br-<id12>
  [ -z "$BRIDGE" ] && BRIDGE="br-${NETWORK_ID:0:12}"
  [ "$BRIDGE" = "docker0" ] && continue
  for CONTAINER_IP in $(docker network inspect "$NETWORK_ID" \
      --format '{{range .Containers}}{{.IPv4Address}} {{end}}' 2>/dev/null); do
    IP="${CONTAINER_IP%/*}"
    [ -z "$IP" ] && continue
    iptables -t raw -C PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP 2>/dev/null \
      || iptables -t raw -A PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP
    echo "  Protected $IP on $BRIDGE"
  done
done
echo "Rules applied. Tailscale + Docker internal traffic allowed."
echo "Verify with: sudo iptables -L -n -t raw"

deployment/setup.sh Executable file
@@ -0,0 +1,300 @@
#!/usr/bin/env bash
# DictIA — Main setup script
#
# Interactive installer that detects hardware and configures the appropriate
# deployment profile (cloud, local-cpu, local-gpu).
#
# Usage:
# bash deployment/setup.sh # Interactive mode
# bash deployment/setup.sh --profile cloud # Non-interactive
# bash deployment/setup.sh --profile local-gpu
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
PROFILE=""
for arg in "$@"; do
case "$arg" in
--profile=*) PROFILE="${arg#*=}" ;;
--profile) shift_next=true ;;
*)
if [ "${shift_next:-false}" = true ]; then
PROFILE="$arg"
shift_next=false
fi
;;
esac
done
# --- Colors ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
info() { echo -e "${CYAN}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
err() { echo -e "${RED}[ERROR]${NC} $*"; }
echo
echo -e "${CYAN}========================================${NC}"
echo -e "${CYAN} DictIA — Setup${NC}"
echo -e "${CYAN}========================================${NC}"
echo
# ==========================================================================
# 1. Hardware Detection
# ==========================================================================
info "Detecting hardware..."
# Docker
if command -v docker &>/dev/null && docker info &>/dev/null; then
DOCKER_VERSION=$(docker --version | grep -oP '\d+\.\d+\.\d+' | head -1)
ok "Docker $DOCKER_VERSION"
else
err "Docker not found or not running."
echo " Install Docker: https://docs.docker.com/engine/install/"
exit 1
fi
# Docker Compose
if docker compose version &>/dev/null; then
COMPOSE_VERSION=$(docker compose version --short 2>/dev/null || echo "unknown")
ok "Docker Compose $COMPOSE_VERSION"
else
err "Docker Compose not found."
echo " Docker Compose V2 is required (comes with Docker Desktop or docker-compose-plugin)"
exit 1
fi
# GPU
HAS_GPU=false
if command -v nvidia-smi &>/dev/null; then
GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 || echo "")
if [ -n "$GPU_NAME" ]; then
HAS_GPU=true
ok "NVIDIA GPU: $GPU_NAME"
# Check nvidia-container-toolkit
if docker info 2>/dev/null | grep -qi nvidia; then
ok "nvidia-container-toolkit detected"
else
warn "nvidia-container-toolkit not detected. Required for local-gpu profile."
echo " Install: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
fi
fi
else
info "No NVIDIA GPU detected"
fi
# RAM
if command -v free &>/dev/null; then
RAM_GB=$(free -g | awk '/Mem:/{print $2}')
info "RAM: ${RAM_GB}GB"
fi
# Disk
DISK_AVAIL=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $4}')
info "Disk available: $DISK_AVAIL"
echo
# ==========================================================================
# 2. Profile Selection
# ==========================================================================
if [ -z "$PROFILE" ]; then
echo -e "${CYAN}Select deployment profile:${NC}"
echo
echo " 1) cloud — VPS with ASR Proxy (GCP GPU on demand)"
echo " Best for: remote servers, pay-per-use GPU"
echo
echo " 2) local-gpu — Local NVIDIA GPU for transcription"
echo " Best for: dedicated GPU server, fastest"
if [ "$HAS_GPU" = false ]; then
echo -e " ${YELLOW}(No GPU detected on this machine)${NC}"
fi
echo
echo " 3) local-cpu — CPU-only transcription (slow)"
echo " Best for: testing, low-volume usage"
echo
read -rp "Choice [1-3]: " CHOICE
case "$CHOICE" in
1) PROFILE="cloud" ;;
2) PROFILE="local-gpu" ;;
3) PROFILE="local-cpu" ;;
*) err "Invalid choice"; exit 1 ;;
esac
fi
COMPOSE_FILE="$SCRIPT_DIR/docker/docker-compose.$PROFILE.yml"
if [ ! -f "$COMPOSE_FILE" ]; then
err "Compose file not found: $COMPOSE_FILE"
exit 1
fi
ok "Profile: $PROFILE"
echo
# ==========================================================================
# 3. Generate .env
# ==========================================================================
ENV_FILE="$PROJECT_DIR/.env"
if [ -f "$ENV_FILE" ]; then
warn ".env already exists. Keeping existing configuration."
echo " To reconfigure, delete .env and re-run setup."
else
info "Generating .env..."
# Generate secret key (64 hex chars on a single line; the old xxd fallback
# could embed a newline because xxd -p wraps output at 60 columns)
SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_hex(32))" 2>/dev/null \
|| openssl rand -hex 32 2>/dev/null \
|| head -c 32 /dev/urandom | xxd -p -c 64)
# Prompt for admin credentials
read -rp "Admin username [admin]: " ADMIN_USER
ADMIN_USER="${ADMIN_USER:-admin}"
read -rp "Admin email [admin@example.com]: " ADMIN_EMAIL
ADMIN_EMAIL="${ADMIN_EMAIL:-admin@example.com}"
read -rsp "Admin password: " ADMIN_PASS
echo
ADMIN_PASS="${ADMIN_PASS:-changeme}"
# Prompt for text model API key
echo
info "DictIA needs a text/LLM API key for summaries, titles, and chat."
echo " Recommended: OpenRouter (https://openrouter.ai) — access to many models"
read -rp "Text model API key (or press Enter to skip): " TEXT_API_KEY
TEXT_API_KEY="${TEXT_API_KEY:-your_openrouter_api_key}"
# HuggingFace token for diarization
if [ "$PROFILE" != "cloud" ]; then
echo
info "For speaker diarization, a HuggingFace token is needed."
echo " Get one at: https://huggingface.co/settings/tokens"
echo " Accept model: https://huggingface.co/pyannote/speaker-diarization-3.1"
read -rp "HuggingFace token (or press Enter to skip): " HF_TOKEN
HF_TOKEN="${HF_TOKEN:-}"
else
HF_TOKEN=""
fi
# Write .env
cp "$SCRIPT_DIR/docker/.env.example" "$ENV_FILE"
sed -i "s|SECRET_KEY=.*|SECRET_KEY=$SECRET_KEY|" "$ENV_FILE"
sed -i "s|DICTIA_PROFILE=.*|DICTIA_PROFILE=$PROFILE|" "$ENV_FILE"
sed -i "s|ADMIN_USERNAME=.*|ADMIN_USERNAME=$ADMIN_USER|" "$ENV_FILE"
sed -i "s|ADMIN_EMAIL=.*|ADMIN_EMAIL=$ADMIN_EMAIL|" "$ENV_FILE"
sed -i "s|ADMIN_PASSWORD=.*|ADMIN_PASSWORD=$ADMIN_PASS|" "$ENV_FILE"
sed -i "s|TEXT_MODEL_API_KEY=.*|TEXT_MODEL_API_KEY=$TEXT_API_KEY|" "$ENV_FILE"
sed -i "s|HF_TOKEN=.*|HF_TOKEN=$HF_TOKEN|" "$ENV_FILE"
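# Note: the sed calls above rely on GNU sed's -i; on macOS/BSD use `sed -i ''`.
# Values containing '|' or '&' would also need escaping before substitution.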
ok ".env generated"
fi
echo
# ==========================================================================
# 4. Create data directories
# ==========================================================================
info "Creating data directories..."
mkdir -p "$PROJECT_DIR/data/uploads" "$PROJECT_DIR/data/instance"
ok "data/uploads and data/instance created"
echo
# ==========================================================================
# 5. Profile-specific setup
# ==========================================================================
case "$PROFILE" in
cloud)
info "Cloud profile — setting up ASR Proxy..."
if [ -f "$SCRIPT_DIR/asr-proxy/setup.sh" ]; then
echo " Run the ASR proxy setup separately:"
echo " bash $SCRIPT_DIR/asr-proxy/setup.sh"
fi
echo
info "Setting up iptables rules..."
if [ -f "$SCRIPT_DIR/security/iptables-rules.sh" ] && [ "$(id -u)" -eq 0 ]; then
bash "$SCRIPT_DIR/security/iptables-rules.sh"
else
echo " Run as root: sudo bash $SCRIPT_DIR/security/iptables-rules.sh"
fi
echo
info "Setting up Tailscale Serve..."
if command -v tailscale &>/dev/null; then
echo " Run: bash $SCRIPT_DIR/config/tailscale/setup-serve.sh"
else
warn "Tailscale not installed."
echo " Install: curl -fsSL https://tailscale.com/install.sh | sh"
fi
;;
local-gpu)
info "Local GPU profile — verifying NVIDIA runtime..."
if docker info 2>/dev/null | grep -qi nvidia; then
ok "NVIDIA Docker runtime available"
# Quick GPU test
if docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi &>/dev/null; then
ok "GPU test passed"
else
warn "GPU test failed. Check nvidia-container-toolkit installation."
fi
else
err "NVIDIA Docker runtime not found."
echo " Install nvidia-container-toolkit and restart Docker."
echo " https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
fi
;;
local-cpu)
warn "CPU-only transcription is significantly slower than GPU."
echo " Expect ~10x real-time (1h audio = ~10h processing)."
echo " Consider local-gpu or cloud profile for better performance."
;;
esac
echo
# ==========================================================================
# 6. Build and start
# ==========================================================================
info "Building DictIA Docker image..."
cd "$PROJECT_DIR"
docker build -t innova-ai/dictia:latest .
ok "Image built"
echo
info "Starting DictIA ($PROFILE profile)..."
docker compose -f "$COMPOSE_FILE" up -d
ok "Containers started"
# ==========================================================================
# 7. Health check
# ==========================================================================
echo
info "Waiting for DictIA to become healthy..."
RETRIES=30
for i in $(seq 1 $RETRIES); do
if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
ok "DictIA is healthy!"
break
fi
if [ "$i" -eq "$RETRIES" ]; then
warn "Health check timeout. Check logs: docker compose -f $COMPOSE_FILE logs"
fi
sleep 5
done
echo
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN} DictIA is ready!${NC}"
echo -e "${GREEN}========================================${NC}"
echo
echo " App: http://localhost:8899"
echo " Profile: $PROFILE"
echo " Compose: $COMPOSE_FILE"
echo
echo " Tools:"
echo " Update: bash deployment/tools/update.sh"
echo " Backup: bash deployment/tools/backup.sh"
echo " Health check: bash deployment/tools/health-check.sh"
echo

@@ -0,0 +1,89 @@
#!/usr/bin/env bash
# DictIA — Backup script
#
# Creates a timestamped backup of data, env, and Docker volumes.
# Keeps the last N backups (default: 5).
#
# Usage: bash backup.sh [BACKUP_DIR]
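# Examples:
#   bash backup.sh                        # writes to <project>/backups
#   bash backup.sh /mnt/backups/dictia    # custom destination (example path)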
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
BACKUP_BASE="${1:-$PROJECT_DIR/backups}"
KEEP_COUNT=5
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
BACKUP_DIR="$BACKUP_BASE/dictia-$TIMESTAMP"
echo "=== DictIA Backup ==="
echo "Project: $PROJECT_DIR"
echo "Backup: $BACKUP_DIR"
echo
mkdir -p "$BACKUP_DIR"
# 1. Data directory
if [ -d "$PROJECT_DIR/data" ]; then
echo "[1/4] Backing up data/..."
cp -a "$PROJECT_DIR/data" "$BACKUP_DIR/data"
else
echo "[1/4] No data/ directory found, skipping."
fi
# 2. Environment file
if [ -f "$PROJECT_DIR/.env" ]; then
echo "[2/4] Backing up .env..."
cp "$PROJECT_DIR/.env" "$BACKUP_DIR/dot-env"
else
echo "[2/4] No .env found, skipping."
fi
# 3. ASR Proxy stats
ASR_STATS="$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
if [ -f "$ASR_STATS" ]; then
echo "[3/4] Backing up ASR proxy stats..."
cp "$ASR_STATS" "$BACKUP_DIR/asr-usage-stats.json"
else
echo "[3/4] No ASR proxy stats, skipping."
fi
# 4. Docker volumes (if using managed volumes)
echo "[4/4] Checking Docker volumes..."
if docker volume ls --format '{{.Name}}' 2>/dev/null | grep -q "whisperx-cache"; then
echo " Exporting whisperx-cache volume..."
docker run --rm -v whisperx-cache:/source -v "$BACKUP_DIR":/backup \
alpine tar czf /backup/whisperx-cache.tar.gz -C /source . 2>/dev/null || true
fi
# Write manifest
cat > "$BACKUP_DIR/manifest.json" <<MANIFEST
{
"timestamp": "$TIMESTAMP",
"project_dir": "$PROJECT_DIR",
"hostname": "$(hostname)",
"contents": {
"data": $([ -d "$BACKUP_DIR/data" ] && echo "true" || echo "false"),
"env": $([ -f "$BACKUP_DIR/dot-env" ] && echo "true" || echo "false"),
"asr_stats": $([ -f "$BACKUP_DIR/asr-usage-stats.json" ] && echo "true" || echo "false"),
"whisperx_cache": $([ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ] && echo "true" || echo "false")
}
}
MANIFEST
# Compress
echo
echo "Compressing backup..."
ARCHIVE="$BACKUP_BASE/dictia-$TIMESTAMP.tar.gz"
tar czf "$ARCHIVE" -C "$BACKUP_BASE" "dictia-$TIMESTAMP"
rm -rf "$BACKUP_DIR"
echo "Archive: $ARCHIVE ($(du -h "$ARCHIVE" | cut -f1))"
# Rotate old backups
BACKUP_COUNT=$(ls -1 "$BACKUP_BASE"/dictia-*.tar.gz 2>/dev/null | wc -l)
if [ "$BACKUP_COUNT" -gt "$KEEP_COUNT" ]; then
echo
echo "Rotating backups (keeping last $KEEP_COUNT)..."
ls -1t "$BACKUP_BASE"/dictia-*.tar.gz | tail -n +"$((KEEP_COUNT + 1))" | xargs rm -f
fi
echo
echo "=== Backup complete ==="

@@ -0,0 +1,157 @@
#!/usr/bin/env bash
# DictIA — Health check diagnostic
#
# Checks Docker, containers, endpoints, disk, RAM, and GPU.
#
# Usage:
# bash health-check.sh # Human-readable output
# bash health-check.sh --json # JSON output
# bash health-check.sh --quiet # Exit code only (0=ok, 1=issue)
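# Example cron entry (every 5 minutes; adjust the install path):
#   */5 * * * * bash /opt/dictia/deployment/tools/health-check.sh --quiet || logger -t dictia "health check failed"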
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
OUTPUT="human"
ISSUES=0
for arg in "$@"; do
case "$arg" in
--json) OUTPUT="json" ;;
--quiet) OUTPUT="quiet" ;;
esac
done
declare -A CHECKS
check() {
local name="$1"
local status="$2"
local detail="${3:-}"
CHECKS["$name"]="$status|$detail"
if [ "$status" = "error" ] || [ "$status" = "warning" ]; then
ISSUES=$((ISSUES + 1))
fi
}
# --- Docker ---
if command -v docker &>/dev/null && docker info &>/dev/null; then
check "docker" "ok" "Docker daemon running"
else
check "docker" "error" "Docker not available"
fi
# --- Containers ---
DICTIA_STATUS=$(docker inspect --format='{{.State.Health.Status}}' dictia 2>/dev/null || echo "not_found")
if [ "$DICTIA_STATUS" = "healthy" ]; then
check "container_dictia" "ok" "healthy"
elif [ "$DICTIA_STATUS" = "not_found" ]; then
check "container_dictia" "error" "container not found"
else
check "container_dictia" "warning" "$DICTIA_STATUS"
fi
WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null || echo "not_found")
if [ "$WHISPERX_STATUS" = "running" ]; then
check "container_whisperx" "ok" "running"
elif [ "$WHISPERX_STATUS" = "not_found" ]; then
check "container_whisperx" "info" "not present (cloud profile?)"
else
check "container_whisperx" "warning" "$WHISPERX_STATUS"
fi
# --- Endpoints ---
if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
check "endpoint_dictia" "ok" "http://localhost:8899 responding"
else
check "endpoint_dictia" "error" "http://localhost:8899 not responding"
fi
if curl -sf -o /dev/null -m 5 http://localhost:9000/health 2>/dev/null; then
check "endpoint_whisperx" "ok" "http://localhost:9000 responding"
else
check "endpoint_whisperx" "info" "http://localhost:9000 not responding"
fi
if curl -sf -o /dev/null -m 5 http://localhost:9090/health 2>/dev/null; then
check "endpoint_asr_proxy" "ok" "http://localhost:9090 responding"
else
check "endpoint_asr_proxy" "info" "http://localhost:9090 not responding"
fi
# --- Disk ---
DISK_USED=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $5}' | tr -d '%')
if [ -n "$DISK_USED" ]; then
if [ "$DISK_USED" -gt 90 ]; then
check "disk" "error" "${DISK_USED}% used"
elif [ "$DISK_USED" -gt 80 ]; then
check "disk" "warning" "${DISK_USED}% used"
else
check "disk" "ok" "${DISK_USED}% used"
fi
fi
# --- RAM ---
if command -v free &>/dev/null; then
MEM_TOTAL=$(free -m | awk '/Mem:/{print $2}')
MEM_AVAIL=$(free -m | awk '/Mem:/{print $7}')
MEM_USED_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
if [ "$MEM_USED_PCT" -gt 90 ]; then
check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
else
check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
fi
fi
# --- GPU ---
if command -v nvidia-smi &>/dev/null; then
GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader 2>/dev/null || echo "error")
if [ "$GPU_INFO" != "error" ]; then
check "gpu" "ok" "$GPU_INFO"
else
check "gpu" "warning" "nvidia-smi present but query failed"
fi
fi
# --- Output ---
if [ "$OUTPUT" = "json" ]; then
echo "{"
echo " \"timestamp\": \"$(date -Is)\","
echo " \"issues\": $ISSUES,"
echo " \"checks\": {"
FIRST=true
for name in "${!CHECKS[@]}"; do
IFS='|' read -r status detail <<< "${CHECKS[$name]}"
if [ "$FIRST" = true ]; then
FIRST=false
else
echo ","
fi
printf ' "%s": {"status": "%s", "detail": "%s"}' "$name" "$status" "$detail"
done
echo
echo " }"
echo "}"
elif [ "$OUTPUT" = "quiet" ]; then
exit $( [ "$ISSUES" -eq 0 ] && echo 0 || echo 1 )
else
echo "=== DictIA Health Check ==="
echo
for name in docker container_dictia container_whisperx endpoint_dictia endpoint_whisperx endpoint_asr_proxy disk memory gpu; do
if [ -n "${CHECKS[$name]+x}" ]; then
IFS='|' read -r status detail <<< "${CHECKS[$name]}"
case "$status" in
ok) ICON="[OK]" ;;
warning) ICON="[!!]" ;;
error) ICON="[ERR]" ;;
info) ICON="[--]" ;;
esac
printf " %-22s %s %s\n" "$name" "$ICON" "$detail"
fi
done
echo
if [ "$ISSUES" -eq 0 ]; then
echo "All checks passed."
else
echo "$ISSUES issue(s) found."
fi
fi

deployment/tools/restore.sh Normal file
@@ -0,0 +1,101 @@
#!/usr/bin/env bash
# DictIA — Restore script
#
# Restores a DictIA backup archive created by backup.sh.
#
# Usage: bash restore.sh <ARCHIVE_PATH> [PROJECT_DIR]
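# Example (archive names follow backup.sh's dictia-YYYYMMDD-HHMMSS pattern):
#   bash restore.sh backups/dictia-20260316-030000.tar.gz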
set -euo pipefail
ARCHIVE="${1:-}"
PROJECT_DIR="${2:-$(cd "$(dirname "$0")/../.." && pwd)}"
if [ -z "$ARCHIVE" ] || [ ! -f "$ARCHIVE" ]; then
echo "Usage: bash restore.sh <backup-archive.tar.gz> [project-dir]"
echo
echo "Available backups:"
ls -1t "$PROJECT_DIR/backups"/dictia-*.tar.gz 2>/dev/null | head -5 || echo " (none found)"
exit 1
fi
echo "=== DictIA Restore ==="
echo "Archive: $ARCHIVE"
echo "Target: $PROJECT_DIR"
echo
# Validate archive
echo "Validating archive..."
TMPDIR=$(mktemp -d)
tar xzf "$ARCHIVE" -C "$TMPDIR"
BACKUP_DIR=$(ls -1d "$TMPDIR"/dictia-* | head -1)
if [ ! -f "$BACKUP_DIR/manifest.json" ]; then
echo "ERROR: Invalid backup archive (no manifest.json)"
rm -rf "$TMPDIR"
exit 1
fi
echo "Manifest:"
cat "$BACKUP_DIR/manifest.json"
echo
echo
# Confirmation
read -rp "This will overwrite current data. Continue? [y/N] " CONFIRM
if [ "$CONFIRM" != "y" ] && [ "$CONFIRM" != "Y" ]; then
echo "Aborted."
rm -rf "$TMPDIR"
exit 0
fi
# Stop services
echo
echo "Stopping DictIA services..."
COMPOSE_FILE=""
for f in cloud local-cpu local-gpu; do
if [ -f "$PROJECT_DIR/deployment/docker/docker-compose.$f.yml" ]; then
COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
fi
done
if [ -n "$COMPOSE_FILE" ]; then
docker compose -f "$COMPOSE_FILE" down 2>/dev/null || true
fi
# Restore data
if [ -d "$BACKUP_DIR/data" ]; then
echo "Restoring data/..."
rm -rf "$PROJECT_DIR/data"
cp -a "$BACKUP_DIR/data" "$PROJECT_DIR/data"
fi
# Restore .env
if [ -f "$BACKUP_DIR/dot-env" ]; then
echo "Restoring .env..."
cp "$BACKUP_DIR/dot-env" "$PROJECT_DIR/.env"
fi
# Restore ASR stats
if [ -f "$BACKUP_DIR/asr-usage-stats.json" ]; then
echo "Restoring ASR proxy stats..."
cp "$BACKUP_DIR/asr-usage-stats.json" "$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
fi
# Restore Docker volumes
if [ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ]; then
echo "Restoring whisperx-cache volume..."
docker volume create whisperx-cache 2>/dev/null || true
docker run --rm -v whisperx-cache:/target -v "$BACKUP_DIR":/backup \
alpine sh -c "cd /target && tar xzf /backup/whisperx-cache.tar.gz" 2>/dev/null || true
fi
# Cleanup
rm -rf "$TMPDIR"
# Restart services
echo
echo "Restarting DictIA..."
if [ -n "$COMPOSE_FILE" ]; then
docker compose -f "$COMPOSE_FILE" up -d
fi
echo
echo "=== Restore complete ==="

deployment/tools/update.sh Normal file
@@ -0,0 +1,105 @@
#!/usr/bin/env bash
# DictIA — Update script
#
# Pulls latest code, rebuilds Docker image, and restarts services.
# Detects the active deployment profile automatically.
#
# Usage: bash update.sh [--no-pull] [--no-build]
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
NO_PULL=false
NO_BUILD=false
for arg in "$@"; do
case "$arg" in
--no-pull) NO_PULL=true ;;
--no-build) NO_BUILD=true ;;
*) echo "Unknown option: $arg"; exit 1 ;;
esac
done
echo "=== DictIA Update ==="
echo "Project: $PROJECT_DIR"
echo
# 1. Detect active compose file
COMPOSE_FILE=""
PROFILE=""
for f in cloud local-cpu local-gpu; do
CF="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
if [ -f "$CF" ] && docker compose -f "$CF" ps --quiet 2>/dev/null | grep -q .; then
COMPOSE_FILE="$CF"
PROFILE="$f"
break
fi
done
if [ -z "$COMPOSE_FILE" ]; then
# Fallback: check .env for profile
if [ -f "$PROJECT_DIR/.env" ]; then
PROFILE=$(grep -E '^DICTIA_PROFILE=' "$PROJECT_DIR/.env" 2>/dev/null | cut -d= -f2 || echo "cloud")
fi
PROFILE="${PROFILE:-cloud}"
COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$PROFILE.yml"
fi
echo "Profile: $PROFILE"
echo "Compose: $COMPOSE_FILE"
echo
# 2. Git pull
if [ "$NO_PULL" = false ]; then
echo "[1/5] Pulling latest code..."
cd "$PROJECT_DIR"
git pull origin dictia-branding
else
echo "[1/5] Skipping git pull (--no-pull)"
fi
# 3. Rebuild DictIA image
if [ "$NO_BUILD" = false ]; then
echo "[2/5] Building DictIA image..."
cd "$PROJECT_DIR"
docker build -t innova-ai/dictia:latest .
else
echo "[2/5] Skipping build (--no-build)"
fi
# 3b. Pull upstream images (WhisperX) if local profile
if [ "$PROFILE" != "cloud" ] && [ "$NO_BUILD" = false ]; then
echo "[3/5] Pulling upstream images (WhisperX)..."
docker compose -f "$COMPOSE_FILE" pull whisperx-asr 2>/dev/null || true
else
echo "[3/5] Skipping upstream pull (cloud profile or --no-build)"
fi
# 4. Restart containers
echo "[4/5] Restarting containers..."
docker compose -f "$COMPOSE_FILE" down
docker compose -f "$COMPOSE_FILE" up -d
# 5. Wait for health
echo "[5/5] Waiting for health check..."
RETRIES=30
for i in $(seq 1 $RETRIES); do
if docker compose -f "$COMPOSE_FILE" ps | grep -q "healthy"; then
echo " DictIA is healthy!"
break
fi
if [ "$i" -eq "$RETRIES" ]; then
echo " WARNING: Health check timeout. Check: docker compose -f $COMPOSE_FILE logs"
fi
sleep 5
done
# Cleanup dangling images
echo
echo "Cleaning up old images..."
docker image prune -f 2>/dev/null || true
echo
echo "=== Update complete ==="
echo "DictIA: http://localhost:8899"
docker compose -f "$COMPOSE_FILE" ps