diff --git a/README.md b/README.md
index e5ae33a..e189276 100644
--- a/README.md
+++ b/README.md
@@ -13,20 +13,12 @@ Application de transcription audio propulsee par l'intelligence artificielle. Tr
- Conformite Loi 25 (Quebec) — journal d'audit integre
- 100% auto-heberge — vos donnees restent chez vous
-## Demarrage rapide
-
-Voir le [guide de demarrage](client_docs/guide-utilisateur/premiers-pas.md).
-
## Documentation
- [Guide utilisateur](client_docs/guide-utilisateur/index.md)
- [Guide administrateur](client_docs/guide-admin/index.md)
- [Depannage](client_docs/depannage/index.md)
-## Deploiement
-
-Voir le [guide de deploiement](deployment/README.md) et les profils Docker dans `deployment/profiles/`.
-
## Licence
AGPL-3.0 — voir [LICENSE](LICENSE).
diff --git a/deployment/README.md b/deployment/README.md
deleted file mode 100644
index 0569f71..0000000
--- a/deployment/README.md
+++ /dev/null
@@ -1,105 +0,0 @@
-# DictIA — Deployment Infrastructure
-
-Infrastructure de deploiement reproductible pour DictIA.
-
-## Choix de profil
-
-```
-Quel est ton setup?
- |
- +-- VPS / serveur cloud?
- | --> cloud (ASR Proxy GCP GPU on demand)
- |
- +-- Machine locale avec GPU NVIDIA?
- | --> local-gpu (WhisperX sur GPU, le plus rapide)
- |
- +-- Machine locale sans GPU?
- --> local-cpu (WhisperX sur CPU, lent mais fonctionnel)
-```
-
-## Quickstart
-
-```bash
-git clone https://gitea.innova-ai.ca/Innova-AI/dictia-public.git
-cd dictia-public
-git checkout dictia-branding
-bash deployment/setup.sh
-```
-
-Le script detecte le hardware et guide l'installation.
-
-## Architecture
-
-```
-deployment/
-├── setup.sh # Installateur principal
-├── docker/
-│ ├── docker-compose.cloud.yml
-│ ├── docker-compose.local-cpu.yml
-│ ├── docker-compose.local-gpu.yml
-│ └── .env.example
-├── asr-proxy/ # Proxy GCP GPU (cloud seulement)
-│ ├── proxy.py
-│ ├── dashboard.html
-│ ├── requirements.txt
-│ ├── setup.sh
-│ └── asr-proxy.service
-├── security/ # Securite Docker (cloud)
-│ ├── docker-daemon.json
-│ ├── iptables-rules.sh
-│ └── docker-iptables.service
-├── config/
-│ ├── nginx/dictia.conf
-│ ├── tailscale/setup-serve.sh
-│ └── systemd/dictia.service
-├── tools/
-│ ├── backup.sh
-│ ├── restore.sh
-│ ├── update.sh
-│ └── health-check.sh
-└── docs/
- ├── QUICKSTART.md
- ├── VPS-SETUP.md
- ├── LOCAL-SETUP.md
- ├── MAINTENANCE.md
- └── TROUBLESHOOTING.md
-```
-
-### Profil Cloud
-
-```
-Internet --> Tailscale --> VPS
- |
- DictIA :8899
- |
- ASR Proxy :9090
- |
- GCP GPU (auto start/stop)
- |
- WhisperX :9000
-```
-
-### Profil Local GPU/CPU
-
-```
-localhost:8899 --> DictIA container
- |
- WhisperX container :9000
- |
- GPU local (ou CPU)
-```
-
-## Documentation
-
-- [QUICKSTART.md](docs/QUICKSTART.md) — Demarrage rapide par profil
-- [VPS-SETUP.md](docs/VPS-SETUP.md) — Setup VPS complet from scratch
-- [LOCAL-SETUP.md](docs/LOCAL-SETUP.md) — Setup local GPU/CPU
-- [MAINTENANCE.md](docs/MAINTENANCE.md) — Backup, restore, update, monitoring
-- [TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) — Problemes courants + solutions
-
-## Mise a jour upstream
-
-Tous les fichiers dans `deployment/` sont specifiques a DictIA.
-Aucun conflit lors des merges upstream, sauf `deployment/setup.sh`
-(qui remplace le setup.sh original de Speakr).
-
diff --git a/deployment/asr-proxy/.gitignore b/deployment/asr-proxy/.gitignore
deleted file mode 100644
index 8ff2efb..0000000
--- a/deployment/asr-proxy/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-gcp-credentials.json
-usage-stats.json
-venv/
-__pycache__/
-*.pyc
diff --git a/deployment/asr-proxy/asr-proxy.service b/deployment/asr-proxy/asr-proxy.service
deleted file mode 100644
index 917ec8a..0000000
--- a/deployment/asr-proxy/asr-proxy.service
+++ /dev/null
@@ -1,22 +0,0 @@
-# TEMPLATE — Ne pas copier directement dans /etc/systemd/system/.
-# Les variables ${ASR_PROXY_USER} et ${ASR_PROXY_DIR} sont des placeholders.
-# Le fichier service réel est généré par setup.sh (via heredoc bash) avec les
-# valeurs résolues de $SERVICE_USER et $INSTALL_DIR.
-# Usage : sudo bash setup.sh (installe et active le service automatiquement)
-
-[Unit]
-Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
-After=network.target
-
-[Service]
-Type=simple
-User=${ASR_PROXY_USER}
-Restart=always
-RestartSec=10
-WorkingDirectory=${ASR_PROXY_DIR}
-ExecStart=${ASR_PROXY_DIR}/venv/bin/python proxy.py
-Environment=GOOGLE_APPLICATION_CREDENTIALS=${ASR_PROXY_DIR}/gcp-credentials.json
-Environment=STATS_FILE=${ASR_PROXY_DIR}/usage-stats.json
-
-[Install]
-WantedBy=multi-user.target
diff --git a/deployment/asr-proxy/dashboard.html b/deployment/asr-proxy/dashboard.html
deleted file mode 100644
index ba1ca7b..0000000
--- a/deployment/asr-proxy/dashboard.html
+++ /dev/null
@@ -1,1534 +0,0 @@
-
-
-
-
-
-DictIA GPU Monitor
-
-
-
-
-
-
-
-
- Connection error: unable to reach proxy
-
-
-
-
-
-
-
-
-
-
- ---
- ---
-
- 0 active requests
-
-
-
-
-
-
-
--
-
GPU Time
-
This Month
-
-
-
--
-
Estimated Cost
-
USD
-
-
-
--
-
Total Requests
-
This Month
-
-
-
--
-
Remaining
-
of --h
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- | Time |
- Type |
- Duration |
- Status |
- Zone |
-
-
-
- No requests yet |
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/deployment/asr-proxy/proxy.py b/deployment/asr-proxy/proxy.py
deleted file mode 100644
index db20d1a..0000000
--- a/deployment/asr-proxy/proxy.py
+++ /dev/null
@@ -1,741 +0,0 @@
-"""DictIA ASR Proxy - Auto-start/stop GCP GPU for WhisperX + Ollama.
-
-Uses Google Cloud Compute REST API directly (no gcloud CLI needed).
-Proxies both ASR (WhisperX) and LLM (Ollama) requests.
-Multi-zone fallback across Canada (Montreal + Toronto).
-"""
-
-import asyncio
-import json
-import logging
-import os
-import time
-
-import httpx
-import jwt as pyjwt
-from pathlib import Path
-
-from fastapi import FastAPI, Request
-from fastapi.responses import HTMLResponse, JSONResponse, Response
-
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-log = logging.getLogger("asr-proxy")
-
-# Config — paths relative to this script's directory by default
-SCRIPT_DIR = Path(__file__).parent
-GCP_PROJECT = os.getenv("GCP_PROJECT", "speakr-gpu")  # project ID interpolated into every Compute API URL
-WHISPERX_PORT = int(os.getenv("WHISPERX_PORT", "9000"))  # port on the GPU instance for /asr and its /health probe
-OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434"))  # port on the GPU instance for /v1/* and its /api/tags probe
-IDLE_TIMEOUT = int(os.getenv("IDLE_TIMEOUT", "300"))  # seconds since the last request after which idle_shutdown_loop stops the instance
-CREDS_FILE = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", str(SCRIPT_DIR / "gcp-credentials.json"))  # JSON credentials read by get_access_token
-STATS_FILE = os.getenv("STATS_FILE", str(SCRIPT_DIR / "usage-stats.json"))  # JSON usage file read/written by load_stats / save_stats
-MONTHLY_LIMIT_HOURS = float(os.getenv("MONTHLY_LIMIT_HOURS", "30"))  # GPU-hours allowed per calendar month; compared in check_budget
-# Real GCP cost per GPU-hour (g2-standard-4 + L4): GPU ($0.837) + vCPU ($0.151) + RAM ($0.069)
-GPU_COST_PER_HOUR = float(os.getenv("GPU_COST_PER_HOUR", "1.06"))
-# Fixed monthly costs: SSD disks ($5.66) + snapshots ($4.19) ≈ $9.85/month
-FIXED_MONTHLY_COST = float(os.getenv("FIXED_MONTHLY_COST", "9.85"))
-SNAPSHOT_NAME = "whisperx-gpu-snapshot"  # global snapshot used as boot-disk source in create_instance_from_snapshot
-HEALTH_POLL_INTERVAL = 5  # seconds slept between service health probes
-BOOT_TIMEOUT = 300  # seconds; divided by HEALTH_POLL_INTERVAL to get the number of health probes
-
-# Zone fallback order — Canada only, Montreal first
-ZONE_FALLBACKS = [
- {
- "zone": "northamerica-northeast1-b",
- "instance": "whisperx-gpu-mtl1",
- "machine_type": "g2-standard-4",
- "accelerator": "nvidia-l4",
- "accel_count": 1,
- "label": "Montreal-b (L4)",
- },
- {
- "zone": "northamerica-northeast1-c",
- "instance": "whisperx-gpu-mtl2",
- "machine_type": "n1-standard-4",
- "accelerator": "nvidia-tesla-t4",
- "accel_count": 1,
- "label": "Montreal-c (T4)",
- },
- {
- "zone": "northamerica-northeast2-a",
- "instance": "whisperx-gpu-tor1",
- "machine_type": "g2-standard-4",
- "accelerator": "nvidia-l4",
- "accel_count": 1,
- "label": "Toronto-a (L4)",
- },
- {
- "zone": "northamerica-northeast2-b",
- "instance": "whisperx-gpu",
- "machine_type": "g2-standard-4",
- "accelerator": "nvidia-l4",
- "accel_count": 1,
- "label": "Toronto-b (L4)",
- },
-]
-
-STARTUP_SCRIPT = """#!/bin/bash
-systemctl start docker
-sleep 5
-docker start whisperx-asr 2>/dev/null || true
-systemctl start ollama 2>/dev/null || true
-"""
-
-app = FastAPI(title="DictIA ASR Proxy")
-
-# State
-last_request_time = 0.0  # time.time() of the latest proxied request; 0.0 until the first one arrives
-active_requests = 0  # number of proxied requests currently in flight
-gpu_ip: str | None = None  # external IP returned by extract_ip for the instance in use
-active_zone: dict | None = None  # the ZONE_FALLBACKS entry currently in use, set by ensure_gpu_running
-shutdown_task: asyncio.Task | None = None  # handle of idle_shutdown_loop, created in on_startup
-
-# Request history tracking (in-memory, last 20 requests)
-request_history: list[dict] = []  # newest entry first (handlers insert at index 0)
-MAX_HISTORY = 20
-
-# Zone status tracking
-zone_status: dict[str, dict] = {}  # keyed by zone label; values hold status / last_tried / last_error
-
-# Startup lock and failure cooldown
-_startup_lock: asyncio.Lock | None = None  # created in on_startup; held by ensure_gpu_running
-_last_failure_time: float = 0  # time.time() of the last attempt in which every zone failed; 0 means none
-FAILURE_COOLDOWN = 180  # seconds to wait after such a failure before retrying
-
-# OAuth2 token cache
-_access_token: str | None = None
-_token_expiry: float = 0  # epoch seconds at which the cached token expires
-
-
-# --- Usage Stats ---
-
-def load_stats() -> dict:  # Read usage stats from STATS_FILE and return them as a dict.
- try:
- with open(STATS_FILE) as f:
- return json.load(f)
- except (FileNotFoundError, json.JSONDecodeError):  # missing or unparsable file: fall back to a zeroed record for the current month
- return {"gpu_seconds": 0, "month": time.strftime("%Y-%m"), "requests": 0, "last_start": 0}
-
-
-def save_stats(stats: dict):  # Overwrite STATS_FILE with `stats` serialized as indented JSON.
- with open(STATS_FILE, "w") as f:
- json.dump(stats, f, indent=2)
-
-
-def track_gpu_time():  # Close the open GPU session: add the time since `last_start` to `gpu_seconds` and persist the stats.
- stats = load_stats()
- current_month = time.strftime("%Y-%m")
- if stats.get("month") != current_month:
- stats = {"gpu_seconds": 0, "month": current_month, "requests": 0, "last_start": 0}  # stored month is stale: counters restart at zero, and the old last_start is dropped with them
- if stats.get("last_start", 0) > 0:  # > 0 means a start timestamp was recorded and not yet accounted for
- elapsed = time.time() - stats["last_start"]
- stats["gpu_seconds"] += elapsed
- stats["last_start"] = 0  # mark the session as closed
- save_stats(stats)
-
-
-def check_budget() -> tuple[bool, float]:  # Return (within_budget, gpu_hours_used) for the current month.
- stats = load_stats()
- current_month = time.strftime("%Y-%m")
- if stats.get("month") != current_month:
- return True, 0.0  # stored stats belong to another month, so nothing counts against this month yet
- hours_used = stats.get("gpu_seconds", 0) / 3600
- return hours_used < MONTHLY_LIMIT_HOURS, hours_used
-
-
-# --- GCP Auth ---
-
-async def get_access_token() -> str:  # Return an OAuth2 access token for the Compute API, using the module-level cache when possible.
- global _access_token, _token_expiry
- if _access_token and time.time() < _token_expiry - 60:  # reuse the cached token until 60 s before it expires
- return _access_token
- with open(CREDS_FILE) as f:
- creds = json.load(f)
- cred_type = creds.get("type", "authorized_user")  # files without a "type" key take the refresh-token branch below
- async with httpx.AsyncClient() as client:
- if cred_type == "service_account":  # service account: exchange a self-signed RS256 JWT for a token
- now = int(time.time())
- payload = {
- "iss": creds["client_email"],
- "scope": "https://www.googleapis.com/auth/compute",
- "aud": "https://oauth2.googleapis.com/token",
- "iat": now,
- "exp": now + 3600,
- }
- signed = pyjwt.encode(payload, creds["private_key"], algorithm="RS256")
- resp = await client.post(
- "https://oauth2.googleapis.com/token",
- data={
- "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
- "assertion": signed,
- },
- )
- else:  # any other credential type: refresh-token grant with client id/secret
- resp = await client.post(
- "https://oauth2.googleapis.com/token",
- data={
- "client_id": creds["client_id"],
- "client_secret": creds["client_secret"],
- "refresh_token": creds["refresh_token"],
- "grant_type": "refresh_token",
- },
- )
- resp.raise_for_status()  # a non-2xx token response raises httpx.HTTPStatusError to the caller
- data = resp.json()
- _access_token = data["access_token"]
- _token_expiry = time.time() + data.get("expires_in", 3600)  # assume one hour when the response omits expires_in
- log.info(f"Refreshed GCP access token ({cred_type})")
- return _access_token
-
-
-# --- GCP Compute API ---
-
-COMPUTE_BASE = "https://compute.googleapis.com/compute/v1"
-
-
-async def gcp_api(method: str, url: str, **kwargs) -> httpx.Response:  # Send one bearer-authenticated request to `url`; extra kwargs go to httpx unchanged.
- token = await get_access_token()
- async with httpx.AsyncClient(timeout=60) as client:  # a new client with a 60 s timeout is created for each call
- resp = await client.request(
- method, url,
- headers={"Authorization": f"Bearer {token}"},
- **kwargs,
- )
- return resp  # returned as-is: the status code is not checked here, callers inspect it
-
-
-async def get_instance_info(zone: str, instance: str) -> dict | None:  # GET the instance resource; returns the parsed JSON body or None.
- url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
- resp = await gcp_api("GET", url)
- if resp.status_code == 404:  # instance does not exist
- return None
- if resp.status_code >= 400:  # other API errors are logged but also reported as None, same as "not found"
- log.error(f"GCP API error {resp.status_code}: {resp.text}")
- return None
- return resp.json()
-
-
-def extract_ip(instance_data: dict) -> str:  # Return the natIP of the first access config on the first network interface, or "" when absent.
- interfaces = instance_data.get("networkInterfaces", [])
- if interfaces:
- access = interfaces[0].get("accessConfigs", [])  # only the first interface is considered
- if access:
- return access[0].get("natIP", "")
- return ""
-
-
-async def start_instance_in_zone(zone: str, instance: str) -> bool:  # POST the instance `start` action; True when the API answers with a status below 400.
- url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/start"
- resp = await gcp_api("POST", url)
- if resp.status_code < 400:  # the start was only requested; callers poll wait_for_running for the outcome
- log.info(f"Start requested: {instance} in {zone}")
- return True
- log.warning(f"Failed to start {instance} in {zone}: {resp.status_code} {resp.text}")
- return False
-
-
-async def stop_instance_in_zone(zone: str, instance: str):  # POST the instance `stop` action and log the outcome; nothing is returned or raised on an error status.
- url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/stop"
- resp = await gcp_api("POST", url)
- if resp.status_code < 400:
- log.info(f"Stop requested: {instance} in {zone}")
- else:
- log.error(f"Failed to stop {instance} in {zone}: {resp.status_code}")
-
-
-async def create_instance_from_snapshot(config: dict) -> bool:  # Request creation of the instance described by `config` (a ZONE_FALLBACKS-style dict); True when the API accepts the request.
- zone = config["zone"]
- instance = config["instance"]
- machine = config["machine_type"]
- accel = config["accelerator"]
- accel_count = config["accel_count"]
-
- log.info(f"Creating {instance} in {zone} from snapshot...")
-
- body = {
- "name": instance,
- "machineType": f"zones/{zone}/machineTypes/{machine}",
- "disks": [{
- "boot": True,
- "autoDelete": True,
- "initializeParams": {
- "diskSizeGb": "50",
- "diskType": f"zones/{zone}/diskTypes/pd-ssd",
- "sourceSnapshot": f"global/snapshots/{SNAPSHOT_NAME}",  # boot disk is restored from the shared snapshot
- },
- }],
- "networkInterfaces": [{
- "network": "global/networks/default",
- "accessConfigs": [{"type": "ONE_TO_ONE_NAT", "name": "External NAT"}],  # requests the access config that extract_ip later reads natIP from
- }],
- "guestAccelerators": [{
- "acceleratorType": f"zones/{zone}/acceleratorTypes/{accel}",
- "acceleratorCount": accel_count,
- }],
- "scheduling": {
- "onHostMaintenance": "TERMINATE",
- "automaticRestart": False,
- },
- "tags": {"items": ["whisperx-gpu"]},
- "metadata": {
- "items": [{"key": "startup-script", "value": STARTUP_SCRIPT}],  # the script starts docker, the whisperx-asr container and ollama
- },
- }
-
- url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances"
- resp = await gcp_api("POST", url, json=body)
-
- if resp.status_code < 400:
- log.info(f"Created {instance} in {zone}")
- return True
-
- error_text = resp.text  # the failure reason is only classified for logging; every failure returns False
- if "ZONE_RESOURCE_POOL_EXHAUSTED" in error_text:
- log.warning(f"No capacity in {zone} -- skipping")
- elif "QUOTA" in error_text.upper():
- log.warning(f"Quota exceeded for {zone}: {error_text[:200]}")
- else:
- log.error(f"Failed to create in {zone}: {resp.status_code} {error_text[:200]}")
- return False
-
-
-# --- Core Logic ---
-
-async def wait_for_running(zone: str, instance: str, timeout: int = 120, grace: int = 15) -> bool:  # Poll the instance until it is RUNNING (True) or a give-up condition / the poll budget is reached (False).
- gone_count = 0  # number of polls in which get_instance_info returned None
- start_time = time.time()
- for _ in range(timeout // 5):  # at most timeout // 5 polls, 5 s apart
- info = await get_instance_info(zone, instance)
- if info and info.get("status") == "RUNNING":
- return True
- status = info.get("status", "UNKNOWN") if info else "GONE"
- elapsed = time.time() - start_time
- if status == "GONE":
- gone_count += 1
- if gone_count >= 2:  # a single missing read is tolerated; the second one aborts
- log.warning(f"{instance} in {zone}: instance disappeared (no capacity)")
- return False
- if status in ("STOPPING",):  # an instance that is stopping will not reach RUNNING: abort immediately
- log.warning(f"{instance} in {zone}: status {status} (no capacity)")
- return False
- if status in ("TERMINATED", "STOPPED") and elapsed > grace:  # these states are tolerated only during the first `grace` seconds
- log.warning(f"{instance} in {zone}: status {status} after {elapsed:.0f}s (no capacity)")
- return False
- await asyncio.sleep(5)
- return False
-
-
-async def delete_instance(zone: str, instance: str):  # Send DELETE for the instance and log the outcome; nothing is returned.
- url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
- resp = await gcp_api("DELETE", url)
- if resp.status_code < 400:
- log.info(f"Deleted {instance} in {zone} to free quota")
- elif resp.status_code == 404:  # already gone: nothing to do and nothing worth logging
- pass
- else:
- log.warning(f"Failed to delete {instance} in {zone}: {resp.status_code}")
-
-
-async def ensure_gpu_running() -> str:  # Return the external IP of a RUNNING GPU instance, reusing active_zone or walking ZONE_FALLBACKS in order; raises RuntimeError when over budget or when no zone works.
- global gpu_ip, active_zone, _last_failure_time
-
- if _last_failure_time > 0:  # a previous attempt failed in every zone
- remaining = FAILURE_COOLDOWN - (time.time() - _last_failure_time)
- if remaining > 0:
- log.info(f"GPU cooldown active ({int(remaining)}s remaining), waiting...")
- await asyncio.sleep(remaining)  # the request waits out the cooldown instead of failing fast
- _last_failure_time = 0
-
- async with _startup_lock:  # lock created in on_startup; only one coroutine at a time runs the start-up logic
- ok, hours = check_budget()
- if not ok:
- raise RuntimeError(f"Monthly GPU limit reached ({hours:.1f}h / {MONTHLY_LIMIT_HOURS}h)")
-
- if active_zone:  # fast path: the instance chosen earlier may still be up
- info = await get_instance_info(active_zone["zone"], active_zone["instance"])
- if info and info.get("status") == "RUNNING":
- gpu_ip = extract_ip(info)
- if gpu_ip:
- return gpu_ip
-
- errors = []  # one "<label>: <reason>" string per zone that failed, used in the final error
-
- for config in ZONE_FALLBACKS:  # zones are tried in list order
- zone = config["zone"]
- instance = config["instance"]
- label = config["label"]
-
- log.info(f"Trying {label}...")
- info = await get_instance_info(zone, instance)
-
- if info is None:  # instance missing (or the lookup failed): try to create it from the snapshot
- created = await create_instance_from_snapshot(config)
- if not created:
- zone_status[label] = {
- "status": "no_capacity",  # recorded as no capacity whatever reason create_instance_from_snapshot logged
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": "no capacity",
- }
- errors.append(f"{label}: no capacity")
- continue
- if not await wait_for_running(zone, instance, grace=30):
- zone_status[label] = {
- "status": "error",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": "created but failed to start",
- }
- errors.append(f"{label}: created but failed to start")
- await delete_instance(zone, instance)  # remove the instance that never reached RUNNING
- await asyncio.sleep(3)
- continue
- else:
- status = info.get("status", "UNKNOWN")
-
- if status == "RUNNING":  # already up: nothing to start
- pass
- elif status in ("TERMINATED", "STOPPED"):  # exists but is off: start it and wait
- zone_status[label] = {
- "status": "starting",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": None,
- }
- started = await start_instance_in_zone(zone, instance)
- if not started:
- zone_status[label] = {
- "status": "error",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": "start rejected",
- }
- errors.append(f"{label}: start rejected")
- continue
- if not await wait_for_running(zone, instance, grace=20):
- zone_status[label] = {
- "status": "error",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": "didn't reach RUNNING",
- }
- errors.append(f"{label}: didn't reach RUNNING")
- continue
- elif status in ("STAGING", "PROVISIONING"):  # a start is already in progress: just wait for it
- zone_status[label] = {
- "status": "starting",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": None,
- }
- if not await wait_for_running(zone, instance):
- zone_status[label] = {
- "status": "error",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": f"stuck in {status}",
- }
- errors.append(f"{label}: stuck in {status}")
- continue
- elif status == "STOPPING":  # cannot be started in this state: delete it and move on to the next zone
- log.info(f"{label}: STOPPING, deleting to free quota")
- await delete_instance(zone, instance)
- await asyncio.sleep(3)
- zone_status[label] = {
- "status": "error",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": "was STOPPING, deleted",
- }
- errors.append(f"{label}: was STOPPING, deleted")
- continue
-
- info = await get_instance_info(zone, instance)  # re-read the instance to confirm RUNNING and pick up its IP
- if info and info.get("status") == "RUNNING":
- gpu_ip = extract_ip(info)
- if gpu_ip:
- active_zone = config
- _last_failure_time = 0
- zone_status[label] = {
- "status": "running",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": None,
- }
- stats = load_stats()
- stats["last_start"] = time.time()  # opens the GPU session that track_gpu_time later closes
- stats["requests"] = stats.get("requests", 0) + 1  # incremented only on this path, not on the fast-path return above
- stats["active_zone"] = label
- save_stats(stats)
- log.info(f"GPU ready in {label}, IP: {gpu_ip}")
- return gpu_ip
-
- zone_status[label] = {
- "status": "error",
- "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "last_error": "running but no IP",
- }
- errors.append(f"{label}: running but no IP")
-
- _last_failure_time = time.time()  # starts the cooldown that is checked at the top of this function
- raise RuntimeError(
- f"No GPU available in any Canadian zone. Tried: {'; '.join(errors)}"
- )
-
-
-async def ensure_gpu_ready() -> str:  # Ensure a GPU instance is running and WhisperX answers /health with 200; returns the instance IP or raises RuntimeError.
- ip = await ensure_gpu_running()
- url = f"http://{ip}:{WHISPERX_PORT}/health"
- log.info(f"Waiting for WhisperX at {url}...")
- async with httpx.AsyncClient(timeout=10) as client:
- for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):  # bounded number of probes, HEALTH_POLL_INTERVAL seconds apart
- try:
- resp = await client.get(url)
- if resp.status_code == 200:
- log.info("WhisperX is healthy!")
- return ip
- except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):  # service not reachable yet: keep polling
- pass
- await asyncio.sleep(HEALTH_POLL_INTERVAL)
- raise RuntimeError("WhisperX did not become healthy in time")
-
-
-async def ensure_ollama_ready() -> str:  # Ensure a GPU instance is running and Ollama answers /api/tags with 200; returns the instance IP or raises RuntimeError.
- ip = await ensure_gpu_running()
- url = f"http://{ip}:{OLLAMA_PORT}/api/tags"
- log.info(f"Waiting for Ollama at {url}...")
- async with httpx.AsyncClient(timeout=10) as client:
- for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):  # bounded number of probes, HEALTH_POLL_INTERVAL seconds apart
- try:
- resp = await client.get(url)
- if resp.status_code == 200:
- log.info("Ollama is healthy!")
- return ip
- except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):  # service not reachable yet: keep polling
- pass
- await asyncio.sleep(HEALTH_POLL_INTERVAL)
- raise RuntimeError("Ollama did not become healthy in time")
-
-
-async def idle_shutdown_loop():  # Background loop: once a minute, stop the active instance when no request has been seen for IDLE_TIMEOUT seconds.
- while True:
- await asyncio.sleep(60)
- if last_request_time == 0 or active_zone is None:  # nothing proxied yet, or no instance has been selected
- continue
- if active_requests > 0:  # never stop the GPU while a request is in flight
- continue
- elapsed = time.time() - last_request_time
- if elapsed >= IDLE_TIMEOUT:
- try:
- zone = active_zone["zone"]
- instance = active_zone["instance"]
- label = active_zone["label"]
- info = await get_instance_info(zone, instance)
- if info and info.get("status") == "RUNNING":  # only issue a stop when the instance is actually running
- log.info(f"Idle {int(elapsed)}s -- stopping {label}")
- await stop_instance_in_zone(zone, instance)
- track_gpu_time()  # book the elapsed GPU time against the monthly budget
- except Exception as e:  # log and keep looping rather than let the background task end
- log.error(f"Error stopping: {e}")
-
-
-# --- Endpoints ---
-
-@app.on_event("startup")
-async def on_startup():  # Startup hook: create the startup lock, fetch a first access token and launch the idle-shutdown task.
- global shutdown_task, _startup_lock
- _startup_lock = asyncio.Lock()
- await get_access_token()  # not wrapped in try/except, so a credentials or token error propagates out of this hook
- shutdown_task = asyncio.create_task(idle_shutdown_loop())
- zones = ", ".join(c["label"] for c in ZONE_FALLBACKS)
- log.info(f"DictIA ASR Proxy started. Zones: [{zones}]. Idle: {IDLE_TIMEOUT}s, limit: {MONTHLY_LIMIT_HOURS}h")
-
-
-@app.post("/asr")
-async def asr_proxy(request: Request):  # Forward a POST /asr request to WhisperX on the GPU instance, bringing the instance up first if needed.
- global last_request_time, active_requests
-
- body = await request.body()  # the whole request body is read into memory before forwarding
- headers = {
- k: v for k, v in request.headers.items()
- if k.lower() not in ("host", "transfer-encoding")  # these two headers are not forwarded upstream
- }
-
- last_request_time = time.time()
- active_requests += 1  # idle_shutdown_loop skips its check while this counter is above zero
- start_time = time.time()
- result_status = 200
- try:
- ip = await ensure_gpu_ready()
- target = f"http://{ip}:{WHISPERX_PORT}/asr"
- log.info(f"Forwarding {len(body)} bytes to {target}")
- async with httpx.AsyncClient(timeout=httpx.Timeout(7200.0)) as client:  # 7200 s = 2 h, matching the timeout message below
- resp = await client.post(target, content=body, headers=headers)
- last_request_time = time.time()
- result_status = resp.status_code
- ct = resp.headers.get("content-type", "")
- if "application/json" in ct:
- return JSONResponse(content=resp.json(), status_code=resp.status_code)
- else:
- return JSONResponse(content=resp.text, status_code=resp.status_code)  # non-JSON upstream bodies are returned as a JSON-encoded string
- except httpx.ReadTimeout:
- result_status = 504
- return JSONResponse({"error": "Transcription timeout (2h)"}, status_code=504)
- except Exception as e:  # every other failure, including GPU start-up errors, is reported as 502
- result_status = 502
- log.error(f"Proxy error: {e}")
- return JSONResponse({"error": str(e)}, status_code=502)
- finally:
- active_requests -= 1
- last_request_time = time.time()  # the idle countdown restarts when the request finishes
- request_history.insert(0, {
- "time": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "type": "ASR",
- "duration_sec": round(time.time() - start_time, 1),
- "status": result_status,
- "zone": active_zone["label"] if active_zone else "none",
- })
- if len(request_history) > MAX_HISTORY:
- request_history.pop()  # drop the oldest entry (the list is newest-first)
-
-
-@app.get("/health")
-async def health():  # GET /health: report proxy state, live GPU instance status, monthly usage and token lifetime as JSON.
- zone_label = active_zone["label"] if active_zone else "none"
- gpu_status = "unknown"
- if active_zone:
- try:
- info = await get_instance_info(active_zone["zone"], active_zone["instance"])  # each health call with an active zone makes a Compute API request
- gpu_status = info.get("status", "unknown") if info else "not_found"
- except Exception:  # best effort: on any failure the status stays "unknown"
- pass
- ok, hours = check_budget()
- stats = load_stats()
- return {
- "proxy": "healthy",
- "gpu_instance": gpu_status,
- "gpu_zone": zone_label,
- "active_requests": active_requests,
- "idle_timeout": IDLE_TIMEOUT,
- "usage": {
- "month": stats.get("month"),
- "gpu_hours": round(hours, 2),
- "gpu_limit_hours": MONTHLY_LIMIT_HOURS,
- "requests_count": stats.get("requests", 0),
- "budget_ok": ok,
- },
- "gpu_ip": gpu_ip,
- "machine_type": active_zone.get("machine_type", "unknown") if active_zone else "unknown",
- "gpu_model": active_zone.get("accelerator", "unknown") if active_zone else "unknown",
- "idle_seconds": round(time.time() - last_request_time) if last_request_time > 0 else 0,
- "auto_shutdown_in": max(0, IDLE_TIMEOUT - round(time.time() - last_request_time)) if last_request_time > 0 and active_zone else None,  # seconds left before the idle threshold, clamped at 0
- "token_expires_in": round(_token_expiry - time.time()) if _token_expiry > 0 else None,
- }
-
-
-@app.get("/stats")
-async def get_stats():  # GET /stats: usage and cost figures from the stats file, recent requests and per-zone status.
- stats = load_stats()
- hours = stats.get("gpu_seconds", 0) / 3600  # taken from the file as stored; unlike check_budget, the stored month is not compared with the current one
- gpu_cost = hours * GPU_COST_PER_HOUR
- total_cost = gpu_cost + FIXED_MONTHLY_COST
- return {
- "month": stats.get("month"),
- "gpu_hours": round(hours, 2),
- "gpu_minutes": round(hours * 60, 1),
- "estimated_cost_usd": round(total_cost, 2),
- "gpu_cost_usd": round(gpu_cost, 2),
- "fixed_cost_usd": FIXED_MONTHLY_COST,
- "monthly_limit_hours": MONTHLY_LIMIT_HOURS,
- "remaining_hours": round(MONTHLY_LIMIT_HOURS - hours, 2),  # not clamped, so it goes negative once the limit is exceeded
- "requests_count": stats.get("requests", 0),
- "active_zone": stats.get("active_zone", "none"),
- "cost_per_hour": GPU_COST_PER_HOUR,
- "recent_requests": request_history[:10],  # the ten newest entries of the in-memory history
- "zone_fallbacks": [
- {
- "label": config["label"],
- "zone": config["zone"],
- "machine": config["machine_type"],
- "gpu": config["accelerator"],
- **zone_status.get(config["label"], {"status": "unknown", "last_tried": None, "last_error": None}),  # zones never tried report "unknown"
- }
- for config in ZONE_FALLBACKS
- ],
- }
-
-
-@app.post("/gpu/start")
-async def gpu_start():  # POST /gpu/start: bring the GPU up and wait for WhisperX; returns the IP and zone, or 503 with the error text.
- try:
- ip = await ensure_gpu_ready()
- label = active_zone["label"] if active_zone else "unknown"
- return {"status": "running", "ip": ip, "zone": label}
- except Exception as e:
- return JSONResponse({"error": str(e)}, status_code=503)
-
-
-@app.post("/gpu/stop")
-async def gpu_stop():  # POST /gpu/stop: request a stop of the active instance and book its GPU time.
- if not active_zone:
- return {"status": "no active instance"}
- try:
- await stop_instance_in_zone(active_zone["zone"], active_zone["instance"])  # logs a failed stop but does not raise on an error status
- track_gpu_time()
- return {"status": "stopped", "zone": active_zone["label"]}  # active_zone itself is left set
- except Exception as e:
- return JSONResponse({"error": str(e)}, status_code=500)
-
-
-DASHBOARD_HTML = Path(__file__).parent / "dashboard.html"
-
-
-@app.get("/", response_class=HTMLResponse)
-async def dashboard():
- if DASHBOARD_HTML.exists():
- return HTMLResponse(DASHBOARD_HTML.read_text(encoding="utf-8"))
- return HTMLResponse("Dashboard not found
Place dashboard.html next to proxy.py
", status_code=404)
-
-
-@app.api_route("/v1/{path:path}", methods=["POST", "GET"])
-async def llm_proxy(request: Request, path: str):  # Forward GET/POST /v1/<path> to Ollama on the GPU instance, bringing the instance up first if needed.
- global last_request_time, active_requests
-
- body = await request.body()
- headers = {
- k: v for k, v in request.headers.items()
- if k.lower() not in ("host", "transfer-encoding")  # these two headers are not forwarded upstream
- }
-
- last_request_time = time.time()
- active_requests += 1  # idle_shutdown_loop skips its check while this counter is above zero
- start_time = time.time()
- result_status = 200
- try:
- ip = await ensure_ollama_ready()
- target = f"http://{ip}:{OLLAMA_PORT}/v1/{path}"
- log.info(f"Forwarding LLM request to {target}")
- async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:  # 300 s = 5 min, matching the timeout message below
- resp = await client.request(request.method, target, content=body, headers=headers)
- last_request_time = time.time()
- result_status = resp.status_code
- return Response(
- content=resp.content,  # the full upstream body is read before being returned
- status_code=resp.status_code,
- media_type=resp.headers.get("content-type"),
- )
- except httpx.ReadTimeout:
- result_status = 504
- return JSONResponse({"error": "LLM timeout (5min)"}, status_code=504)
- except Exception as e:  # every other failure, including GPU start-up errors, is reported as 502
- result_status = 502
- log.error(f"LLM proxy error: {e}")
- return JSONResponse({"error": str(e)}, status_code=502)
- finally:
- active_requests -= 1
- last_request_time = time.time()  # the idle countdown restarts when the request finishes
- request_history.insert(0, {
- "time": time.strftime("%Y-%m-%dT%H:%M:%S"),
- "type": "LLM",
- "duration_sec": round(time.time() - start_time, 1),
- "status": result_status,
- "zone": active_zone["label"] if active_zone else "none",
- })
- if len(request_history) > MAX_HISTORY:
- request_history.pop()  # drop the oldest entry (the list is newest-first)
-
-
-if __name__ == "__main__":  # direct execution: serve the app with uvicorn
- import uvicorn
- uvicorn.run(app, host="0.0.0.0", port=9090)  # binds all interfaces on port 9090
diff --git a/deployment/asr-proxy/requirements.txt b/deployment/asr-proxy/requirements.txt
deleted file mode 100644
index f301f93..0000000
--- a/deployment/asr-proxy/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-fastapi==0.115.0
-uvicorn==0.30.0
-httpx==0.27.0
-PyJWT==2.9.0
-cryptography>=43.0.0
diff --git a/deployment/asr-proxy/setup.sh b/deployment/asr-proxy/setup.sh
deleted file mode 100644
index f0d88f8..0000000
--- a/deployment/asr-proxy/setup.sh
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env bash
-# DictIA ASR Proxy — Setup script
-# Installs the GCP GPU proxy for cloud deployments.
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-INSTALL_DIR="${ASR_PROXY_DIR:-$SCRIPT_DIR}"
-SERVICE_USER="${ASR_PROXY_USER:-$(whoami)}"
-
-echo "=== DictIA ASR Proxy Setup ==="
-echo "Install directory: $INSTALL_DIR"
-echo "Service user: $SERVICE_USER"
-echo
-
-# 1. Create virtual environment
-if [ ! -d "$INSTALL_DIR/venv" ]; then
- echo "[1/4] Creating Python virtual environment..."
- python3 -m venv "$INSTALL_DIR/venv"
-else
- echo "[1/4] Virtual environment already exists."
-fi
-
-# 2. Install dependencies
-echo "[2/4] Installing Python dependencies..."
-"$INSTALL_DIR/venv/bin/pip" install --quiet --upgrade pip
-"$INSTALL_DIR/venv/bin/pip" install --quiet -r "$INSTALL_DIR/requirements.txt"
-
-# 3. GCP credentials
-if [ ! -f "$INSTALL_DIR/gcp-credentials.json" ]; then
- echo "[3/4] GCP credentials not found."
- echo " Place your GCP service account or OAuth credentials at:"
- echo " $INSTALL_DIR/gcp-credentials.json"
- echo
- echo " For service account: download JSON from GCP Console > IAM > Service Accounts"
- echo " For user credentials: run 'gcloud auth application-default login' and copy the file"
- echo
- read -rp " Path to credentials file (or press Enter to skip): " CREDS_PATH
- if [ -n "$CREDS_PATH" ] && [ -f "$CREDS_PATH" ]; then
- cp "$CREDS_PATH" "$INSTALL_DIR/gcp-credentials.json"
- chmod 600 "$INSTALL_DIR/gcp-credentials.json"
- echo " Credentials copied."
- else
- echo " Skipped. You must add credentials before starting the proxy."
- fi
-else
- echo "[3/4] GCP credentials found."
-fi
-
-# 4. Install systemd service
-echo "[4/4] Installing systemd service..."
-SERVICE_FILE="/etc/systemd/system/asr-proxy.service"
-
-cat > /tmp/asr-proxy.service </dev/null 2>&1; then
- echo "ERROR: Tailscale is not running or not connected."
- echo " Install: curl -fsSL https://tailscale.com/install.sh | sh"
- echo " Connect: sudo tailscale up"
- exit 1
-fi
-
-HOSTNAME=$(tailscale status --json | python3 -c "import sys,json; print(json.load(sys.stdin)['Self']['DNSName'].rstrip('.'))" 2>/dev/null || echo "unknown")
-echo "Tailscale hostname: $HOSTNAME"
-echo
-
-# DictIA app on :443 → localhost:8899
-echo "[1/2] Setting up DictIA app (port 443 → 8899)..."
-if [ "$MODE" = "funnel" ]; then
- tailscale funnel --bg --https=443 http://localhost:8899
-else
- tailscale serve --bg --https=443 http://localhost:8899
-fi
-
-# ASR Proxy dashboard on :9443 → localhost:9090
-echo "[2/2] Setting up ASR Proxy dashboard (port 9443 → 9090)..."
-if [ "$MODE" = "funnel" ]; then
- tailscale funnel --bg --https=9443 http://localhost:9090
-else
- tailscale serve --bg --https=9443 http://localhost:9090
-fi
-
-echo
-echo "=== Setup complete ==="
-echo "DictIA: https://$HOSTNAME/"
-echo "ASR Dashboard: https://$HOSTNAME:9443/"
-echo
-echo "Verify with: tailscale serve status"
diff --git a/deployment/docker/.env.example b/deployment/docker/.env.example
deleted file mode 100644
index fc204f9..0000000
--- a/deployment/docker/.env.example
+++ /dev/null
@@ -1,124 +0,0 @@
-# =============================================================================
-# DictIA — Unified Environment Configuration
-# =============================================================================
-#
-# Copy this file to the project root as .env and edit the values.
-# cp deployment/docker/.env.example .env
-#
-# This template combines upstream settings with DictIA deployment vars.
-# See: config/env.transcription.example for full upstream documentation.
-
-# =============================================================================
-# FLASK SECRET KEY (REQUIRED — auto-generated by setup.sh)
-# =============================================================================
-SECRET_KEY=change-me-to-a-random-string
-
-# =============================================================================
-# DEPLOYMENT PROFILE (used by deployment scripts)
-# =============================================================================
-# Options: cloud, local-cpu, local-gpu
-DICTIA_PROFILE=cloud
-
-# =============================================================================
-# TEXT GENERATION MODEL (REQUIRED for summaries, titles, chat)
-# =============================================================================
-TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
-TEXT_MODEL_API_KEY=your_openrouter_api_key
-TEXT_MODEL_NAME=openai/gpt-4o-mini
-
-# =============================================================================
-# TRANSCRIPTION CONFIGURATION
-# =============================================================================
-# For cloud profile (ASR Proxy → GCP GPU):
-# ASR_BASE_URL is set automatically in docker-compose.cloud.yml
-# No need to set it here.
-#
-# For local profiles (WhisperX sidecar):
-# ASR_BASE_URL is set automatically in docker-compose.local-*.yml
-# No need to set it here.
-#
-# For OpenAI API instead of self-hosted ASR:
-# TRANSCRIPTION_API_KEY=sk-your_openai_api_key
-# TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize
-
-# ASR model (for local WhisperX profiles)
-ASR_MODEL=large-v3
-
-# HuggingFace token (required for diarization with pyannote)
-# Get yours at: https://huggingface.co/settings/tokens
-# Must accept: https://huggingface.co/pyannote/speaker-diarization-3.1
-HF_TOKEN=
-
-# =============================================================================
-# ASR PROXY — CLOUD PROFILE ONLY
-# =============================================================================
-# GCP project for GPU instances
-# GCP_PROJECT=your-gcp-project
-
-# Monthly GPU budget limit in hours (default: 50)
-# MONTHLY_LIMIT_HOURS=50
-
-# Idle timeout before auto-stopping GPU (seconds, default: 300)
-# IDLE_TIMEOUT=300
-
-# =============================================================================
-# APPLICATION SETTINGS
-# =============================================================================
-ADMIN_USERNAME=admin
-ADMIN_EMAIL=admin@example.com
-ADMIN_PASSWORD=changeme
-
-ALLOW_REGISTRATION=false
-TIMEZONE="America/Toronto"
-LOG_LEVEL=ERROR
-LOCALE=fr_CA
-DEFAULT_LANGUAGE=fr
-SHOW_USERNAMES_IN_UI=true
-SESSION_COOKIE_HTTPONLY=true
-SESSION_COOKIE_SAMESITE=Lax
-SESSION_COOKIE_SECURE=true
-
-# =============================================================================
-# OPTIONAL FEATURES
-# =============================================================================
-ENABLE_INQUIRE_MODE=false
-ENABLE_AUTO_PROCESSING=false
-ENABLE_AUTO_EXPORT=false
-ENABLE_AUTO_DELETION=false
-ENABLE_INTERNAL_SHARING=true
-ENABLE_PUBLIC_SHARING=true
-ENABLE_FOLDERS=true
-VIDEO_RETENTION=true
-USERS_CAN_DELETE=true
-
-# =============================================================================
-# BACKGROUND PROCESSING
-# =============================================================================
-JOB_QUEUE_WORKERS=4
-SUMMARY_QUEUE_WORKERS=4
-JOB_MAX_RETRIES=3
-MAX_CONCURRENT_UPLOADS=3
-
-# =============================================================================
-# TRANSCRIPTION SETTINGS
-# =============================================================================
-TRANSCRIPTION_CONNECTOR=asr_endpoint
-USE_NEW_TRANSCRIPTION_ARCHITECTURE=true
-ENABLE_CHUNKING=true
-CHUNK_LIMIT=2400s
-CHUNK_OVERLAP_SECONDS=5
-
-# =============================================================================
-# LLM / SUMMARY SETTINGS
-# =============================================================================
-SUMMARY_LANGUAGE=fr
-SUMMARY_MAX_TOKENS=16000
-CHAT_MAX_TOKENS=12000
-ENABLE_STREAM_OPTIONS=false
-ENABLE_THINKING=false
-
-# =============================================================================
-# DOCKER/DATABASE
-# =============================================================================
-SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
-UPLOAD_FOLDER=/data/uploads
diff --git a/deployment/docker/docker-compose.cloud.yml b/deployment/docker/docker-compose.cloud.yml
deleted file mode 100644
index d4ae233..0000000
--- a/deployment/docker/docker-compose.cloud.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-# DictIA — Cloud deployment (VPS + ASR Proxy GCP GPU)
-#
-# Usage:
-# docker compose -f deployment/docker/docker-compose.cloud.yml up -d
-#
-# ASR is handled by the external asr-proxy (port 9090) which auto-starts
-# a GCP GPU instance on demand. DictIA connects via host.docker.internal.
-
-services:
- dictia:
- build:
- context: ../..
- dockerfile: Dockerfile
- image: innova-ai/dictia:latest
- container_name: dictia
- restart: unless-stopped
- ports:
- - "8899:8899"
- env_file:
- - ../../.env
- environment:
- - LOG_LEVEL=${LOG_LEVEL:-ERROR}
- - ASR_BASE_URL=http://host.docker.internal:9090
- volumes:
- - ../../data/uploads:/data/uploads
- - ../../data/instance:/data/instance
- extra_hosts:
- - "host.docker.internal:host-gateway"
- healthcheck:
- test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
- interval: 30s
- timeout: 10s
- retries: 3
- start_period: 30s
- networks:
- - dictia-network
-
-networks:
- dictia-network:
- driver: bridge
diff --git a/deployment/docker/docker-compose.local-cpu.yml b/deployment/docker/docker-compose.local-cpu.yml
deleted file mode 100644
index 0a0f060..0000000
--- a/deployment/docker/docker-compose.local-cpu.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-# DictIA — Local CPU deployment (WhisperX on CPU + DictIA)
-#
-# Usage:
-# docker compose -f deployment/docker/docker-compose.local-cpu.yml up -d
-#
-# Warning: CPU transcription is significantly slower than GPU.
-# Expect ~10x real-time for large files (e.g., 1h audio = ~10h processing).
-
-services:
- whisperx-asr:
- image: ghcr.io/jim60105/whisperx-asr:latest
- container_name: whisperx-asr
- restart: unless-stopped
- ports:
- - "9000:9000"
- environment:
- - ASR_MODEL=${ASR_MODEL:-large-v3}
- - ASR_ENGINE=whisperx
- - DEVICE=cpu
- - COMPUTE_TYPE=float32
- - HF_TOKEN=${HF_TOKEN:-}
- volumes:
- - whisperx-cache:/root/.cache
- deploy:
- resources:
- limits:
- memory: 18G
- networks:
- - dictia-network
-
- dictia:
- build:
- context: ../..
- dockerfile: Dockerfile
- image: innova-ai/dictia:latest
- container_name: dictia
- restart: unless-stopped
- ports:
- - "8899:8899"
- env_file:
- - ../../.env
- environment:
- - LOG_LEVEL=${LOG_LEVEL:-ERROR}
- - ASR_BASE_URL=http://whisperx-asr:9000
- volumes:
- - ../../data/uploads:/data/uploads
- - ../../data/instance:/data/instance
- depends_on:
- - whisperx-asr
- healthcheck:
- test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
- interval: 30s
- timeout: 10s
- retries: 3
- start_period: 30s
- networks:
- - dictia-network
-
-volumes:
- whisperx-cache:
-
-networks:
- dictia-network:
- driver: bridge
diff --git a/deployment/docker/docker-compose.local-gpu.yml b/deployment/docker/docker-compose.local-gpu.yml
deleted file mode 100644
index 488fd74..0000000
--- a/deployment/docker/docker-compose.local-gpu.yml
+++ /dev/null
@@ -1,69 +0,0 @@
-# DictIA — Local GPU deployment (WhisperX on NVIDIA GPU + DictIA)
-#
-# Usage:
-# docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d
-#
-# Prerequisites:
-# - NVIDIA GPU with CUDA support
-# - nvidia-container-toolkit installed
-# - Docker configured with nvidia runtime
-
-services:
- whisperx-asr:
- image: ghcr.io/jim60105/whisperx-asr:latest-cuda
- container_name: whisperx-asr
- restart: unless-stopped
- ports:
- - "9000:9000"
- environment:
- - ASR_MODEL=${ASR_MODEL:-large-v3}
- - ASR_ENGINE=whisperx
- - DEVICE=cuda
- - COMPUTE_TYPE=float16
- - HF_TOKEN=${HF_TOKEN:-}
- volumes:
- - whisperx-cache:/root/.cache
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: 1
- capabilities: [gpu]
- networks:
- - dictia-network
-
- dictia:
- build:
- context: ../..
- dockerfile: Dockerfile
- image: innova-ai/dictia:latest
- container_name: dictia
- restart: unless-stopped
- ports:
- - "8899:8899"
- env_file:
- - ../../.env
- environment:
- - LOG_LEVEL=${LOG_LEVEL:-ERROR}
- - ASR_BASE_URL=http://whisperx-asr:9000
- volumes:
- - ../../data/uploads:/data/uploads
- - ../../data/instance:/data/instance
- depends_on:
- - whisperx-asr
- healthcheck:
- test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
- interval: 30s
- timeout: 10s
- retries: 3
- start_period: 30s
- networks:
- - dictia-network
-
-volumes:
- whisperx-cache:
-
-networks:
- dictia-network:
- driver: bridge
diff --git a/deployment/docs/LOCAL-SETUP.md b/deployment/docs/LOCAL-SETUP.md
deleted file mode 100644
index f534972..0000000
--- a/deployment/docs/LOCAL-SETUP.md
+++ /dev/null
@@ -1,118 +0,0 @@
-# Setup Local — DictIA
-
-Guide pour deployer DictIA localement avec GPU NVIDIA ou CPU.
-
-## Profil local-gpu
-
-### Prerequis
-
-- NVIDIA GPU avec support CUDA
-- [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
-- Docker + Docker Compose V2
-- 8GB+ RAM (16GB recommande)
-- Token HuggingFace (pour la diarisation)
-
-### Installation nvidia-container-toolkit
-
-```bash
-# Ubuntu/Debian
-curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
- sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
-curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
- sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
- sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
-sudo apt-get update
-sudo apt-get install -y nvidia-container-toolkit
-sudo nvidia-ctk runtime configure --runtime=docker
-sudo systemctl restart docker
-
-# Verifier
-docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi
-```
-
-### Setup DictIA
-
-```bash
-cd dictia
-bash deployment/setup.sh --profile local-gpu
-```
-
-Le setup va verifier:
-- nvidia-container-toolkit installe
-- GPU accessible depuis Docker
-- Assez de RAM disponible
-
-### Configuration du modele
-
-Par defaut, WhisperX utilise `large-v3`. Pour changer:
-
-```bash
-# Editer .env
-ASR_MODEL=large-v3 # Meilleure qualite
-# ASR_MODEL=medium # Plus rapide, qualite correcte
-# ASR_MODEL=small # Tres rapide, qualite reduite
-```
-
----
-
-## Profil local-cpu
-
-### Prerequis
-
-- Docker + Docker Compose V2
-- 18GB+ RAM (WhisperX CPU est gourmand)
-- Patience (transcription ~10x temps reel)
-
-### Setup
-
-```bash
-cd dictia
-bash deployment/setup.sh --profile local-cpu
-```
-
-### Limitations
-
-- Transcription lente: 1h d'audio prend ~10h
-- Utilise float32 (pas de GPU acceleration)
-- Limite memoire a 18GB par defaut
-- Recommande pour: tests, petits fichiers, demos
-
-Pour reduire l'utilisation memoire, utiliser un modele plus petit:
-
-```bash
-# Editer .env
-ASR_MODEL=small # ou medium, base, tiny
-```
-
----
-
-## Verification
-
-```bash
-# Health check
-bash deployment/tools/health-check.sh
-
-# Test rapide: ouvrir le navigateur
-open http://localhost:8899
-
-# Verifier WhisperX
-curl http://localhost:9000/health
-```
-
-## Gestion des containers
-
-```bash
-COMPOSE_FILE=deployment/docker/docker-compose.local-gpu.yml # ou local-cpu
-
-# Logs
-docker compose -f $COMPOSE_FILE logs -f
-
-# Redemarrer
-docker compose -f $COMPOSE_FILE restart
-
-# Arreter
-docker compose -f $COMPOSE_FILE down
-
-# Voir l'utilisation GPU
-nvidia-smi # (profil GPU seulement)
-```
diff --git a/deployment/docs/MAINTENANCE.md b/deployment/docs/MAINTENANCE.md
deleted file mode 100644
index f43b963..0000000
--- a/deployment/docs/MAINTENANCE.md
+++ /dev/null
@@ -1,136 +0,0 @@
-# Maintenance — DictIA
-
-## Backup
-
-```bash
-# Backup complet (data, .env, volumes, stats ASR)
-bash deployment/tools/backup.sh
-
-# Backup dans un repertoire specifique
-bash deployment/tools/backup.sh /mnt/backups
-```
-
-Les backups sont sauvegardes dans `backups/` avec rotation automatique (garde les 5 derniers).
-
-Contenu d'un backup:
-- `data/` — uploads et base de donnees SQLite
-- `dot-env` — fichier de configuration
-- `asr-usage-stats.json` — stats d'utilisation GPU
-- `whisperx-cache.tar.gz` — cache modeles (si volume Docker)
-- `manifest.json` — metadonnees du backup
-
-### Schedule recommande
-
-| Frequence | Action |
-|-----------|--------|
-| Quotidien | `bash deployment/tools/backup.sh` |
-| Hebdomadaire | Copier le backup sur un stockage externe |
-| Mensuel | Verifier la restauration sur un environnement de test |
-
-Pour automatiser avec cron:
-
-```bash
-# Backup quotidien a 3h du matin
-0 3 * * * /opt/dictia/deployment/tools/backup.sh >> /var/log/dictia-backup.log 2>&1
-```
-
-## Restore
-
-```bash
-# Lister les backups disponibles
-ls -la backups/
-
-# Restaurer un backup
-bash deployment/tools/restore.sh backups/dictia-20260211-030000.tar.gz
-```
-
-Le script:
-1. Valide l'archive (presence du manifest)
-2. Demande confirmation
-3. Arrete les containers
-4. Restaure les fichiers
-5. Redemarre les containers
-
-## Mise a jour
-
-```bash
-# Mise a jour complete (git pull + rebuild + restart)
-bash deployment/tools/update.sh
-
-# Rebuild seulement (sans git pull)
-bash deployment/tools/update.sh --no-pull
-
-# Git pull seulement (sans rebuild)
-bash deployment/tools/update.sh --no-build
-```
-
-Le script:
-1. Detecte le profil actif automatiquement
-2. `git pull origin dictia-branding`
-3. `docker build -t innova-ai/dictia:latest .`
-4. Pull WhisperX upstream (profils locaux)
-5. `docker compose down && up -d`
-6. Attend le health check
-7. Nettoie les images dangling
-
-## Monitoring
-
-### Health check
-
-```bash
-# Diagnostic complet (humain)
-bash deployment/tools/health-check.sh
-
-# JSON (pour alertes/scripts)
-bash deployment/tools/health-check.sh --json
-
-# Code de sortie seulement (0=ok, 1=probleme)
-bash deployment/tools/health-check.sh --quiet
-```
-
-### Logs
-
-```bash
-# DictIA
-docker logs dictia -f --tail 100
-
-# WhisperX (profils locaux)
-docker logs whisperx-asr -f --tail 100
-
-# ASR Proxy (profil cloud)
-journalctl -u asr-proxy -f
-```
-
-### Dashboard GPU (profil cloud)
-
-Le dashboard de monitoring GPU est accessible a:
-- `http://localhost:9090` (local)
-- `https://votre-hostname.tailnet.ts.net:9443` (Tailscale)
-
-Affiche: statut GPU, cout mensuel, historique des requetes, zones de fallback.
-
-### Metriques cles
-
-```bash
-# Espace disque (les transcriptions grossissent)
-df -h /opt/dictia/data/
-
-# Utilisation memoire (WhisperX est gourmand)
-docker stats --no-stream
-
-# Stats GPU (profil cloud)
-curl -s http://localhost:9090/stats | python3 -m json.tool
-```
-
-## Maintenance Docker
-
-```bash
-# Nettoyer les images orphelines
-docker image prune -f
-
-# Nettoyer tout (attention: supprime les volumes non utilises)
-# docker system prune -a --volumes
-
-# Verifier l'espace Docker
-docker system df
-```
diff --git a/deployment/docs/QUICKSTART.md b/deployment/docs/QUICKSTART.md
deleted file mode 100644
index f057175..0000000
--- a/deployment/docs/QUICKSTART.md
+++ /dev/null
@@ -1,90 +0,0 @@
-# Quickstart — DictIA
-
-## Prerequis communs
-
-- Docker + Docker Compose V2
-- Git
-- 2GB+ RAM disponible
-
-```bash
-git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
-cd dictia
-git checkout dictia-branding
-```
-
----
-
-## Profil Cloud (VPS + GCP GPU)
-
-Le GPU demarre automatiquement quand quelqu'un transcrit, et s'arrete apres 5 min d'inactivite.
-
-```bash
-# 1. Setup interactif
-bash deployment/setup.sh --profile cloud
-
-# 2. Setup ASR Proxy (GCP credentials requises)
-bash deployment/asr-proxy/setup.sh
-
-# 3. Optionnel: Tailscale Serve pour HTTPS
-bash deployment/config/tailscale/setup-serve.sh
-```
-
-**Requis**: credentials GCP (service account ou OAuth) dans `deployment/asr-proxy/gcp-credentials.json`.
-
----
-
-## Profil Local GPU
-
-Transcription locale sur GPU NVIDIA. Le plus rapide.
-
-```bash
-# Prerequis: nvidia-container-toolkit
-# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
-
-# Setup
-bash deployment/setup.sh --profile local-gpu
-```
-
-**Requis**: token HuggingFace pour la diarisation (pyannote).
-
----
-
-## Profil Local CPU
-
-Transcription sur CPU. Lent mais fonctionnel pour tester.
-
-```bash
-bash deployment/setup.sh --profile local-cpu
-```
-
-Prevoir ~10x le temps reel (1h audio = ~10h de traitement).
-
----
-
-## Apres l'installation
-
-```bash
-# Verifier que tout fonctionne
-bash deployment/tools/health-check.sh
-
-# Ouvrir DictIA
-open http://localhost:8899
-```
-
-Se connecter avec les identifiants admin configures pendant le setup.
-
-## Commandes utiles
-
-```bash
-# Logs en temps reel
-docker compose -f deployment/docker/docker-compose.<profile>.yml logs -f
-
-# Redemarrer
-docker compose -f deployment/docker/docker-compose.<profile>.yml restart
-
-# Mise a jour
-bash deployment/tools/update.sh
-
-# Backup
-bash deployment/tools/backup.sh
-```
diff --git a/deployment/docs/TROUBLESHOOTING.md b/deployment/docs/TROUBLESHOOTING.md
deleted file mode 100644
index d733a7c..0000000
--- a/deployment/docs/TROUBLESHOOTING.md
+++ /dev/null
@@ -1,177 +0,0 @@
-# Troubleshooting — DictIA
-
-## WhisperX OOM (Out of Memory)
-
-**Symptome**: Container `whisperx-asr` crash ou restart en boucle.
-
-**Cause**: Modele trop gros pour la RAM/VRAM disponible.
-
-**Solutions**:
-```bash
-# Utiliser un modele plus petit dans .env
-ASR_MODEL=medium # au lieu de large-v3
-
-# Augmenter la limite memoire (local-cpu)
-# Editer docker-compose.local-cpu.yml
-deploy:
- resources:
- limits:
- memory: 24G # au lieu de 18G
-```
-
-## Diarisation 403 Forbidden
-
-**Symptome**: Erreur 403 lors de la transcription avec diarisation.
-
-**Cause**: Token HuggingFace manquant ou conditions non acceptees.
-
-**Solution**:
-1. Creer un token: https://huggingface.co/settings/tokens
-2. Accepter les conditions: https://huggingface.co/pyannote/speaker-diarization-3.1
-3. Ajouter dans `.env`:
-```bash
-HF_TOKEN=hf_votre_token
-```
-4. Redemarrer: `docker compose -f deployment/docker/docker-compose.<profile>.yml restart`
-
-## GPU non detecte (local-gpu)
-
-**Symptome**: `nvidia-smi` fonctionne mais Docker ne voit pas le GPU.
-
-**Solution**:
-```bash
-# Installer nvidia-container-toolkit
-sudo apt install -y nvidia-container-toolkit
-sudo nvidia-ctk runtime configure --runtime=docker
-sudo systemctl restart docker
-
-# Verifier
-docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi
-```
-
-## Upload echoue (fichiers volumineux)
-
-**Symptome**: Upload de gros fichiers (>100MB) echoue.
-
-**Causes possibles**:
-- Timeout Nginx/reverse proxy
-- Limite upload trop basse
-
-**Solutions**:
-```bash
-# Si Nginx: verifier client_max_body_size dans dictia.conf
-client_max_body_size 500M;
-
-# Si Tailscale Serve: pas de limite cote Tailscale
-
-# Timeout gunicorn (dans le Dockerfile, deja a 600s)
-# Pour des fichiers tres longs, augmenter dans docker-compose:
-environment:
- - GUNICORN_TIMEOUT=1200
-```
-
-## Container dictia "unhealthy"
-
-**Symptome**: `docker ps` montre "unhealthy" pour le container dictia.
-
-**Diagnostic**:
-```bash
-# Voir les logs
-docker logs dictia --tail 50
-
-# Tester manuellement
-docker exec dictia python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"
-```
-
-**Causes courantes**:
-- `.env` mal configure (SECRET_KEY manquant)
-- Base de donnees corrompue (restaurer backup)
-- Port 8899 deja utilise
-
-## ASR Proxy: "No GPU available"
-
-**Symptome**: Transcription echoue avec "No GPU available in any Canadian zone".
-
-**Causes**:
-- GCP n'a pas de GPU disponible (capacite epuisee)
-- Credentials GCP expirees
-- Budget mensuel atteint
-
-**Diagnostic**:
-```bash
-# Verifier le statut du proxy
-curl -s http://localhost:9090/health | python3 -m json.tool
-
-# Verifier les stats (budget)
-curl -s http://localhost:9090/stats | python3 -m json.tool
-
-# Voir les logs
-journalctl -u asr-proxy --since "1 hour ago"
-```
-
-**Solutions**:
-- Attendre (GCP libere des GPUs regulierement)
-- Le proxy reessaie automatiquement apres un cooldown de 3 minutes
-- Verifier le dashboard: http://localhost:9090
-
-## Build Docker lent/echoue
-
-**Symptome**: `docker build` prend trop de temps ou echoue.
-
-**Solutions**:
-```bash
-# Limiter les ressources si le VPS est petit
-docker build --memory=2g --cpus=2 -t innova-ai/dictia:latest .
-
-# Nettoyer le cache Docker si le disque est plein
-docker builder prune -f
-docker image prune -f
-```
-
-## Base de donnees corrompue
-
-**Symptome**: Erreur SQLite au demarrage.
-
-**Solution**:
-```bash
-# Restaurer le dernier backup
-bash deployment/tools/restore.sh backups/dictia-LATEST.tar.gz
-
-# Ou recreer la base (perd les donnees)
-rm data/instance/transcriptions.db
-docker compose -f deployment/docker/docker-compose.<profile>.yml restart
-```
-
-## Port 8899 deja utilise
-
-```bash
-# Trouver qui utilise le port
-sudo lsof -i :8899
-# ou
-sudo ss -tlnp | grep 8899
-
-# Arreter le processus ou changer le port dans docker-compose
-ports:
- - "8900:8899" # utiliser 8900 a la place
-```
-
-## Mise a jour qui casse tout
-
-```bash
-# Rollback: revenir au commit precedent
-cd dictia
-git log --oneline -5 # trouver le bon commit
-git checkout <commit-hash>
-
-# Rebuild et redemarrer
-docker build -t innova-ai/dictia:latest .
-docker compose -f deployment/docker/docker-compose.<profile>.yml down
-docker compose -f deployment/docker/docker-compose.<profile>.yml up -d
-```
-
-## Commande de diagnostic rapide
-
-```bash
-# Tout verifier d'un coup
-bash deployment/tools/health-check.sh --json | python3 -m json.tool
-```
diff --git a/deployment/docs/VPS-SETUP.md b/deployment/docs/VPS-SETUP.md
deleted file mode 100644
index deff17d..0000000
--- a/deployment/docs/VPS-SETUP.md
+++ /dev/null
@@ -1,148 +0,0 @@
-# Setup VPS from scratch — DictIA
-
-Guide complet pour deployer DictIA sur un VPS Ubuntu.
-Teste sur OVH VPS avec Ubuntu 22.04/24.04.
-
-## 1. Preparation du VPS
-
-```bash
-# Mise a jour systeme
-sudo apt update && sudo apt upgrade -y
-
-# Installer les essentiels
-sudo apt install -y curl git
-```
-
-## 2. Docker
-
-```bash
-# Installer Docker (methode officielle)
-curl -fsSL https://get.docker.com | sh
-
-# Ajouter l'utilisateur au groupe docker
-sudo usermod -aG docker $USER
-
-# Se reconnecter pour appliquer le groupe
-exit
-# (reconnecter via SSH)
-
-# Verifier
-docker --version
-docker compose version
-```
-
-## 3. Tailscale (recommande)
-
-Tailscale fournit un VPN mesh pour acceder au VPS sans exposer de ports publics.
-
-```bash
-# Installer Tailscale
-curl -fsSL https://tailscale.com/install.sh | sh
-
-# Connecter au tailnet
-sudo tailscale up
-
-# Verifier
-tailscale status
-```
-
-## 4. DictIA
-
-```bash
-# Cloner le repo
-cd ~
-git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
-cd dictia
-git checkout dictia-branding
-
-# Lancer le setup
-bash deployment/setup.sh --profile cloud
-```
-
-Le setup va:
-- Generer le `.env` avec vos identifiants
-- Creer les repertoires de donnees
-- Builder l'image Docker
-- Demarrer les containers
-
-## 5. ASR Proxy (GCP GPU)
-
-```bash
-# Installer le proxy
-bash deployment/asr-proxy/setup.sh
-
-# Ajouter les credentials GCP
-# Copier votre fichier de credentials dans:
-cp ~/gcp-credentials.json deployment/asr-proxy/gcp-credentials.json
-
-# Demarrer le service
-sudo systemctl start asr-proxy
-sudo systemctl status asr-proxy
-```
-
-## 6. Securite
-
-```bash
-# Docker daemon config (log rotation)
-sudo cp deployment/security/docker-daemon.json /etc/docker/daemon.json
-sudo systemctl restart docker
-
-# Firewall iptables (bloque trafic non-Tailscale)
-sudo bash deployment/security/iptables-rules.sh
-
-# Service systemd pour les regles au boot
-sudo cp deployment/security/docker-iptables.service /etc/systemd/system/
-sudo systemctl daemon-reload
-sudo systemctl enable docker-iptables
-```
-
-## 7. Tailscale Serve (HTTPS)
-
-```bash
-# Expose DictIA et le dashboard ASR via Tailscale HTTPS
-bash deployment/config/tailscale/setup-serve.sh
-
-# Verifier
-tailscale serve status
-```
-
-DictIA sera accessible a `https://votre-hostname.tailnet.ts.net/`.
-
-## 8. Service systemd (auto-start)
-
-```bash
-# Adapter le chemin dans le fichier si necessaire
-sudo cp deployment/config/systemd/dictia.service /etc/systemd/system/
-sudo systemctl daemon-reload
-sudo systemctl enable dictia
-```
-
-## 9. Verification
-
-```bash
-# Health check complet
-bash deployment/tools/health-check.sh
-
-# Verifier les endpoints
-curl -s http://localhost:8899/health
-curl -s http://localhost:9090/health
-```
-
-## 10. Premier backup
-
-```bash
-bash deployment/tools/backup.sh
-```
-
----
-
-## Checklist post-installation
-
-- [ ] DictIA repond sur :8899
-- [ ] ASR Proxy repond sur :9090
-- [ ] Tailscale Serve configure
-- [ ] iptables: seul Tailscale peut acceder
-- [ ] Docker: log rotation configuree
-- [ ] Service systemd enable (auto-start au boot)
-- [ ] Premier backup effectue
-- [ ] Identifiants admin testes
diff --git a/deployment/profiles/docker-compose.dictia16.yml b/deployment/profiles/docker-compose.dictia16.yml
deleted file mode 100644
index a553bb7..0000000
--- a/deployment/profiles/docker-compose.dictia16.yml
+++ /dev/null
@@ -1,101 +0,0 @@
-# =============================================================================
-# DictIA 16 — Docker Compose
-# GPU : RTX 5070 Ti (16 Go VRAM)
-# =============================================================================
-#
-# Services :
-# - dictia : Application principale DictIA
-# - whisperx-asr : Service de transcription WhisperX Large-v3
-# - ollama : LLM local Mistral 7B (résumés, chat, Q&A)
-#
-# Démarrage :
-# 1. cp config/env.dictia16.example .env
-# 2. docker compose -f config/docker-compose.dictia16.yml up -d
-# 3. Télécharger Mistral : docker exec ollama ollama pull mistral
-#
-# Note : Aucune clé API nécessaire — tout tourne en local (100% privé).
-# =============================================================================
-
-services:
-
- # ---------------------------------------------------------------------------
- # Application DictIA
- # ---------------------------------------------------------------------------
- dictia:
- image: dictia:latest
- container_name: dictia
- restart: unless-stopped
- ports:
- - "8899:8899"
- env_file:
- - ../.env
- environment:
- - LOG_LEVEL=ERROR
- volumes:
- - ../uploads:/data/uploads
- - ../instance:/data/instance
- # Décommenter pour l'export automatique :
- # - ../exports:/data/exports
- # Décommenter pour le traitement automatique :
- # - ../auto-process:/data/auto-process
- depends_on:
- - whisperx-asr
- - ollama
- networks:
- - dictia-net
-
- # ---------------------------------------------------------------------------
- # WhisperX ASR — Transcription locale (WhisperX Large-v3)
- # RTX 5070 Ti : BATCH_SIZE=32, COMPUTE_TYPE=float16
- # ---------------------------------------------------------------------------
- whisperx-asr:
- image: murtazanasir/whisperx-asr-service:latest
- container_name: whisperx-asr
- restart: unless-stopped
- environment:
- - HF_TOKEN=${HF_TOKEN}
- - DEVICE=cuda
- - COMPUTE_TYPE=float16
- - BATCH_SIZE=32
- - DEFAULT_MODEL=large-v3
- volumes:
- - whisperx-models:/root/.cache
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: 1
- capabilities: [gpu]
- networks:
- - dictia-net
-
- # ---------------------------------------------------------------------------
- # Ollama — LLM local Mistral 7B
- # Résumés, points d'action, Q&A — 100% local, aucune donnée externe
- # ---------------------------------------------------------------------------
- ollama:
- image: ollama/ollama:latest
- container_name: ollama
- restart: unless-stopped
- volumes:
- - ollama-models:/root/.ollama
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: 1
- capabilities: [gpu]
- networks:
- - dictia-net
-
-networks:
- dictia-net:
- driver: bridge
-
-volumes:
- whisperx-models:
- driver: local
- ollama-models:
- driver: local
diff --git a/deployment/profiles/docker-compose.dictia8.yml b/deployment/profiles/docker-compose.dictia8.yml
deleted file mode 100644
index dc4c6ed..0000000
--- a/deployment/profiles/docker-compose.dictia8.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-# =============================================================================
-# DictIA 8 — Docker Compose
-# GPU : RTX 5060 (8 Go VRAM)
-# =============================================================================
-#
-# Services :
-# - dictia : Application principale DictIA
-# - whisperx-asr : Service de transcription WhisperX Large-v3
-#
-# Démarrage :
-# 1. cp config/env.dictia8.example .env
-# 2. Remplir TEXT_MODEL_API_KEY dans .env
-# 3. docker compose -f config/docker-compose.dictia8.yml up -d
-# =============================================================================
-
-services:
-
- # ---------------------------------------------------------------------------
- # Application DictIA
- # ---------------------------------------------------------------------------
- dictia:
- image: dictia:latest
- container_name: dictia
- restart: unless-stopped
- ports:
- - "8899:8899"
- env_file:
- - ../.env
- environment:
- - LOG_LEVEL=ERROR
- volumes:
- - ../uploads:/data/uploads
- - ../instance:/data/instance
- # Décommenter pour l'export automatique :
- # - ../exports:/data/exports
- # Décommenter pour le traitement automatique :
- # - ../auto-process:/data/auto-process
- depends_on:
- - whisperx-asr
- networks:
- - dictia-net
-
- # ---------------------------------------------------------------------------
- # WhisperX ASR — Transcription locale (WhisperX Large-v3)
- # RTX 5060 : BATCH_SIZE=16, COMPUTE_TYPE=float16
- # ---------------------------------------------------------------------------
- whisperx-asr:
- image: murtazanasir/whisperx-asr-service:latest
- container_name: whisperx-asr
- restart: unless-stopped
- environment:
- - HF_TOKEN=${HF_TOKEN}
- - DEVICE=cuda
- - COMPUTE_TYPE=float16
- - BATCH_SIZE=16
- - DEFAULT_MODEL=large-v3
- volumes:
- - whisperx-models:/root/.cache
- deploy:
- resources:
- reservations:
- devices:
- - driver: nvidia
- count: 1
- capabilities: [gpu]
- networks:
- - dictia-net
-
-networks:
- dictia-net:
- driver: bridge
-
-volumes:
- whisperx-models:
- driver: local
diff --git a/deployment/profiles/env.dictia16.example b/deployment/profiles/env.dictia16.example
deleted file mode 100644
index 8335fe1..0000000
--- a/deployment/profiles/env.dictia16.example
+++ /dev/null
@@ -1,134 +0,0 @@
-# =============================================================================
-# DictIA 16 — Configuration (.env)
-# GPU : RTX 5070 Ti (16 Go VRAM)
-# =============================================================================
-#
-# Architecture :
-# - Transcription : WhisperX Large-v3 (local, ~5,5 Go VRAM)
-# - LLM (résumés) : Mistral 7B local via Ollama (~6,4 Go VRAM)
-# - Mode : Séquentiel (transcription puis résumé)
-# - Total VRAM : ~11,9 Go / 16 Go (marge ~4,1 Go)
-#
-# Démarrage rapide :
-# 1. cp config/env.dictia16.example .env
-# 2. Aucune clé API nécessaire — tout tourne en local
-# 3. docker compose -f config/docker-compose.dictia16.yml up -d
-# =============================================================================
-
-# =============================================================================
-# MODÈLE DE TEXTE — Résumés, titres, chat (LLM LOCAL)
-# =============================================================================
-# DictIA 16 utilise Mistral 7B en local via Ollama.
-# Aucune donnée ne quitte le serveur — 100% privé.
-
-TEXT_MODEL_BASE_URL=http://ollama:11434/v1
-TEXT_MODEL_API_KEY=not-required
-TEXT_MODEL_NAME=mistral
-
-# --- Modèle de chat séparé (optionnel) ---
-# Même modèle par défaut, mais peut être changé pour un modèle plus rapide.
-# CHAT_MODEL_API_KEY=not-required
-# CHAT_MODEL_BASE_URL=http://ollama:11434/v1
-# CHAT_MODEL_NAME=mistral
-
-# =============================================================================
-# TRANSCRIPTION — WhisperX ASR local (REQUIS)
-# =============================================================================
-# WhisperX tourne en local dans un conteneur Docker séparé.
-# Le service ASR est défini dans docker-compose.dictia16.yml.
-
-ASR_BASE_URL=http://whisperx-asr:9000
-
-# Diarisation (identification automatique des locuteurs) — recommandé
-ASR_DIARIZE=true
-ASR_RETURN_SPEAKER_EMBEDDINGS=true
-
-# Nombre de locuteurs attendus (optionnel — aide la précision)
-# ASR_MIN_SPEAKERS=1
-# ASR_MAX_SPEAKERS=6
-
-# =============================================================================
-# PARAMÈTRES ADMINISTRATEUR
-# =============================================================================
-ADMIN_USERNAME=admin
-ADMIN_EMAIL=admin@votreentreprise.com
-ADMIN_PASSWORD=changeme
-
-# =============================================================================
-# ACCÈS ET INSCRIPTION
-# =============================================================================
-# Désactiver l'inscription publique (accès sur invitation uniquement)
-ALLOW_REGISTRATION=false
-
-# Restreindre l'inscription aux domaines autorisés
-# Exemple : REGISTRATION_ALLOWED_DOMAINS=votreentreprise.com
-REGISTRATION_ALLOWED_DOMAINS=
-
-# =============================================================================
-# FUSEAU HORAIRE
-# =============================================================================
-# Exemples : America/Toronto, America/Montreal, America/New_York, UTC
-TIMEZONE="America/Toronto"
-
-# =============================================================================
-# LIMITES DE TOKENS
-# =============================================================================
-SUMMARY_MAX_TOKENS=8000
-CHAT_MAX_TOKENS=5000
-
-# =============================================================================
-# COMPRESSION AUDIO
-# =============================================================================
-AUDIO_COMPRESS_UPLOADS=true
-AUDIO_CODEC=mp3
-AUDIO_BITRATE=128k
-
-# =============================================================================
-# FONCTIONNALITÉS OPTIONNELLES
-# =============================================================================
-
-# Inquire Mode — recherche IA sur tous les enregistrements
-# Peut être activé sur DictIA 16 (plus de VRAM disponible)
-ENABLE_INQUIRE_MODE=false
-
-# Traitement automatique de fichiers (dossier surveillé)
-ENABLE_AUTO_PROCESSING=false
-# AUTO_PROCESS_MODE=admin_only
-# AUTO_PROCESS_WATCH_DIR=/data/auto-process
-
-# Export automatique
-ENABLE_AUTO_EXPORT=false
-# AUTO_EXPORT_DIR=/data/exports
-# AUTO_EXPORT_TRANSCRIPTION=true
-# AUTO_EXPORT_SUMMARY=true
-
-# Suppression automatique / rétention
-ENABLE_AUTO_DELETION=false
-# GLOBAL_RETENTION_DAYS=90
-# DELETION_MODE=audio_only
-
-# =============================================================================
-# PARTAGE
-# =============================================================================
-ENABLE_INTERNAL_SHARING=false
-ENABLE_PUBLIC_SHARING=true
-USERS_CAN_DELETE=true
-
-# =============================================================================
-# FILES D'ATTENTE DE TRAITEMENT
-# =============================================================================
-JOB_QUEUE_WORKERS=2
-SUMMARY_QUEUE_WORKERS=2
-JOB_MAX_RETRIES=3
-
-# =============================================================================
-# BASE DE DONNÉES ET STOCKAGE
-# =============================================================================
-SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
-UPLOAD_FOLDER=/data/uploads
-
-# =============================================================================
-# JOURNALISATION
-# =============================================================================
-# ERROR = production (minimal), INFO = débogage, DEBUG = développement
-LOG_LEVEL=ERROR
diff --git a/deployment/profiles/env.dictia8.example b/deployment/profiles/env.dictia8.example
deleted file mode 100644
index 3efbbe5..0000000
--- a/deployment/profiles/env.dictia8.example
+++ /dev/null
@@ -1,126 +0,0 @@
-# =============================================================================
-# DictIA 8 — Configuration (.env)
-# GPU : RTX 5060 (8 Go VRAM)
-# =============================================================================
-#
-# Architecture :
-# - Transcription : WhisperX Large-v3 (local, ~5,5 Go VRAM)
-# - LLM (résumés) : API cloud via OpenRouter (VRAM insuffisante pour LLM local)
-#
-# Démarrage rapide :
-# 1. cp config/env.dictia8.example .env
-# 2. Remplir TRANSCRIPTION_API_KEY et TEXT_MODEL_API_KEY
-# 3. docker compose -f config/docker-compose.dictia8.yml up -d
-# =============================================================================
-
-# =============================================================================
-# MODÈLE DE TEXTE — Résumés, titres, chat (REQUIS)
-# =============================================================================
-# DictIA 8 utilise un LLM cloud via OpenRouter (VRAM insuffisante pour LLM local).
-# Inscrivez-vous sur https://openrouter.ai pour obtenir une clé API.
-
-TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
-TEXT_MODEL_API_KEY=votre_cle_openrouter
-TEXT_MODEL_NAME=openai/gpt-4o-mini
-
-# =============================================================================
-# TRANSCRIPTION — WhisperX ASR local (REQUIS)
-# =============================================================================
-# WhisperX tourne en local dans un conteneur Docker séparé.
-# Le service ASR est défini dans docker-compose.dictia8.yml.
-
-ASR_BASE_URL=http://whisperx-asr:9000
-
-# Diarisation (identification automatique des locuteurs) — recommandé
-ASR_DIARIZE=true
-ASR_RETURN_SPEAKER_EMBEDDINGS=true
-
-# Nombre de locuteurs attendus (optionnel — aide la précision)
-# ASR_MIN_SPEAKERS=1
-# ASR_MAX_SPEAKERS=6
-
-# =============================================================================
-# PARAMÈTRES ADMINISTRATEUR
-# =============================================================================
-ADMIN_USERNAME=admin
-ADMIN_EMAIL=admin@votreentreprise.com
-ADMIN_PASSWORD=changeme
-
-# =============================================================================
-# ACCÈS ET INSCRIPTION
-# =============================================================================
-# Désactiver l'inscription publique (accès sur invitation uniquement)
-ALLOW_REGISTRATION=false
-
-# Restreindre l'inscription aux domaines autorisés
-# Exemple : REGISTRATION_ALLOWED_DOMAINS=votreentreprise.com
-REGISTRATION_ALLOWED_DOMAINS=
-
-# =============================================================================
-# FUSEAU HORAIRE
-# =============================================================================
-# Exemples : America/Toronto, America/Montreal, America/New_York, UTC
-TIMEZONE="America/Toronto"
-
-# =============================================================================
-# LIMITES DE TOKENS
-# =============================================================================
-SUMMARY_MAX_TOKENS=8000
-CHAT_MAX_TOKENS=5000
-
-# =============================================================================
-# COMPRESSION AUDIO
-# =============================================================================
-AUDIO_COMPRESS_UPLOADS=true
-AUDIO_CODEC=mp3
-AUDIO_BITRATE=128k
-
-# =============================================================================
-# FONCTIONNALITÉS OPTIONNELLES
-# =============================================================================
-
-# Inquire Mode — recherche IA sur tous les enregistrements
-# Désactivé sur DictIA 8 (VRAM insuffisante pour embeddings locaux)
-ENABLE_INQUIRE_MODE=false
-
-# Traitement automatique de fichiers (dossier surveillé)
-ENABLE_AUTO_PROCESSING=false
-# AUTO_PROCESS_MODE=admin_only
-# AUTO_PROCESS_WATCH_DIR=/data/auto-process
-
-# Export automatique
-ENABLE_AUTO_EXPORT=false
-# AUTO_EXPORT_DIR=/data/exports
-# AUTO_EXPORT_TRANSCRIPTION=true
-# AUTO_EXPORT_SUMMARY=true
-
-# Suppression automatique / rétention
-ENABLE_AUTO_DELETION=false
-# GLOBAL_RETENTION_DAYS=90
-# DELETION_MODE=audio_only
-
-# =============================================================================
-# PARTAGE
-# =============================================================================
-ENABLE_INTERNAL_SHARING=false
-ENABLE_PUBLIC_SHARING=true
-USERS_CAN_DELETE=true
-
-# =============================================================================
-# FILES D'ATTENTE DE TRAITEMENT
-# =============================================================================
-JOB_QUEUE_WORKERS=2
-SUMMARY_QUEUE_WORKERS=2
-JOB_MAX_RETRIES=3
-
-# =============================================================================
-# BASE DE DONNÉES ET STOCKAGE
-# =============================================================================
-SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
-UPLOAD_FOLDER=/data/uploads
-
-# =============================================================================
-# JOURNALISATION
-# =============================================================================
-# ERROR = production (minimal), INFO = débogage, DEBUG = développement
-LOG_LEVEL=ERROR
diff --git a/deployment/security/docker-daemon.json b/deployment/security/docker-daemon.json
deleted file mode 100644
index 217a460..0000000
--- a/deployment/security/docker-daemon.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
- "log-driver": "json-file",
- "log-opts": {
- "max-size": "10m",
- "max-file": "3"
- },
- "storage-driver": "overlay2"
-}
diff --git a/deployment/security/docker-iptables.service b/deployment/security/docker-iptables.service
deleted file mode 100644
index 5a78b28..0000000
--- a/deployment/security/docker-iptables.service
+++ /dev/null
@@ -1,12 +0,0 @@
-[Unit]
-Description=DictIA Docker iptables rules
-After=docker.service tailscaled.service
-Requires=docker.service
-
-[Service]
-Type=oneshot
-RemainAfterExit=yes
-ExecStart=/bin/bash /opt/dictia/deployment/security/iptables-rules.sh
-
-[Install]
-WantedBy=multi-user.target
diff --git a/deployment/security/iptables-rules.sh b/deployment/security/iptables-rules.sh
deleted file mode 100644
index 376cd7c..0000000
--- a/deployment/security/iptables-rules.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — iptables rules for cloud VPS
-#
-# Allows Docker internal traffic to reach the ASR proxy on port 9090.
-# Blocks direct external access to Docker container IPs.
-# Tailscale + UFW handle the main firewall — this script adds Docker-specific rules.
-#
-# Usage: sudo bash iptables-rules.sh
-set -euo pipefail
-
-echo "=== DictIA iptables rules ==="
-
-# Allow Docker containers (172.16.0.0/12) to reach ASR proxy on port 9090
-# This rule goes BEFORE the default DROP policy so containers can talk to the proxy
-iptables -C INPUT -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT 2>/dev/null \
- || iptables -I INPUT 1 -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT
-
-# Block direct external access to Docker container IPs (raw table, before conntrack)
-# Protects containers on non-default bridge networks (e.g., dictia-network)
-for NETWORK_ID in $(docker network ls --filter driver=bridge --format '{{.ID}}' 2>/dev/null); do
- BRIDGE=$(docker network inspect "$NETWORK_ID" --format '{{.Options.com.docker.network.bridge.name}}' 2>/dev/null || echo "")
- [ -z "$BRIDGE" ] && continue
- [ "$BRIDGE" = "docker0" ] && continue
-
- for CONTAINER_IP in $(docker network inspect "$NETWORK_ID" \
- --format '{{range .Containers}}{{.IPv4Address}} {{end}}' 2>/dev/null); do
- IP="${CONTAINER_IP%/*}"
- [ -z "$IP" ] && continue
- iptables -t raw -C PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP 2>/dev/null \
- || iptables -t raw -A PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP
- echo " Protected $IP on $BRIDGE"
- done
-done
-
-echo "Rules applied. Tailscale + Docker internal traffic allowed."
-echo "Verify with: sudo iptables -L -n -t raw"
diff --git a/deployment/setup.sh b/deployment/setup.sh
deleted file mode 100755
index dbf7fe3..0000000
--- a/deployment/setup.sh
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Main setup script
-#
-# Interactive installer that detects hardware and configures the appropriate
-# deployment profile (cloud, local-cpu, local-gpu).
-#
-# Usage:
-# bash deployment/setup.sh # Interactive mode
-# bash deployment/setup.sh --profile cloud # Non-interactive
-# bash deployment/setup.sh --profile local-gpu
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
-PROFILE=""
-
-for arg in "$@"; do
- case "$arg" in
- --profile=*) PROFILE="${arg#*=}" ;;
- --profile) shift_next=true ;;
- *)
- if [ "${shift_next:-false}" = true ]; then
- PROFILE="$arg"
- shift_next=false
- fi
- ;;
- esac
-done
-
-# --- Colors ---
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-CYAN='\033[0;36m'
-NC='\033[0m'
-
-info() { echo -e "${CYAN}[INFO]${NC} $*"; }
-ok() { echo -e "${GREEN}[OK]${NC} $*"; }
-warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
-err() { echo -e "${RED}[ERROR]${NC} $*"; }
-
-echo
-echo -e "${CYAN}========================================${NC}"
-echo -e "${CYAN} DictIA — Setup${NC}"
-echo -e "${CYAN}========================================${NC}"
-echo
-
-# ==========================================================================
-# 1. Hardware Detection
-# ==========================================================================
-info "Detecting hardware..."
-
-# Docker
-if command -v docker &>/dev/null && docker info &>/dev/null; then
- DOCKER_VERSION=$(docker --version | grep -oP '\d+\.\d+\.\d+' | head -1)
- ok "Docker $DOCKER_VERSION"
-else
- err "Docker not found or not running."
- echo " Install Docker: https://docs.docker.com/engine/install/"
- exit 1
-fi
-
-# Docker Compose
-if docker compose version &>/dev/null; then
- COMPOSE_VERSION=$(docker compose version --short 2>/dev/null || echo "unknown")
- ok "Docker Compose $COMPOSE_VERSION"
-else
- err "Docker Compose not found."
- echo " Docker Compose V2 is required (comes with Docker Desktop or docker-compose-plugin)"
- exit 1
-fi
-
-# GPU
-HAS_GPU=false
-if command -v nvidia-smi &>/dev/null; then
- GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 || echo "")
- if [ -n "$GPU_NAME" ]; then
- HAS_GPU=true
- ok "NVIDIA GPU: $GPU_NAME"
- # Check nvidia-container-toolkit
- if docker info 2>/dev/null | grep -qi nvidia; then
- ok "nvidia-container-toolkit detected"
- else
- warn "nvidia-container-toolkit not detected. Required for local-gpu profile."
- echo " Install: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
- fi
- fi
-else
- info "No NVIDIA GPU detected"
-fi
-
-# RAM
-if command -v free &>/dev/null; then
- RAM_GB=$(free -g | awk '/Mem:/{print $2}')
- info "RAM: ${RAM_GB}GB"
-fi
-
-# Disk
-DISK_AVAIL=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $4}')
-info "Disk available: $DISK_AVAIL"
-
-echo
-
-# ==========================================================================
-# 2. Profile Selection
-# ==========================================================================
-if [ -z "$PROFILE" ]; then
- echo -e "${CYAN}Select deployment profile:${NC}"
- echo
- echo " 1) cloud — VPS with ASR Proxy (GCP GPU on demand)"
- echo " Best for: remote servers, pay-per-use GPU"
- echo
- echo " 2) local-gpu — Local NVIDIA GPU for transcription"
- echo " Best for: dedicated GPU server, fastest"
- if [ "$HAS_GPU" = false ]; then
- echo -e " ${YELLOW}(No GPU detected on this machine)${NC}"
- fi
- echo
- echo " 3) local-cpu — CPU-only transcription (slow)"
- echo " Best for: testing, low-volume usage"
- echo
- read -rp "Choice [1-3]: " CHOICE
- case "$CHOICE" in
- 1) PROFILE="cloud" ;;
- 2) PROFILE="local-gpu" ;;
- 3) PROFILE="local-cpu" ;;
- *) err "Invalid choice"; exit 1 ;;
- esac
-fi
-
-COMPOSE_FILE="$SCRIPT_DIR/docker/docker-compose.$PROFILE.yml"
-if [ ! -f "$COMPOSE_FILE" ]; then
- err "Compose file not found: $COMPOSE_FILE"
- exit 1
-fi
-
-ok "Profile: $PROFILE"
-echo
-
-# ==========================================================================
-# 3. Generate .env
-# ==========================================================================
-ENV_FILE="$PROJECT_DIR/.env"
-
-if [ -f "$ENV_FILE" ]; then
- warn ".env already exists. Keeping existing configuration."
- echo " To reconfigure, delete .env and re-run setup."
-else
- info "Generating .env..."
-
- # Generate secret key
- SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_hex(32))" 2>/dev/null \
- || openssl rand -hex 32 2>/dev/null \
- || head -c 64 /dev/urandom | xxd -p | head -c 64)
-
- # Prompt for admin credentials
- read -rp "Admin username [admin]: " ADMIN_USER
- ADMIN_USER="${ADMIN_USER:-admin}"
- read -rp "Admin email [admin@example.com]: " ADMIN_EMAIL
- ADMIN_EMAIL="${ADMIN_EMAIL:-admin@example.com}"
- read -rsp "Admin password: " ADMIN_PASS
- echo
- ADMIN_PASS="${ADMIN_PASS:-changeme}"
-
- # Prompt for text model API key
- echo
- info "DictIA needs a text/LLM API key for summaries, titles, and chat."
- echo " Recommended: OpenRouter (https://openrouter.ai) — access to many models"
- read -rp "Text model API key (or press Enter to skip): " TEXT_API_KEY
- TEXT_API_KEY="${TEXT_API_KEY:-your_openrouter_api_key}"
-
- # HuggingFace token for diarization
- if [ "$PROFILE" != "cloud" ]; then
- echo
- info "For speaker diarization, a HuggingFace token is needed."
- echo " Get one at: https://huggingface.co/settings/tokens"
- echo " Accept model: https://huggingface.co/pyannote/speaker-diarization-3.1"
- read -rp "HuggingFace token (or press Enter to skip): " HF_TOKEN
- HF_TOKEN="${HF_TOKEN:-}"
- else
- HF_TOKEN=""
- fi
-
- # Write .env
- cp "$SCRIPT_DIR/docker/.env.example" "$ENV_FILE"
- sed -i "s|SECRET_KEY=.*|SECRET_KEY=$SECRET_KEY|" "$ENV_FILE"
- sed -i "s|DICTIA_PROFILE=.*|DICTIA_PROFILE=$PROFILE|" "$ENV_FILE"
- sed -i "s|ADMIN_USERNAME=.*|ADMIN_USERNAME=$ADMIN_USER|" "$ENV_FILE"
- sed -i "s|ADMIN_EMAIL=.*|ADMIN_EMAIL=$ADMIN_EMAIL|" "$ENV_FILE"
- sed -i "s|ADMIN_PASSWORD=.*|ADMIN_PASSWORD=$ADMIN_PASS|" "$ENV_FILE"
- sed -i "s|TEXT_MODEL_API_KEY=.*|TEXT_MODEL_API_KEY=$TEXT_API_KEY|" "$ENV_FILE"
- sed -i "s|HF_TOKEN=.*|HF_TOKEN=$HF_TOKEN|" "$ENV_FILE"
-
- ok ".env generated"
-fi
-echo
-
-# ==========================================================================
-# 4. Create data directories
-# ==========================================================================
-info "Creating data directories..."
-mkdir -p "$PROJECT_DIR/data/uploads" "$PROJECT_DIR/data/instance"
-ok "data/uploads and data/instance created"
-echo
-
-# ==========================================================================
-# 5. Profile-specific setup
-# ==========================================================================
-case "$PROFILE" in
- cloud)
- info "Cloud profile — setting up ASR Proxy..."
- if [ -f "$SCRIPT_DIR/asr-proxy/setup.sh" ]; then
- echo " Run the ASR proxy setup separately:"
- echo " bash $SCRIPT_DIR/asr-proxy/setup.sh"
- fi
- echo
- info "Setting up iptables rules..."
- if [ -f "$SCRIPT_DIR/security/iptables-rules.sh" ] && [ "$(id -u)" -eq 0 ]; then
- bash "$SCRIPT_DIR/security/iptables-rules.sh"
- else
- echo " Run as root: sudo bash $SCRIPT_DIR/security/iptables-rules.sh"
- fi
- echo
- info "Setting up Tailscale Serve..."
- if command -v tailscale &>/dev/null; then
- echo " Run: bash $SCRIPT_DIR/config/tailscale/setup-serve.sh"
- else
- warn "Tailscale not installed."
- echo " Install: curl -fsSL https://tailscale.com/install.sh | sh"
- fi
- ;;
- local-gpu)
- info "Local GPU profile — verifying NVIDIA runtime..."
- if docker info 2>/dev/null | grep -qi nvidia; then
- ok "NVIDIA Docker runtime available"
- # Quick GPU test
- if docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi &>/dev/null; then
- ok "GPU test passed"
- else
- warn "GPU test failed. Check nvidia-container-toolkit installation."
- fi
- else
- err "NVIDIA Docker runtime not found."
- echo " Install nvidia-container-toolkit and restart Docker."
- echo " https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
- fi
- ;;
- local-cpu)
- warn "CPU-only transcription is significantly slower than GPU."
- echo " Expect ~10x real-time (1h audio = ~10h processing)."
- echo " Consider local-gpu or cloud profile for better performance."
- ;;
-esac
-
-echo
-
-# ==========================================================================
-# 6. Build and start
-# ==========================================================================
-info "Building DictIA Docker image..."
-cd "$PROJECT_DIR"
-docker build -t innova-ai/dictia:latest .
-ok "Image built"
-
-echo
-info "Starting DictIA ($PROFILE profile)..."
-docker compose -f "$COMPOSE_FILE" up -d
-ok "Containers started"
-
-# ==========================================================================
-# 7. Health check
-# ==========================================================================
-echo
-info "Waiting for DictIA to become healthy..."
-RETRIES=30
-for i in $(seq 1 $RETRIES); do
- if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
- ok "DictIA is healthy!"
- break
- fi
- if [ "$i" -eq "$RETRIES" ]; then
- warn "Health check timeout. Check logs: docker compose -f $COMPOSE_FILE logs"
- fi
- sleep 5
-done
-
-echo
-echo -e "${GREEN}========================================${NC}"
-echo -e "${GREEN} DictIA is ready!${NC}"
-echo -e "${GREEN}========================================${NC}"
-echo
-echo " App: http://localhost:8899"
-echo " Profile: $PROFILE"
-echo " Compose: $COMPOSE_FILE"
-echo
-echo " Tools:"
-echo " Update: bash deployment/tools/update.sh"
-echo " Backup: bash deployment/tools/backup.sh"
-echo " Health check: bash deployment/tools/health-check.sh"
-echo
diff --git a/deployment/tools/backup.sh b/deployment/tools/backup.sh
deleted file mode 100644
index 17ee50a..0000000
--- a/deployment/tools/backup.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Backup script
-#
-# Creates a timestamped backup of data, env, and Docker volumes.
-# Keeps the last N backups (default: 5).
-#
-# Usage: bash backup.sh [BACKUP_DIR]
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
-BACKUP_BASE="${1:-$PROJECT_DIR/backups}"
-KEEP_COUNT=5
-TIMESTAMP=$(date +%Y%m%d-%H%M%S)
-BACKUP_DIR="$BACKUP_BASE/dictia-$TIMESTAMP"
-
-echo "=== DictIA Backup ==="
-echo "Project: $PROJECT_DIR"
-echo "Backup: $BACKUP_DIR"
-echo
-
-mkdir -p "$BACKUP_DIR"
-
-# 1. Data directory
-if [ -d "$PROJECT_DIR/data" ]; then
- echo "[1/4] Backing up data/..."
- cp -a "$PROJECT_DIR/data" "$BACKUP_DIR/data"
-else
- echo "[1/4] No data/ directory found, skipping."
-fi
-
-# 2. Environment file
-if [ -f "$PROJECT_DIR/.env" ]; then
- echo "[2/4] Backing up .env..."
- cp "$PROJECT_DIR/.env" "$BACKUP_DIR/dot-env"
-else
- echo "[2/4] No .env found, skipping."
-fi
-
-# 3. ASR Proxy stats
-ASR_STATS="$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
-if [ -f "$ASR_STATS" ]; then
- echo "[3/4] Backing up ASR proxy stats..."
- cp "$ASR_STATS" "$BACKUP_DIR/asr-usage-stats.json"
-else
- echo "[3/4] No ASR proxy stats, skipping."
-fi
-
-# 4. Docker volumes (if using managed volumes)
-echo "[4/4] Checking Docker volumes..."
-if docker volume ls --format '{{.Name}}' 2>/dev/null | grep -q "whisperx-cache"; then
- echo " Exporting whisperx-cache volume..."
- docker run --rm -v whisperx-cache:/source -v "$BACKUP_DIR":/backup \
- alpine tar czf /backup/whisperx-cache.tar.gz -C /source . 2>/dev/null || true
-fi
-
-# Write manifest
-cat > "$BACKUP_DIR/manifest.json" </dev/null | wc -l)
-if [ "$BACKUP_COUNT" -gt "$KEEP_COUNT" ]; then
- echo
- echo "Rotating backups (keeping last $KEEP_COUNT)..."
- ls -1t "$BACKUP_BASE"/dictia-*.tar.gz | tail -n +"$((KEEP_COUNT + 1))" | xargs rm -f
-fi
-
-echo
-echo "=== Backup complete ==="
diff --git a/deployment/tools/health-check.sh b/deployment/tools/health-check.sh
deleted file mode 100644
index 8075289..0000000
--- a/deployment/tools/health-check.sh
+++ /dev/null
@@ -1,157 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Health check diagnostic
-#
-# Checks Docker, containers, endpoints, disk, RAM, and GPU.
-#
-# Usage:
-# bash health-check.sh # Human-readable output
-# bash health-check.sh --json # JSON output
-# bash health-check.sh --quiet # Exit code only (0=ok, 1=issue)
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
-OUTPUT="human"
-ISSUES=0
-
-for arg in "$@"; do
- case "$arg" in
- --json) OUTPUT="json" ;;
- --quiet) OUTPUT="quiet" ;;
- esac
-done
-
-declare -A CHECKS
-
-check() {
- local name="$1"
- local status="$2"
- local detail="${3:-}"
- CHECKS["$name"]="$status|$detail"
- if [ "$status" = "error" ] || [ "$status" = "warning" ]; then
- ISSUES=$((ISSUES + 1))
- fi
-}
-
-# --- Docker ---
-if command -v docker &>/dev/null && docker info &>/dev/null; then
- check "docker" "ok" "Docker daemon running"
-else
- check "docker" "error" "Docker not available"
-fi
-
-# --- Containers ---
-DICTIA_STATUS=$(docker inspect --format='{{.State.Health.Status}}' dictia 2>/dev/null || echo "not_found")
-if [ "$DICTIA_STATUS" = "healthy" ]; then
- check "container_dictia" "ok" "healthy"
-elif [ "$DICTIA_STATUS" = "not_found" ]; then
- check "container_dictia" "error" "container not found"
-else
- check "container_dictia" "warning" "$DICTIA_STATUS"
-fi
-
-WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null || echo "not_found")
-if [ "$WHISPERX_STATUS" = "running" ]; then
- check "container_whisperx" "ok" "running"
-elif [ "$WHISPERX_STATUS" = "not_found" ]; then
- check "container_whisperx" "info" "not present (cloud profile?)"
-else
- check "container_whisperx" "warning" "$WHISPERX_STATUS"
-fi
-
-# --- Endpoints ---
-if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
- check "endpoint_dictia" "ok" "http://localhost:8899 responding"
-else
- check "endpoint_dictia" "error" "http://localhost:8899 not responding"
-fi
-
-if curl -sf -o /dev/null -m 5 http://localhost:9000/health 2>/dev/null; then
- check "endpoint_whisperx" "ok" "http://localhost:9000 responding"
-else
- check "endpoint_whisperx" "info" "http://localhost:9000 not responding"
-fi
-
-if curl -sf -o /dev/null -m 5 http://localhost:9090/health 2>/dev/null; then
- check "endpoint_asr_proxy" "ok" "http://localhost:9090 responding"
-else
- check "endpoint_asr_proxy" "info" "http://localhost:9090 not responding"
-fi
-
-# --- Disk ---
-DISK_USED=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $5}' | tr -d '%')
-if [ -n "$DISK_USED" ]; then
- if [ "$DISK_USED" -gt 90 ]; then
- check "disk" "error" "${DISK_USED}% used"
- elif [ "$DISK_USED" -gt 80 ]; then
- check "disk" "warning" "${DISK_USED}% used"
- else
- check "disk" "ok" "${DISK_USED}% used"
- fi
-fi
-
-# --- RAM ---
-if command -v free &>/dev/null; then
- MEM_TOTAL=$(free -m | awk '/Mem:/{print $2}')
- MEM_AVAIL=$(free -m | awk '/Mem:/{print $7}')
- MEM_USED_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
- if [ "$MEM_USED_PCT" -gt 90 ]; then
- check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
- else
- check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
- fi
-fi
-
-# --- GPU ---
-if command -v nvidia-smi &>/dev/null; then
- GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader 2>/dev/null || echo "error")
- if [ "$GPU_INFO" != "error" ]; then
- check "gpu" "ok" "$GPU_INFO"
- else
- check "gpu" "warning" "nvidia-smi present but query failed"
- fi
-fi
-
-# --- Output ---
-if [ "$OUTPUT" = "json" ]; then
- echo "{"
- echo " \"timestamp\": \"$(date -Is)\","
- echo " \"issues\": $ISSUES,"
- echo " \"checks\": {"
- FIRST=true
- for name in "${!CHECKS[@]}"; do
- IFS='|' read -r status detail <<< "${CHECKS[$name]}"
- if [ "$FIRST" = true ]; then
- FIRST=false
- else
- echo ","
- fi
- printf ' "%s": {"status": "%s", "detail": "%s"}' "$name" "$status" "$detail"
- done
- echo
- echo " }"
- echo "}"
-elif [ "$OUTPUT" = "quiet" ]; then
- exit $( [ "$ISSUES" -eq 0 ] && echo 0 || echo 1 )
-else
- echo "=== DictIA Health Check ==="
- echo
- for name in docker container_dictia container_whisperx endpoint_dictia endpoint_whisperx endpoint_asr_proxy disk memory gpu; do
- if [ -n "${CHECKS[$name]+x}" ]; then
- IFS='|' read -r status detail <<< "${CHECKS[$name]}"
- case "$status" in
- ok) ICON="[OK]" ;;
- warning) ICON="[!!]" ;;
- error) ICON="[ERR]" ;;
- info) ICON="[--]" ;;
- esac
- printf " %-22s %s %s\n" "$name" "$ICON" "$detail"
- fi
- done
- echo
- if [ "$ISSUES" -eq 0 ]; then
- echo "All checks passed."
- else
- echo "$ISSUES issue(s) found."
- fi
-fi
diff --git a/deployment/tools/restore.sh b/deployment/tools/restore.sh
deleted file mode 100644
index 4c9d46a..0000000
--- a/deployment/tools/restore.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Restore script
-#
-# Restores a DictIA backup archive created by backup.sh.
-#
-# Usage: bash restore.sh [PROJECT_DIR]
-set -euo pipefail
-
-ARCHIVE="${1:-}"
-PROJECT_DIR="${2:-$(cd "$(dirname "$0")/../.." && pwd)}"
-
-if [ -z "$ARCHIVE" ] || [ ! -f "$ARCHIVE" ]; then
- echo "Usage: bash restore.sh [project-dir]"
- echo
- echo "Available backups:"
- ls -1t "$PROJECT_DIR/backups"/dictia-*.tar.gz 2>/dev/null | head -5 || echo " (none found)"
- exit 1
-fi
-
-echo "=== DictIA Restore ==="
-echo "Archive: $ARCHIVE"
-echo "Target: $PROJECT_DIR"
-echo
-
-# Validate archive
-echo "Validating archive..."
-TMPDIR=$(mktemp -d)
-tar xzf "$ARCHIVE" -C "$TMPDIR"
-BACKUP_DIR=$(ls -1d "$TMPDIR"/dictia-* | head -1)
-
-if [ ! -f "$BACKUP_DIR/manifest.json" ]; then
- echo "ERROR: Invalid backup archive (no manifest.json)"
- rm -rf "$TMPDIR"
- exit 1
-fi
-
-echo "Manifest:"
-cat "$BACKUP_DIR/manifest.json"
-echo
-echo
-
-# Confirmation
-read -rp "This will overwrite current data. Continue? [y/N] " CONFIRM
-if [ "$CONFIRM" != "y" ] && [ "$CONFIRM" != "Y" ]; then
- echo "Aborted."
- rm -rf "$TMPDIR"
- exit 0
-fi
-
-# Stop services
-echo
-echo "Stopping DictIA services..."
-COMPOSE_FILE=""
-for f in cloud local-cpu local-gpu; do
- if [ -f "$PROJECT_DIR/deployment/docker/docker-compose.$f.yml" ]; then
- COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
- fi
-done
-if [ -n "$COMPOSE_FILE" ]; then
- docker compose -f "$COMPOSE_FILE" down 2>/dev/null || true
-fi
-
-# Restore data
-if [ -d "$BACKUP_DIR/data" ]; then
- echo "Restoring data/..."
- rm -rf "$PROJECT_DIR/data"
- cp -a "$BACKUP_DIR/data" "$PROJECT_DIR/data"
-fi
-
-# Restore .env
-if [ -f "$BACKUP_DIR/dot-env" ]; then
- echo "Restoring .env..."
- cp "$BACKUP_DIR/dot-env" "$PROJECT_DIR/.env"
-fi
-
-# Restore ASR stats
-if [ -f "$BACKUP_DIR/asr-usage-stats.json" ]; then
- echo "Restoring ASR proxy stats..."
- cp "$BACKUP_DIR/asr-usage-stats.json" "$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
-fi
-
-# Restore Docker volumes
-if [ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ]; then
- echo "Restoring whisperx-cache volume..."
- docker volume create whisperx-cache 2>/dev/null || true
- docker run --rm -v whisperx-cache:/target -v "$BACKUP_DIR":/backup \
- alpine sh -c "cd /target && tar xzf /backup/whisperx-cache.tar.gz" 2>/dev/null || true
-fi
-
-# Cleanup
-rm -rf "$TMPDIR"
-
-# Restart services
-echo
-echo "Restarting DictIA..."
-if [ -n "$COMPOSE_FILE" ]; then
- docker compose -f "$COMPOSE_FILE" up -d
-fi
-
-echo
-echo "=== Restore complete ==="
diff --git a/deployment/tools/update.sh b/deployment/tools/update.sh
deleted file mode 100644
index 54be4b7..0000000
--- a/deployment/tools/update.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Update script
-#
-# Pulls latest code, rebuilds Docker image, and restarts services.
-# Detects the active deployment profile automatically.
-#
-# Usage: bash update.sh [--no-pull] [--no-build]
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
-NO_PULL=false
-NO_BUILD=false
-
-for arg in "$@"; do
- case "$arg" in
- --no-pull) NO_PULL=true ;;
- --no-build) NO_BUILD=true ;;
- *) echo "Unknown option: $arg"; exit 1 ;;
- esac
-done
-
-echo "=== DictIA Update ==="
-echo "Project: $PROJECT_DIR"
-echo
-
-# 1. Detect active compose file
-COMPOSE_FILE=""
-PROFILE=""
-for f in cloud local-cpu local-gpu; do
- CF="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
- if [ -f "$CF" ] && docker compose -f "$CF" ps --quiet 2>/dev/null | grep -q .; then
- COMPOSE_FILE="$CF"
- PROFILE="$f"
- break
- fi
-done
-
-if [ -z "$COMPOSE_FILE" ]; then
- # Fallback: check .env for profile
- if [ -f "$PROJECT_DIR/.env" ]; then
- PROFILE=$(grep -E '^DICTIA_PROFILE=' "$PROJECT_DIR/.env" 2>/dev/null | cut -d= -f2 || echo "cloud")
- fi
- PROFILE="${PROFILE:-cloud}"
- COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$PROFILE.yml"
-fi
-
-echo "Profile: $PROFILE"
-echo "Compose: $COMPOSE_FILE"
-echo
-
-# 2. Git pull
-if [ "$NO_PULL" = false ]; then
- echo "[1/5] Pulling latest code..."
- cd "$PROJECT_DIR"
- git pull origin dictia-branding
-else
- echo "[1/5] Skipping git pull (--no-pull)"
-fi
-
-# 3. Rebuild DictIA image
-if [ "$NO_BUILD" = false ]; then
- echo "[2/5] Building DictIA image..."
- cd "$PROJECT_DIR"
- docker build -t innova-ai/dictia:latest .
-else
- echo "[2/5] Skipping build (--no-build)"
-fi
-
-# 3b. Pull upstream images (WhisperX) if local profile
-if [ "$PROFILE" != "cloud" ] && [ "$NO_BUILD" = false ]; then
- echo "[3/5] Pulling upstream images (WhisperX)..."
- docker compose -f "$COMPOSE_FILE" pull whisperx-asr 2>/dev/null || true
-else
- echo "[3/5] Skipping upstream pull (cloud profile or --no-build)"
-fi
-
-# 4. Restart containers
-echo "[4/5] Restarting containers..."
-docker compose -f "$COMPOSE_FILE" down
-docker compose -f "$COMPOSE_FILE" up -d
-
-# 5. Wait for health
-echo "[5/5] Waiting for health check..."
-RETRIES=30
-for i in $(seq 1 $RETRIES); do
- if docker compose -f "$COMPOSE_FILE" ps | grep -q "healthy"; then
- echo " DictIA is healthy!"
- break
- fi
- if [ "$i" -eq "$RETRIES" ]; then
- echo " WARNING: Health check timeout. Check: docker compose -f $COMPOSE_FILE logs"
- fi
- sleep 5
-done
-
-# Cleanup dangling images
-echo
-echo "Cleaning up old images..."
-docker image prune -f 2>/dev/null || true
-
-echo
-echo "=== Update complete ==="
-echo "DictIA: http://localhost:8899"
-docker compose -f "$COMPOSE_FILE" ps