diff --git a/README.md b/README.md
index e5ae33a..e189276 100644
--- a/README.md
+++ b/README.md
@@ -13,20 +13,12 @@ Application de transcription audio propulsee par l'intelligence artificielle. Tr
 - Conformite Loi 25 (Quebec) — journal d'audit integre
 - 100% auto-heberge — vos donnees restent chez vous
 
-## Demarrage rapide
-
-Voir le [guide de demarrage](client_docs/guide-utilisateur/premiers-pas.md).
-
 ## Documentation
 
 - [Guide utilisateur](client_docs/guide-utilisateur/index.md)
 - [Guide administrateur](client_docs/guide-admin/index.md)
 - [Depannage](client_docs/depannage/index.md)
 
-## Deploiement
-
-Voir le [guide de deploiement](deployment/README.md) et les profils Docker dans `deployment/profiles/`.
-
 ## Licence
 
 AGPL-3.0 — voir [LICENSE](LICENSE).
diff --git a/deployment/README.md b/deployment/README.md
deleted file mode 100644
index 0569f71..0000000
--- a/deployment/README.md
+++ /dev/null
@@ -1,105 +0,0 @@
-# DictIA — Deployment Infrastructure
-
-Infrastructure de deploiement reproductible pour DictIA .
-
-## Choix de profil
-
-```
-Quel est ton setup?
-    |
-    +-- VPS / serveur cloud?
-    |   --> cloud (ASR Proxy GCP GPU on demand)
-    |
-    +-- Machine locale avec GPU NVIDIA?
-    |   --> local-gpu (WhisperX sur GPU, le plus rapide)
-    |
-    +-- Machine locale sans GPU?
-        --> local-cpu (WhisperX sur CPU, lent mais fonctionnel)
-```
-
-## Quickstart
-
-```bash
-git clone https://gitea.innova-ai.ca/Innova-AI/dictia-public.git
-cd dictia
-git checkout dictia-branding
-bash deployment/setup.sh
-```
-
-Le script detecte le hardware et guide l'installation.
-
-## Architecture
-
-```
-deployment/
-├── setup.sh                  # Installateur principal
-├── docker/
-│   ├── docker-compose.cloud.yml
-│   ├── docker-compose.local-cpu.yml
-│   ├── docker-compose.local-gpu.yml
-│   └── .env.example
-├── asr-proxy/                # Proxy GCP GPU (cloud seulement)
-│   ├── proxy.py
-│   ├── dashboard.html
-│   ├── requirements.txt
-│   ├── setup.sh
-│   └── asr-proxy.service
-├── security/                 # Securite Docker (cloud)
-│   ├── docker-daemon.json
-│   ├── iptables-rules.sh
-│   └── docker-iptables.service
-├── config/
-│   ├── nginx/dictia.conf
-│   ├── tailscale/setup-serve.sh
-│   └── systemd/dictia.service
-├── tools/
-│   ├── backup.sh
-│   ├── restore.sh
-│   ├── update.sh
-│   └── health-check.sh
-└── docs/
-    ├── QUICKSTART.md
-    ├── VPS-SETUP.md
-    ├── LOCAL-SETUP.md
-    ├── MAINTENANCE.md
-    └── TROUBLESHOOTING.md
-```
-
-### Profil Cloud
-
-```
-Internet --> Tailscale --> VPS
-                           |
-                      DictIA :8899
-                           |
-                      ASR Proxy :9090
-                           |
-                      GCP GPU (auto start/stop)
-                           |
-                      WhisperX :9000
-```
-
-### Profil Local GPU/CPU
-
-```
-localhost:8899 --> DictIA container
-                       |
-                  WhisperX container :9000
-                       |
-                  GPU local (ou CPU)
-```
-
-## Documentation
-
-- [QUICKSTART.md](docs/QUICKSTART.md) — Demarrage rapide par profil
-- [VPS-SETUP.md](docs/VPS-SETUP.md) — Setup VPS complet from scratch
-- [LOCAL-SETUP.md](docs/LOCAL-SETUP.md) — Setup local GPU/CPU
-- [MAINTENANCE.md](docs/MAINTENANCE.md) — Backup, restore, update, monitoring
-- [TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) — Problemes courants + solutions
-
-## Mise a jour upstream
-
-Tous les fichiers dans `deployment/` sont specifiques a DictIA.
-Aucun conflit lors des merges upstream, sauf `deployment/setup.sh`
-(qui remplace le setup.sh original de Speakr).
-
diff --git a/deployment/asr-proxy/.gitignore b/deployment/asr-proxy/.gitignore
deleted file mode 100644
index 8ff2efb..0000000
--- a/deployment/asr-proxy/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
-gcp-credentials.json
-usage-stats.json
-venv/
-__pycache__/
-*.pyc
diff --git a/deployment/asr-proxy/asr-proxy.service b/deployment/asr-proxy/asr-proxy.service
deleted file mode 100644
index 917ec8a..0000000
--- a/deployment/asr-proxy/asr-proxy.service
+++ /dev/null
@@ -1,22 +0,0 @@
-# TEMPLATE — Ne pas copier directement dans /etc/systemd/system/.
-# Les variables ${ASR_PROXY_USER} et ${ASR_PROXY_DIR} sont des placeholders.
-# Le fichier service réel est généré par setup.sh (via heredoc bash) avec les
-# valeurs résolues de $SERVICE_USER et $INSTALL_DIR.
-# Usage : sudo bash setup.sh  (installe et active le service automatiquement)
-
-[Unit]
-Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
-After=network.target
-
-[Service]
-Type=simple
-User=${ASR_PROXY_USER}
-Restart=always
-RestartSec=10
-WorkingDirectory=${ASR_PROXY_DIR}
-ExecStart=${ASR_PROXY_DIR}/venv/bin/python proxy.py
-Environment=GOOGLE_APPLICATION_CREDENTIALS=${ASR_PROXY_DIR}/gcp-credentials.json
-Environment=STATS_FILE=${ASR_PROXY_DIR}/usage-stats.json
-
-[Install]
-WantedBy=multi-user.target
diff --git a/deployment/asr-proxy/dashboard.html b/deployment/asr-proxy/dashboard.html
deleted file mode 100644
index ba1ca7b..0000000
--- a/deployment/asr-proxy/dashboard.html
+++ /dev/null
@@ -1,1534 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-<meta charset="UTF-8">
-<meta name="viewport" content="width=device-width, initial-scale=1.0">
-<title>DictIA GPU Monitor</title>
-<link rel="preconnect" href="https://fonts.googleapis.com">
-<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500;600;700&family=Outfit:wght@300;400;500;600;700&display=swap" rel="stylesheet">
-<style>
-  :root {
-    --bg-primary: #0a0e17;
-    --bg-card: #111827;
-    --bg-card-hover: #151d2e;
-    --border-subtle: #1e293b;
-    --border-hover: #2a3a52;
-    --accent-cyan: #00e5ff;
-    --accent-cyan-dim: rgba(0, 229, 255, 0.15);
-    --accent-cyan-glow: rgba(0, 229, 255, 0.4);
-    --accent-amber: #ffb300;
-    --accent-amber-dim: rgba(255, 179, 0, 0.15);
-    --accent-red: #ff3d3d;
-    --accent-red-dim: rgba(255, 61, 61, 0.15);
-    --accent-green: #00e676;
-    --text-primary: #e2e8f0;
-    --text-secondary: #94a3b8;
-    --text-muted: #475569;
-    --font-mono: 'JetBrains Mono', monospace;
-    --font-display: 'Outfit', sans-serif;
-  }
-
-  *, *::before, *::after {
-    box-sizing: border-box;
-    margin: 0;
-    padding: 0;
-  }
-
-  body {
-    background-color: var(--bg-primary);
-    color: var(--text-primary);
-    font-family: var(--font-display);
-    min-height: 100vh;
-    overflow-x: hidden;
-    position: relative;
-  }
-
-  /* Subtle grid overlay */
-  body::before {
-    content: '';
-    position: fixed;
-    top: 0;
-    left: 0;
-    width: 100%;
-    height: 100%;
-    background-image:
-      linear-gradient(rgba(30, 41, 59, 0.18) 1px, transparent 1px),
-      linear-gradient(90deg, rgba(30, 41, 59, 0.18) 1px, transparent 1px);
-    background-size: 40px 40px;
-    pointer-events: none;
-    z-index: 0;
-  }
-
-  /* Scanline overlay for that CRT feel */
-  body::after {
-    content: '';
-    position: fixed;
-    top: 0;
-    left: 0;
-    width: 100%;
-    height: 100%;
-    background: repeating-linear-gradient(
-      0deg,
-      transparent,
-      transparent 2px,
-      rgba(0, 0, 0, 0.03) 2px,
-      rgba(0, 0, 0, 0.03) 4px
-    );
-    pointer-events: none;
-    z-index: 0;
-  }
-
-  .container {
-    max-width: 1100px;
-    margin: 0 auto;
-    padding: 0 24px 40px;
-    position: relative;
-    z-index: 1;
-  }
-
-  /* ---- Error Banner ---- */
-  .error-banner {
-    position: fixed;
-    top: 0;
-    left: 0;
-    right: 0;
-    background: linear-gradient(135deg, rgba(255, 61, 61, 0.12), rgba(255, 61, 61, 0.06));
-    border-bottom: 1px solid rgba(255, 61, 61, 0.3);
-    color: var(--accent-red);
-    font-family: var(--font-mono);
-    font-size: 0.8rem;
-    padding: 10px 24px;
-    text-align: center;
-    z-index: 100;
-    transform: translateY(-100%);
-    transition: transform 0.3s ease;
-    backdrop-filter: blur(10px);
-  }
-  .error-banner.visible {
-    transform: translateY(0);
-  }
-
-  /* ---- Header ---- */
-  .header {
-    display: flex;
-    align-items: center;
-    justify-content: space-between;
-    padding: 24px 0 20px;
-    border-bottom: 1px solid var(--border-subtle);
-    margin-bottom: 40px;
-    opacity: 0;
-    animation: fadeSlideDown 0.5s ease forwards;
-  }
-
-  .header-title {
-    font-family: var(--font-display);
-    font-weight: 600;
-    font-size: 1.1rem;
-    letter-spacing: 0.3em;
-    text-transform: uppercase;
-    color: var(--text-primary);
-  }
-  .header-title span {
-    color: var(--accent-cyan);
-  }
-
-  .proxy-badge {
-    display: flex;
-    align-items: center;
-    gap: 8px;
-    font-family: var(--font-mono);
-    font-size: 0.75rem;
-    color: var(--text-secondary);
-    background: rgba(0, 230, 118, 0.06);
-    border: 1px solid rgba(0, 230, 118, 0.2);
-    padding: 5px 14px;
-    border-radius: 20px;
-    transition: all 0.3s ease;
-  }
-  .proxy-badge.unhealthy {
-    background: var(--accent-red-dim);
-    border-color: rgba(255, 61, 61, 0.3);
-  }
-  .proxy-badge-dot {
-    width: 6px;
-    height: 6px;
-    border-radius: 50%;
-    background: var(--accent-green);
-    box-shadow: 0 0 6px var(--accent-green);
-  }
-  .proxy-badge.unhealthy .proxy-badge-dot {
-    background: var(--accent-red);
-    box-shadow: 0 0 6px var(--accent-red);
-  }
-
-  /* ---- Hero Status ---- */
-  .hero {
-    display: flex;
-    flex-direction: column;
-    align-items: center;
-    padding: 48px 0 40px;
-    opacity: 0;
-    animation: fadeSlideDown 0.5s ease 0.1s forwards;
-  }
-
-  .status-ring-container {
-    position: relative;
-    width: 120px;
-    height: 120px;
-    margin-bottom: 24px;
-  }
-
-  .status-ring {
-    width: 120px;
-    height: 120px;
-    border-radius: 50%;
-    border: 3px solid var(--accent-red);
-    background: var(--accent-red-dim);
-    position: relative;
-    transition: border-color 0.6s ease, background 0.6s ease, box-shadow 0.6s ease;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-  }
-  .status-ring.running {
-    border-color: var(--accent-cyan);
-    background: var(--accent-cyan-dim);
-    box-shadow:
-      0 0 30px rgba(0, 229, 255, 0.2),
-      0 0 60px rgba(0, 229, 255, 0.1);
-  }
-  .status-ring.running::before {
-    content: '';
-    position: absolute;
-    top: -8px;
-    left: -8px;
-    right: -8px;
-    bottom: -8px;
-    border-radius: 50%;
-    border: 1px solid rgba(0, 229, 255, 0.25);
-    animation: pulseRing 2.5s ease-in-out infinite;
-  }
-  .status-ring.running::after {
-    content: '';
-    position: absolute;
-    top: -16px;
-    left: -16px;
-    right: -16px;
-    bottom: -16px;
-    border-radius: 50%;
-    border: 1px solid rgba(0, 229, 255, 0.1);
-    animation: pulseRing 2.5s ease-in-out 0.5s infinite;
-  }
-
-  .status-ring-inner {
-    width: 10px;
-    height: 10px;
-    border-radius: 50%;
-    background: var(--accent-red);
-    transition: background 0.6s ease, box-shadow 0.6s ease;
-  }
-  .status-ring.running .status-ring-inner {
-    background: var(--accent-cyan);
-    box-shadow: 0 0 20px var(--accent-cyan-glow);
-    animation: innerPulse 2s ease-in-out infinite;
-  }
-
-  @keyframes pulseRing {
-    0%, 100% { transform: scale(1); opacity: 1; }
-    50% { transform: scale(1.08); opacity: 0.4; }
-  }
-  @keyframes innerPulse {
-    0%, 100% { opacity: 1; transform: scale(1); }
-    50% { opacity: 0.6; transform: scale(1.4); }
-  }
-
-  .status-label {
-    font-family: var(--font-mono);
-    font-size: 1.6rem;
-    font-weight: 600;
-    letter-spacing: 0.15em;
-    color: var(--accent-red);
-    transition: color 0.6s ease;
-    margin-bottom: 6px;
-  }
-  .status-label.running {
-    color: var(--accent-cyan);
-  }
-
-  .zone-label {
-    font-family: var(--font-mono);
-    font-size: 0.85rem;
-    color: var(--text-muted);
-    letter-spacing: 0.05em;
-  }
-
-  .active-requests-badge {
-    margin-top: 14px;
-    display: none;
-    align-items: center;
-    gap: 6px;
-    font-family: var(--font-mono);
-    font-size: 0.8rem;
-    color: var(--accent-amber);
-    background: var(--accent-amber-dim);
-    border: 1px solid rgba(255, 179, 0, 0.25);
-    padding: 5px 16px;
-    border-radius: 20px;
-  }
-  .active-requests-badge.visible {
-    display: flex;
-  }
-  .active-requests-badge::before {
-    content: '';
-    width: 6px;
-    height: 6px;
-    border-radius: 50%;
-    background: var(--accent-amber);
-    animation: innerPulse 1.2s ease-in-out infinite;
-  }
-
-  /* ---- Stats Grid ---- */
-  .stats-grid {
-    display: grid;
-    grid-template-columns: repeat(4, 1fr);
-    gap: 16px;
-    margin-bottom: 32px;
-  }
-
-  .stat-card {
-    background: var(--bg-card);
-    border: 1px solid var(--border-subtle);
-    border-radius: 12px;
-    padding: 24px 20px;
-    text-align: center;
-    transition: all 0.3s ease;
-    opacity: 0;
-    animation: fadeSlideUp 0.5s ease forwards;
-  }
-  .stat-card:nth-child(1) { animation-delay: 0.2s; }
-  .stat-card:nth-child(2) { animation-delay: 0.3s; }
-  .stat-card:nth-child(3) { animation-delay: 0.4s; }
-  .stat-card:nth-child(4) { animation-delay: 0.5s; }
-
-  .stat-card:hover {
-    background: var(--bg-card-hover);
-    border-color: var(--border-hover);
-    box-shadow: 0 4px 24px rgba(0, 0, 0, 0.3), 0 0 1px rgba(0, 229, 255, 0.1);
-    transform: translateY(-2px);
-  }
-
-  .stat-value {
-    font-family: var(--font-mono);
-    font-size: 1.8rem;
-    font-weight: 600;
-    color: var(--text-primary);
-    margin-bottom: 6px;
-    transition: color 0.3s ease;
-    line-height: 1.2;
-  }
-
-  .stat-label {
-    font-family: var(--font-display);
-    font-size: 0.78rem;
-    font-weight: 400;
-    color: var(--text-muted);
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-  }
-
-  .stat-sublabel {
-    font-family: var(--font-mono);
-    font-size: 0.7rem;
-    color: var(--text-muted);
-    margin-top: 2px;
-    opacity: 0.7;
-  }
-
-  /* ---- Budget Bar ---- */
-  .budget-section {
-    margin-bottom: 32px;
-    opacity: 0;
-    animation: fadeSlideUp 0.5s ease 0.6s forwards;
-  }
-
-  .budget-bar-container {
-    background: var(--bg-card);
-    border: 1px solid var(--border-subtle);
-    border-radius: 12px;
-    padding: 24px;
-  }
-
-  .budget-bar-header {
-    display: flex;
-    justify-content: space-between;
-    align-items: center;
-    margin-bottom: 14px;
-  }
-
-  .budget-bar-title {
-    font-family: var(--font-display);
-    font-size: 0.8rem;
-    font-weight: 500;
-    color: var(--text-secondary);
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-  }
-
-  .budget-bar-percent {
-    font-family: var(--font-mono);
-    font-size: 0.9rem;
-    font-weight: 600;
-    color: var(--accent-cyan);
-    transition: color 0.3s ease;
-  }
-
-  .budget-bar-track {
-    width: 100%;
-    height: 10px;
-    background: rgba(30, 41, 59, 0.6);
-    border-radius: 5px;
-    overflow: hidden;
-    position: relative;
-  }
-
-  .budget-bar-fill {
-    height: 100%;
-    border-radius: 5px;
-    background: linear-gradient(90deg, var(--accent-cyan), var(--accent-amber));
-    transition: width 0.8s cubic-bezier(0.25, 0.46, 0.45, 0.94);
-    position: relative;
-    min-width: 0;
-  }
-  .budget-bar-fill::after {
-    content: '';
-    position: absolute;
-    top: 0;
-    right: 0;
-    width: 30px;
-    height: 100%;
-    background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.15));
-    border-radius: 0 5px 5px 0;
-  }
-
-  .budget-bar-text {
-    font-family: var(--font-mono);
-    font-size: 0.78rem;
-    color: var(--text-muted);
-    margin-top: 10px;
-    text-align: center;
-  }
-
-  /* ---- Controls ---- */
-  .controls {
-    display: flex;
-    gap: 16px;
-    justify-content: center;
-    margin-bottom: 32px;
-    opacity: 0;
-    animation: fadeSlideUp 0.5s ease 0.7s forwards;
-  }
-
-  .btn {
-    font-family: var(--font-mono);
-    font-size: 0.85rem;
-    font-weight: 500;
-    letter-spacing: 0.08em;
-    padding: 12px 36px;
-    border-radius: 8px;
-    cursor: pointer;
-    transition: all 0.25s ease;
-    background: transparent;
-    position: relative;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    gap: 8px;
-    min-width: 160px;
-  }
-
-  .btn-start {
-    color: var(--accent-cyan);
-    border: 1px solid rgba(0, 229, 255, 0.35);
-  }
-  .btn-start:hover:not(:disabled) {
-    background: rgba(0, 229, 255, 0.08);
-    border-color: var(--accent-cyan);
-    box-shadow: 0 0 20px rgba(0, 229, 255, 0.15);
-  }
-
-  .btn-stop {
-    color: var(--accent-red);
-    border: 1px solid rgba(255, 61, 61, 0.35);
-  }
-  .btn-stop:hover:not(:disabled) {
-    background: rgba(255, 61, 61, 0.08);
-    border-color: var(--accent-red);
-    box-shadow: 0 0 20px rgba(255, 61, 61, 0.15);
-  }
-
-  .btn:disabled {
-    opacity: 0.3;
-    cursor: not-allowed;
-  }
-
-  .btn .spinner {
-    display: none;
-    width: 14px;
-    height: 14px;
-    border: 2px solid transparent;
-    border-top-color: currentColor;
-    border-radius: 50%;
-    animation: spin 0.7s linear infinite;
-  }
-  .btn.loading .spinner {
-    display: block;
-  }
-  .btn.loading .btn-text {
-    opacity: 0.5;
-  }
-
-  @keyframes spin {
-    to { transform: rotate(360deg); }
-  }
-
-  /* ---- Instance Details ---- */
-  .instance-section {
-    margin-bottom: 32px;
-    opacity: 0;
-    animation: fadeSlideUp 0.5s ease 0.8s forwards;
-  }
-
-  .instance-card {
-    background: var(--bg-card);
-    border: 1px solid var(--border-subtle);
-    border-radius: 12px;
-    padding: 24px;
-  }
-
-  .instance-card-header {
-    font-family: var(--font-display);
-    font-size: 0.78rem;
-    font-weight: 500;
-    color: var(--text-muted);
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-    margin-bottom: 18px;
-    padding-bottom: 12px;
-    border-bottom: 1px solid var(--border-subtle);
-  }
-
-  .instance-grid {
-    display: grid;
-    grid-template-columns: repeat(3, 1fr);
-    gap: 20px 24px;
-  }
-
-  .instance-item-label {
-    font-family: var(--font-display);
-    font-size: 0.68rem;
-    font-weight: 500;
-    color: var(--text-muted);
-    text-transform: uppercase;
-    letter-spacing: 0.08em;
-    margin-bottom: 4px;
-  }
-
-  .instance-item-value {
-    font-family: var(--font-mono);
-    font-size: 0.88rem;
-    font-weight: 500;
-    color: var(--text-primary);
-  }
-
-  .instance-item-value .sub-text {
-    font-size: 0.72rem;
-    margin-left: 6px;
-  }
-
-  .instance-item-value .sub-text.color-green { color: var(--accent-green); }
-  .instance-item-value .sub-text.color-amber { color: var(--accent-amber); }
-  .instance-item-value .sub-text.color-red { color: var(--accent-red); }
-
-  /* ---- Zone Fallback Map ---- */
-  .zone-section {
-    margin-bottom: 32px;
-    opacity: 0;
-    animation: fadeSlideUp 0.5s ease 0.9s forwards;
-  }
-
-  .zone-card {
-    background: var(--bg-card);
-    border: 1px solid var(--border-subtle);
-    border-radius: 12px;
-    padding: 24px;
-  }
-
-  .zone-card-header {
-    font-family: var(--font-display);
-    font-size: 0.78rem;
-    font-weight: 500;
-    color: var(--text-muted);
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-    margin-bottom: 18px;
-    padding-bottom: 12px;
-    border-bottom: 1px solid var(--border-subtle);
-  }
-
-  .zone-grid {
-    display: flex;
-    gap: 14px;
-    flex-wrap: wrap;
-  }
-
-  .zone-block {
-    flex: 1;
-    min-width: 180px;
-    background: rgba(10, 14, 23, 0.6);
-    border: 1px solid var(--border-subtle);
-    border-radius: 10px;
-    padding: 16px;
-    transition: all 0.3s ease;
-    position: relative;
-  }
-
-  .zone-block:hover {
-    background: var(--bg-card-hover);
-    border-color: var(--border-hover);
-  }
-
-  .zone-block.status-running {
-    border-color: rgba(0, 229, 255, 0.35);
-  }
-  .zone-block.status-running.active-zone {
-    border-color: rgba(0, 229, 255, 0.5);
-    background: rgba(0, 229, 255, 0.04);
-    box-shadow: 0 0 20px rgba(0, 229, 255, 0.06), inset 0 0 20px rgba(0, 229, 255, 0.03);
-  }
-  .zone-block.status-no_capacity {
-    border-color: rgba(255, 61, 61, 0.3);
-  }
-  .zone-block.status-starting {
-    border-color: rgba(255, 179, 0, 0.35);
-  }
-  .zone-block.status-unknown {
-    border-color: var(--border-subtle);
-  }
-  .zone-block.status-quota_exceeded {
-    border-color: rgba(255, 61, 61, 0.3);
-  }
-
-  .zone-block-top {
-    display: flex;
-    align-items: center;
-    gap: 8px;
-    margin-bottom: 6px;
-  }
-
-  .zone-dot {
-    width: 7px;
-    height: 7px;
-    border-radius: 50%;
-    flex-shrink: 0;
-  }
-
-  .zone-block.status-running .zone-dot {
-    background: var(--accent-cyan);
-    box-shadow: 0 0 6px var(--accent-cyan);
-  }
-  .zone-block.status-no_capacity .zone-dot {
-    background: var(--accent-red);
-    box-shadow: 0 0 6px var(--accent-red);
-  }
-  .zone-block.status-starting .zone-dot {
-    background: var(--accent-amber);
-    box-shadow: 0 0 6px var(--accent-amber);
-    animation: innerPulse 1.2s ease-in-out infinite;
-  }
-  .zone-block.status-unknown .zone-dot {
-    background: var(--text-muted);
-  }
-  .zone-block.status-quota_exceeded .zone-dot {
-    background: var(--accent-amber);
-    box-shadow: 0 0 6px var(--accent-amber);
-  }
-
-  .zone-block-label {
-    font-family: var(--font-mono);
-    font-size: 0.78rem;
-    font-weight: 500;
-    color: var(--text-primary);
-  }
-
-  .zone-block-gpu {
-    font-family: var(--font-mono);
-    font-size: 0.68rem;
-    color: var(--text-secondary);
-    margin-bottom: 4px;
-    margin-left: 15px;
-  }
-
-  .zone-block-status {
-    font-family: var(--font-mono);
-    font-size: 0.66rem;
-    text-transform: uppercase;
-    letter-spacing: 0.06em;
-    margin-left: 15px;
-    margin-bottom: 6px;
-  }
-
-  .zone-block.status-running .zone-block-status { color: var(--accent-cyan); }
-  .zone-block.status-no_capacity .zone-block-status { color: var(--accent-red); }
-  .zone-block.status-starting .zone-block-status { color: var(--accent-amber); }
-  .zone-block.status-unknown .zone-block-status { color: var(--text-muted); }
-  .zone-block.status-quota_exceeded .zone-block-status { color: var(--accent-red); }
-
-  .zone-block-tried {
-    font-family: var(--font-mono);
-    font-size: 0.62rem;
-    color: var(--text-muted);
-    opacity: 0.7;
-    margin-left: 15px;
-  }
-
-  /* ---- Request History Table ---- */
-  .history-section {
-    margin-bottom: 32px;
-    opacity: 0;
-    animation: fadeSlideUp 0.5s ease 1.0s forwards;
-  }
-
-  .history-card {
-    background: var(--bg-card);
-    border: 1px solid var(--border-subtle);
-    border-radius: 12px;
-    overflow: hidden;
-  }
-
-  .history-card-header {
-    font-family: var(--font-display);
-    font-size: 0.78rem;
-    font-weight: 500;
-    color: var(--text-muted);
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-    padding: 16px 20px 12px;
-    border-bottom: 1px solid var(--border-subtle);
-  }
-
-  .history-table-wrap {
-    overflow-x: auto;
-    scrollbar-width: thin;
-    scrollbar-color: var(--border-subtle) transparent;
-  }
-  .history-table-wrap::-webkit-scrollbar {
-    height: 4px;
-  }
-  .history-table-wrap::-webkit-scrollbar-track {
-    background: transparent;
-  }
-  .history-table-wrap::-webkit-scrollbar-thumb {
-    background: var(--border-subtle);
-    border-radius: 2px;
-  }
-
-  .history-table {
-    width: 100%;
-    border-collapse: collapse;
-    font-family: var(--font-mono);
-    font-size: 0.75rem;
-  }
-
-  .history-table thead th {
-    font-family: var(--font-display);
-    font-size: 0.68rem;
-    font-weight: 500;
-    color: var(--text-muted);
-    text-transform: uppercase;
-    letter-spacing: 0.08em;
-    text-align: left;
-    padding: 10px 16px;
-    border-bottom: 1px solid var(--border-subtle);
-    white-space: nowrap;
-  }
-
-  .history-table tbody tr {
-    transition: background 0.15s ease;
-  }
-  .history-table tbody tr:hover {
-    background: rgba(30, 41, 59, 0.3);
-  }
-
-  .history-table tbody td {
-    padding: 8px 16px;
-    color: var(--text-secondary);
-    white-space: nowrap;
-    border-bottom: 1px solid rgba(30, 41, 59, 0.3);
-  }
-
-  .history-table tbody tr:last-child td {
-    border-bottom: none;
-  }
-
-  .type-badge {
-    display: inline-block;
-    font-family: var(--font-mono);
-    font-size: 0.68rem;
-    font-weight: 600;
-    padding: 2px 10px;
-    border-radius: 10px;
-    letter-spacing: 0.04em;
-  }
-  .type-badge.type-asr {
-    color: var(--accent-cyan);
-    background: var(--accent-cyan-dim);
-  }
-  .type-badge.type-llm {
-    color: var(--accent-amber);
-    background: var(--accent-amber-dim);
-  }
-
-  .status-badge {
-    display: inline-block;
-    font-family: var(--font-mono);
-    font-size: 0.68rem;
-    font-weight: 600;
-    padding: 2px 10px;
-    border-radius: 10px;
-    letter-spacing: 0.04em;
-  }
-  .status-badge.status-ok {
-    color: var(--accent-green);
-    background: rgba(0, 230, 118, 0.1);
-  }
-  .status-badge.status-err {
-    color: var(--accent-red);
-    background: var(--accent-red-dim);
-  }
-
-  .history-empty {
-    padding: 24px;
-    text-align: center;
-    font-family: var(--font-mono);
-    font-size: 0.75rem;
-    color: var(--text-muted);
-    opacity: 0.5;
-  }
-
-  /* ---- Activity Log ---- */
-  .log-section {
-    opacity: 0;
-    animation: fadeSlideUp 0.5s ease 1.1s forwards;
-  }
-
-  .log-container {
-    background: var(--bg-card);
-    border: 1px solid var(--border-subtle);
-    border-radius: 12px;
-    overflow: hidden;
-  }
-
-  .log-header {
-    padding: 16px 20px 12px;
-    border-bottom: 1px solid var(--border-subtle);
-    font-family: var(--font-display);
-    font-size: 0.78rem;
-    font-weight: 500;
-    color: var(--text-muted);
-    text-transform: uppercase;
-    letter-spacing: 0.1em;
-  }
-
-  .log-entries {
-    max-height: 200px;
-    overflow-y: auto;
-    padding: 8px 0;
-    scrollbar-width: thin;
-    scrollbar-color: var(--border-subtle) transparent;
-  }
-  .log-entries::-webkit-scrollbar {
-    width: 4px;
-  }
-  .log-entries::-webkit-scrollbar-track {
-    background: transparent;
-  }
-  .log-entries::-webkit-scrollbar-thumb {
-    background: var(--border-subtle);
-    border-radius: 2px;
-  }
-
-  .log-entry {
-    padding: 6px 20px;
-    font-family: var(--font-mono);
-    font-size: 0.72rem;
-    color: var(--text-muted);
-    display: flex;
-    gap: 12px;
-    line-height: 1.5;
-    transition: background 0.15s ease;
-  }
-  .log-entry:hover {
-    background: rgba(30, 41, 59, 0.3);
-  }
-
-  .log-time {
-    color: var(--text-secondary);
-    white-space: nowrap;
-    flex-shrink: 0;
-  }
-
-  .log-msg { flex: 1; }
-  .log-msg.status-running { color: var(--accent-cyan); }
-  .log-msg.status-terminated { color: var(--accent-red); }
-  .log-msg.status-error { color: var(--accent-red); opacity: 0.8; }
-  .log-msg.status-action { color: var(--accent-amber); }
-
-  .log-empty {
-    padding: 20px;
-    text-align: center;
-    font-family: var(--font-mono);
-    font-size: 0.75rem;
-    color: var(--text-muted);
-    opacity: 0.5;
-  }
-
-  /* ---- Footer ---- */
-  .footer {
-    text-align: center;
-    padding: 24px 0 8px;
-    font-family: var(--font-mono);
-    font-size: 0.7rem;
-    color: var(--text-muted);
-    opacity: 0;
-    animation: fadeSlideUp 0.5s ease 1.2s forwards;
-  }
-
-  /* ---- Animations ---- */
-  @keyframes fadeSlideDown {
-    from { opacity: 0; transform: translateY(-12px); }
-    to { opacity: 1; transform: translateY(0); }
-  }
-  @keyframes fadeSlideUp {
-    from { opacity: 0; transform: translateY(16px); }
-    to { opacity: 1; transform: translateY(0); }
-  }
-
-  /* ---- Responsive ---- */
-  @media (max-width: 768px) {
-    .stats-grid {
-      grid-template-columns: repeat(2, 1fr);
-    }
-    .header-title {
-      font-size: 0.85rem;
-      letter-spacing: 0.15em;
-    }
-    .stat-value {
-      font-size: 1.5rem;
-    }
-    .controls {
-      flex-direction: column;
-      align-items: center;
-    }
-    .btn {
-      width: 100%;
-      max-width: 280px;
-    }
-    .instance-grid {
-      grid-template-columns: repeat(2, 1fr);
-    }
-    .zone-grid {
-      display: grid;
-      grid-template-columns: repeat(2, 1fr);
-    }
-    .zone-block {
-      min-width: 0;
-    }
-  }
-
-  @media (max-width: 480px) {
-    .stats-grid {
-      grid-template-columns: 1fr;
-    }
-    .container {
-      padding: 0 16px 32px;
-    }
-    .status-ring-container {
-      width: 100px;
-      height: 100px;
-    }
-    .status-ring {
-      width: 100px;
-      height: 100px;
-    }
-    .status-label {
-      font-size: 1.3rem;
-    }
-    .header {
-      flex-direction: column;
-      gap: 12px;
-      text-align: center;
-    }
-    .instance-grid {
-      grid-template-columns: 1fr;
-    }
-    .zone-grid {
-      grid-template-columns: 1fr;
-    }
-  }
-</style>
-</head>
-<body>
-
-<div class="error-banner" id="errorBanner">
-  <span id="errorText">Connection error: unable to reach proxy</span>
-</div>
-
-<div class="container">
-
-  <!-- Header -->
-  <header class="header">
-    <h1 class="header-title"><span>DICTIA</span> GPU MONITOR</h1>
-    <div class="proxy-badge" id="proxyBadge">
-      <div class="proxy-badge-dot"></div>
-      <span id="proxyStatus">proxy: connecting...</span>
-    </div>
-  </header>
-
-  <!-- Hero Status -->
-  <section class="hero">
-    <div class="status-ring-container">
-      <div class="status-ring" id="statusRing">
-        <div class="status-ring-inner"></div>
-      </div>
-    </div>
-    <div class="status-label" id="statusLabel">---</div>
-    <div class="zone-label" id="zoneLabel">---</div>
-    <div class="active-requests-badge" id="activeRequestsBadge">
-      <span id="activeRequestsText">0 active requests</span>
-    </div>
-  </section>
-
-  <!-- Stats Grid -->
-  <section class="stats-grid">
-    <div class="stat-card">
-      <div class="stat-value" id="gpuTime">--</div>
-      <div class="stat-label">GPU Time</div>
-      <div class="stat-sublabel">This Month</div>
-    </div>
-    <div class="stat-card">
-      <div class="stat-value" id="estCost">--</div>
-      <div class="stat-label">Estimated Cost</div>
-      <div class="stat-sublabel" id="costBreakdown">USD</div>
-    </div>
-    <div class="stat-card">
-      <div class="stat-value" id="reqCount">--</div>
-      <div class="stat-label">Total Requests</div>
-      <div class="stat-sublabel">This Month</div>
-    </div>
-    <div class="stat-card">
-      <div class="stat-value" id="budgetLeft">--</div>
-      <div class="stat-label">Remaining</div>
-      <div class="stat-sublabel" id="budgetOfLabel">of --h</div>
-    </div>
-  </section>
-
-  <!-- Budget Bar -->
-  <section class="budget-section">
-    <div class="budget-bar-container">
-      <div class="budget-bar-header">
-        <span class="budget-bar-title">Monthly Budget</span>
-        <span class="budget-bar-percent" id="budgetPercent">--%</span>
-      </div>
-      <div class="budget-bar-track">
-        <div class="budget-bar-fill" id="budgetFill" style="width: 0%"></div>
-      </div>
-      <div class="budget-bar-text" id="budgetText">--h / --h</div>
-    </div>
-  </section>
-
-  <!-- Controls -->
-  <section class="controls">
-    <button class="btn btn-start" id="btnStart" onclick="gpuAction('start')" disabled>
-      <span class="spinner"></span>
-      <span class="btn-text">START GPU</span>
-    </button>
-    <button class="btn btn-stop" id="btnStop" onclick="gpuAction('stop')" disabled>
-      <span class="spinner"></span>
-      <span class="btn-text">STOP GPU</span>
-    </button>
-  </section>
-
-  <!-- Instance Details -->
-  <section class="instance-section" id="instanceSection">
-    <div class="instance-card">
-      <div class="instance-card-header">Instance Details</div>
-      <div class="instance-grid" id="instanceGrid">
-        <div class="instance-item">
-          <div class="instance-item-label">IP</div>
-          <div class="instance-item-value" id="instIp">---</div>
-        </div>
-        <div class="instance-item">
-          <div class="instance-item-label">Machine</div>
-          <div class="instance-item-value" id="instMachine">---</div>
-        </div>
-        <div class="instance-item">
-          <div class="instance-item-label">GPU</div>
-          <div class="instance-item-value" id="instGpu">---</div>
-        </div>
-        <div class="instance-item">
-          <div class="instance-item-label">Idle</div>
-          <div class="instance-item-value" id="instIdle">---</div>
-        </div>
-        <div class="instance-item">
-          <div class="instance-item-label">OAuth Token</div>
-          <div class="instance-item-value" id="instToken">---</div>
-        </div>
-        <div class="instance-item">
-          <div class="instance-item-label">Cost Rate</div>
-          <div class="instance-item-value" id="instCostRate">---</div>
-        </div>
-      </div>
-    </div>
-  </section>
-
-  <!-- Zone Fallback Map -->
-  <section class="zone-section" id="zoneSection">
-    <div class="zone-card">
-      <div class="zone-card-header">Zone Fallback Map</div>
-      <div class="zone-grid" id="zoneGrid">
-        <!-- Populated by JS -->
-      </div>
-    </div>
-  </section>
-
-  <!-- Request History -->
-  <section class="history-section" id="historySection">
-    <div class="history-card">
-      <div class="history-card-header">Request History</div>
-      <div class="history-table-wrap">
-        <table class="history-table" id="historyTable">
-          <thead>
-            <tr>
-              <th>Time</th>
-              <th>Type</th>
-              <th>Duration</th>
-              <th>Status</th>
-              <th>Zone</th>
-            </tr>
-          </thead>
-          <tbody id="historyBody">
-            <tr><td colspan="5"><div class="history-empty">No requests yet</div></td></tr>
-          </tbody>
-        </table>
-      </div>
-    </div>
-  </section>
-
-  <!-- Event Log -->
-  <section class="log-section">
-    <div class="log-container">
-      <div class="log-header">Event Log</div>
-      <div class="log-entries" id="logEntries">
-        <div class="log-empty">Waiting for data...</div>
-      </div>
-    </div>
-  </section>
-
-  <!-- Footer -->
-  <div class="footer">
-    Last updated: <span id="lastUpdated">--:--:--</span>
-  </div>
-
-</div>
-
-<script>
-(function() {
-  // ---------- State ----------
-  let lastGpuState = null;
-  let logItems = [];
-  const MAX_LOG = 10;
-  let actionInProgress = false;
-
-  // ---------- DOM refs ----------
-  const $ = id => document.getElementById(id);
-
-  const els = {
-    errorBanner:       $('errorBanner'),
-    errorText:         $('errorText'),
-    proxyBadge:        $('proxyBadge'),
-    proxyStatus:       $('proxyStatus'),
-    statusRing:        $('statusRing'),
-    statusLabel:       $('statusLabel'),
-    zoneLabel:         $('zoneLabel'),
-    activeReqBadge:    $('activeRequestsBadge'),
-    activeReqText:     $('activeRequestsText'),
-    gpuTime:           $('gpuTime'),
-    estCost:           $('estCost'),
-    reqCount:          $('reqCount'),
-    budgetLeft:        $('budgetLeft'),
-    budgetOfLabel:     $('budgetOfLabel'),
-    budgetPercent:     $('budgetPercent'),
-    budgetFill:        $('budgetFill'),
-    budgetText:        $('budgetText'),
-    btnStart:          $('btnStart'),
-    btnStop:           $('btnStop'),
-    logEntries:        $('logEntries'),
-    lastUpdated:       $('lastUpdated'),
-  };
-
-  // ---------- Helpers ----------
-  function formatTime(hours) {
-    const h = Math.floor(hours);
-    const m = Math.round((hours - h) * 60);
-    if (h === 0) return `${m}m`;
-    if (m === 0) return `${h}h`;
-    return `${h}h ${m}m`;
-  }
-
-  function formatHours(hours) {
-    return hours.toFixed(1) + 'h';
-  }
-
-  function timestamp() {
-    return new Date().toLocaleTimeString('en-US', { hour12: false });
-  }
-
-  function addLog(msg, statusClass) {
-    logItems.unshift({ time: timestamp(), msg, statusClass });
-    if (logItems.length > MAX_LOG) logItems.pop();
-    renderLog();
-  }
-
-  function renderLog() {
-    if (logItems.length === 0) {
-      els.logEntries.innerHTML = '<div class="log-empty">Waiting for data...</div>';
-      return;
-    }
-    els.logEntries.innerHTML = logItems.map(item =>
-      `<div class="log-entry">
-        <span class="log-time">${item.time}</span>
-        <span class="log-msg ${item.statusClass || ''}">${item.msg}</span>
-      </div>`
-    ).join('');
-  }
-
-  function showError(msg) {
-    els.errorText.textContent = msg;
-    els.errorBanner.classList.add('visible');
-  }
-
-  function hideError() {
-    els.errorBanner.classList.remove('visible');
-  }
-
-  // ---------- Fetch & Update ----------
-  async function fetchData() {
-    try {
-      const [healthRes, statsRes] = await Promise.all([
-        fetch('/health'),
-        fetch('/stats'),
-      ]);
-
-      if (!healthRes.ok || !statsRes.ok) {
-        throw new Error(`HTTP ${healthRes.status} / ${statsRes.status}`);
-      }
-
-      const health = await healthRes.json();
-      const stats = await statsRes.json();
-
-      hideError();
-      updateDashboard(health, stats);
-      updateAdvanced(health, stats);
-    } catch (err) {
-      showError('Connection error: ' + err.message);
-      addLog('Fetch failed: ' + err.message, 'status-error');
-    }
-  }
-
-  function updateDashboard(health, stats) {
-    // Proxy status
-    const proxyOk = health.proxy === 'healthy';
-    els.proxyStatus.textContent = proxyOk ? 'proxy: healthy' : 'proxy: ' + health.proxy;
-    els.proxyBadge.classList.toggle('unhealthy', !proxyOk);
-
-    // GPU status
-    const gpuState = (health.gpu_instance || 'unknown').toUpperCase();
-    const isRunning = gpuState === 'RUNNING';
-
-    els.statusRing.classList.toggle('running', isRunning);
-    els.statusLabel.textContent = gpuState;
-    els.statusLabel.classList.toggle('running', isRunning);
-
-    // Zone
-    els.zoneLabel.textContent = health.gpu_zone || stats.active_zone || '---';
-
-    // Active requests
-    const activeReq = health.active_requests || 0;
-    if (activeReq > 0) {
-      els.activeReqBadge.classList.add('visible');
-      els.activeReqText.textContent = activeReq + ' active request' + (activeReq !== 1 ? 's' : '');
-    } else {
-      els.activeReqBadge.classList.remove('visible');
-    }
-
-    // Log state changes
-    if (lastGpuState !== null && lastGpuState !== gpuState) {
-      addLog(`GPU state changed: ${lastGpuState} \u2192 ${gpuState}`, isRunning ? 'status-running' : 'status-terminated');
-    } else if (lastGpuState === null) {
-      addLog(`Dashboard initialized \u2014 GPU: ${gpuState}`, isRunning ? 'status-running' : 'status-terminated');
-    }
-    lastGpuState = gpuState;
-
-    // Stats cards
-    const gpuHours = stats.gpu_hours || health.usage?.gpu_hours || 0;
-    els.gpuTime.textContent = formatTime(gpuHours);
-
-    const cost = stats.estimated_cost_usd;
-    els.estCost.textContent = cost != null ? '$' + cost.toFixed(2) : '--';
-    const gpuCost = stats.gpu_cost_usd;
-    const fixedCost = stats.fixed_cost_usd;
-    const breakdownEl = $('costBreakdown');
-    if (breakdownEl && gpuCost != null && fixedCost != null) {
-      breakdownEl.textContent = 'GPU $' + gpuCost.toFixed(2) + ' + Infra $' + fixedCost.toFixed(2);
-    }
-
-    const requests = stats.requests_count != null ? stats.requests_count : (health.usage?.requests_count || 0);
-    els.reqCount.textContent = requests;
-
-    const remaining = stats.remaining_hours != null ? stats.remaining_hours : 0;
-    els.budgetLeft.textContent = formatHours(remaining);
-
-    const limit = stats.monthly_limit_hours || health.usage?.gpu_limit_hours || 50;
-    els.budgetOfLabel.textContent = 'of ' + limit + 'h';
-
-    // Budget bar
-    const used = gpuHours;
-    const pct = limit > 0 ? Math.min((used / limit) * 100, 100) : 0;
-    els.budgetPercent.textContent = pct.toFixed(1) + '%';
-    els.budgetFill.style.width = pct + '%';
-    els.budgetText.textContent = used.toFixed(2) + 'h / ' + limit.toFixed(1) + 'h';
-
-    // Color the percent based on usage
-    if (pct > 80) {
-      els.budgetPercent.style.color = 'var(--accent-red)';
-    } else if (pct > 50) {
-      els.budgetPercent.style.color = 'var(--accent-amber)';
-    } else {
-      els.budgetPercent.style.color = 'var(--accent-cyan)';
-    }
-
-    // Buttons
-    if (!actionInProgress) {
-      els.btnStart.disabled = isRunning;
-      els.btnStop.disabled = !isRunning;
-    }
-
-    // Timestamp
-    els.lastUpdated.textContent = timestamp();
-  }
-
-  // ---------- Advanced Monitoring ----------
-  function formatGpuModel(raw) {
-    if (!raw) return '---';
-    // "nvidia-l4" → "NVIDIA L4", "nvidia-tesla-t4" → "NVIDIA Tesla T4"
-    return raw
-      .split('-')
-      .map(function(part) {
-        if (part.toLowerCase() === 'nvidia') return 'NVIDIA';
-        if (part.toLowerCase() === 'tesla') return 'Tesla';
-        return part.toUpperCase();
-      })
-      .join(' ');
-  }
-
-  function formatSecondsShort(sec) {
-    if (sec == null) return '---';
-    const m = Math.floor(sec / 60);
-    const s = Math.floor(sec % 60);
-    if (m > 0) return m + 'm ' + s + 's';
-    return s + 's';
-  }
-
-  function formatMinutesFromSec(sec) {
-    if (sec == null) return '---';
-    const m = Math.floor(sec / 60);
-    const s = Math.floor(sec % 60);
-    return m + 'm ' + s + 's';
-  }
-
-  function tokenExpiryHtml(sec) {
-    if (sec == null) return '---';
-    const m = Math.floor(sec / 60);
-    let cls, label;
-    if (sec < 60) {
-      cls = 'color-red';
-      label = 'expires in ' + sec + 's';
-    } else if (sec < 300) {
-      cls = 'color-amber';
-      label = 'expires in ' + m + 'm';
-    } else {
-      cls = 'color-green';
-      label = 'expires in ' + m + 'm';
-    }
-    return '<span class="sub-text ' + cls + '">' + label + '</span>';
-  }
-
-  function updateAdvanced(health, stats) {
-    // ---- Instance Details ----
-    const ip = health.gpu_ip;
-    $('instIp').textContent = ip || '---';
-
-    $('instMachine').textContent = health.machine_type || '---';
-    $('instGpu').textContent = formatGpuModel(health.gpu_model);
-
-    // Idle with shutdown countdown
-    const idleSec = health.idle_seconds;
-    const shutdownIn = health.auto_shutdown_in;
-    let idleHtml = formatSecondsShort(idleSec);
-    if (shutdownIn != null) {
-      idleHtml += ' <span class="sub-text color-amber">shutdown in ' + formatSecondsShort(shutdownIn) + '</span>';
-    }
-    $('instIdle').innerHTML = idleHtml;
-
-    // OAuth token
-    const tokenSec = health.token_expires_in;
-    if (tokenSec != null) {
-      const m = Math.floor(tokenSec / 60);
-      $('instToken').innerHTML = m + 'm' + tokenExpiryHtml(tokenSec);
-    } else {
-      $('instToken').textContent = '---';
-    }
-
-    // Cost rate
-    const costRate = stats.cost_per_hour;
-    $('instCostRate').textContent = costRate != null ? '$' + costRate.toFixed(2) + '/hr' : '---';
-
-    // ---- Zone Fallback Map ----
-    const zoneGrid = $('zoneGrid');
-    const fallbacks = stats.zone_fallbacks;
-    const activeZone = stats.active_zone || health.gpu_zone || '';
-
-    if (fallbacks && fallbacks.length > 0) {
-      zoneGrid.innerHTML = fallbacks.map(function(z) {
-        const st = (z.status || 'unknown').replace(/\s+/g, '_');
-        const isActive = z.label === activeZone;
-        const cls = 'zone-block status-' + st + (isActive ? ' active-zone' : '');
-        const gpuLabel = formatGpuModel(z.gpu);
-        const lastTried = z.last_tried ? z.last_tried.split('T')[1].substring(0, 5) : '--:--';
-        const statusText = (z.status || 'unknown').replace(/_/g, ' ');
-        return '<div class="' + cls + '">' +
-          '<div class="zone-block-top">' +
-            '<div class="zone-dot"></div>' +
-            '<div class="zone-block-label">' + escapeHtml(z.label) + '</div>' +
-          '</div>' +
-          '<div class="zone-block-gpu">' + escapeHtml(gpuLabel) + '</div>' +
-          '<div class="zone-block-status">' + escapeHtml(statusText) + '</div>' +
-          '<div class="zone-block-tried">last tried: ' + lastTried + '</div>' +
-        '</div>';
-      }).join('');
-    } else {
-      zoneGrid.innerHTML = '<div style="color:var(--text-muted);font-family:var(--font-mono);font-size:0.75rem;opacity:0.5;padding:8px;">No zone data available</div>';
-    }
-
-    // ---- Request History Table ----
-    const historyBody = $('historyBody');
-    const recent = stats.recent_requests;
-
-    if (recent && recent.length > 0) {
-      historyBody.innerHTML = recent.map(function(req) {
-        // Time: HH:MM:SS
-        let timeStr = '---';
-        if (req.time) {
-          const tPart = req.time.split('T')[1];
-          timeStr = tPart ? tPart.substring(0, 8) : req.time;
-        }
-
-        // Type badge
-        const typeLower = (req.type || '').toLowerCase();
-        const typeCls = typeLower === 'asr' ? 'type-asr' : 'type-llm';
-        const typeBadge = '<span class="type-badge ' + typeCls + '">' + escapeHtml(req.type || '---') + '</span>';
-
-        // Duration
-        let durStr = '---';
-        if (req.duration_sec != null) {
-          if (req.duration_sec >= 60) {
-            const dm = Math.floor(req.duration_sec / 60);
-            const ds = Math.floor(req.duration_sec % 60);
-            durStr = dm + 'm ' + ds + 's';
-          } else {
-            durStr = req.duration_sec.toFixed(1) + 's';
-          }
-        }
-
-        // Status badge
-        const statusOk = req.status >= 200 && req.status < 300;
-        const statusCls = statusOk ? 'status-ok' : 'status-err';
-        const statusBadge = '<span class="status-badge ' + statusCls + '">' + (req.status || '---') + '</span>';
-
-        // Zone (muted)
-        const zone = req.zone || '---';
-
-        return '<tr>' +
-          '<td>' + timeStr + '</td>' +
-          '<td>' + typeBadge + '</td>' +
-          '<td>' + durStr + '</td>' +
-          '<td>' + statusBadge + '</td>' +
-          '<td style="color:var(--text-muted)">' + escapeHtml(zone) + '</td>' +
-        '</tr>';
-      }).join('');
-    } else {
-      historyBody.innerHTML = '<tr><td colspan="5"><div class="history-empty">No requests yet</div></td></tr>';
-    }
-  }
-
-  function escapeHtml(str) {
-    const d = document.createElement('div');
-    d.textContent = str;
-    return d.innerHTML;
-  }
-
-  // ---------- GPU Actions ----------
-  window.gpuAction = async function(action) {
-    const btn = action === 'start' ? els.btnStart : els.btnStop;
-    const endpoint = action === 'start' ? '/gpu/start' : '/gpu/stop';
-
-    btn.classList.add('loading');
-    btn.disabled = true;
-    els.btnStart.disabled = true;
-    els.btnStop.disabled = true;
-    actionInProgress = true;
-
-    addLog(`Sending ${action.toUpperCase()} command...`, 'status-action');
-
-    try {
-      const res = await fetch(endpoint, { method: 'POST' });
-      if (!res.ok) {
-        const body = await res.text();
-        throw new Error(`HTTP ${res.status}: ${body}`);
-      }
-      const data = await res.json().catch(() => ({}));
-      addLog(`${action.toUpperCase()} command accepted`, 'status-action');
-
-      // Small delay then refresh to let the backend state propagate
-      await new Promise(r => setTimeout(r, 2000));
-      await fetchData();
-    } catch (err) {
-      addLog(`${action.toUpperCase()} failed: ${err.message}`, 'status-error');
-      showError(`Action failed: ${err.message}`);
-    } finally {
-      btn.classList.remove('loading');
-      actionInProgress = false;
-      // Re-enable will happen on next fetchData
-      await fetchData();
-    }
-  };
-
-  // ---------- Init ----------
-  fetchData();
-  setInterval(fetchData, 10000);
-})();
-</script>
-
-</body>
-</html>
diff --git a/deployment/asr-proxy/proxy.py b/deployment/asr-proxy/proxy.py
deleted file mode 100644
index db20d1a..0000000
--- a/deployment/asr-proxy/proxy.py
+++ /dev/null
@@ -1,741 +0,0 @@
-"""DictIA ASR Proxy - Auto-start/stop GCP GPU for WhisperX + Ollama.
-
-Uses Google Cloud Compute REST API directly (no gcloud CLI needed).
-Proxies both ASR (WhisperX) and LLM (Ollama) requests.
-Multi-zone fallback across Canada (Montreal + Toronto).
-"""
-
-import asyncio
-import json
-import logging
-import os
-import time
-
-import httpx
-import jwt as pyjwt
-from pathlib import Path
-
-from fastapi import FastAPI, Request
-from fastapi.responses import HTMLResponse, JSONResponse, Response
-
-logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
-log = logging.getLogger("asr-proxy")
-
-# Config — paths relative to this script's directory by default
-SCRIPT_DIR = Path(__file__).parent
-GCP_PROJECT = os.getenv("GCP_PROJECT", "speakr-gpu")
-WHISPERX_PORT = int(os.getenv("WHISPERX_PORT", "9000"))
-OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434"))
-IDLE_TIMEOUT = int(os.getenv("IDLE_TIMEOUT", "300"))
-CREDS_FILE = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", str(SCRIPT_DIR / "gcp-credentials.json"))
-STATS_FILE = os.getenv("STATS_FILE", str(SCRIPT_DIR / "usage-stats.json"))
-MONTHLY_LIMIT_HOURS = float(os.getenv("MONTHLY_LIMIT_HOURS", "30"))
-# Real GCP cost per GPU-hour (g2-standard-4 + L4): GPU ($0.837) + vCPU ($0.151) + RAM ($0.069)
-GPU_COST_PER_HOUR = float(os.getenv("GPU_COST_PER_HOUR", "1.06"))
-# Fixed monthly costs: SSD disks ($5.66) + snapshots ($4.19) ≈ $9.85/month
-FIXED_MONTHLY_COST = float(os.getenv("FIXED_MONTHLY_COST", "9.85"))
-SNAPSHOT_NAME = "whisperx-gpu-snapshot"
-HEALTH_POLL_INTERVAL = 5
-BOOT_TIMEOUT = 300
-
-# Zone fallback order — Canada only, Montreal first
-ZONE_FALLBACKS = [
-    {
-        "zone": "northamerica-northeast1-b",
-        "instance": "whisperx-gpu-mtl1",
-        "machine_type": "g2-standard-4",
-        "accelerator": "nvidia-l4",
-        "accel_count": 1,
-        "label": "Montreal-b (L4)",
-    },
-    {
-        "zone": "northamerica-northeast1-c",
-        "instance": "whisperx-gpu-mtl2",
-        "machine_type": "n1-standard-4",
-        "accelerator": "nvidia-tesla-t4",
-        "accel_count": 1,
-        "label": "Montreal-c (T4)",
-    },
-    {
-        "zone": "northamerica-northeast2-a",
-        "instance": "whisperx-gpu-tor1",
-        "machine_type": "g2-standard-4",
-        "accelerator": "nvidia-l4",
-        "accel_count": 1,
-        "label": "Toronto-a (L4)",
-    },
-    {
-        "zone": "northamerica-northeast2-b",
-        "instance": "whisperx-gpu",
-        "machine_type": "g2-standard-4",
-        "accelerator": "nvidia-l4",
-        "accel_count": 1,
-        "label": "Toronto-b (L4)",
-    },
-]
-
-STARTUP_SCRIPT = """#!/bin/bash
-systemctl start docker
-sleep 5
-docker start whisperx-asr 2>/dev/null || true
-systemctl start ollama 2>/dev/null || true
-"""
-
-app = FastAPI(title="DictIA ASR Proxy")
-
-# State
-last_request_time = 0.0
-active_requests = 0
-gpu_ip: str | None = None
-active_zone: dict | None = None
-shutdown_task: asyncio.Task | None = None
-
-# Request history tracking (in-memory, last 20 requests)
-request_history: list[dict] = []
-MAX_HISTORY = 20
-
-# Zone status tracking
-zone_status: dict[str, dict] = {}
-
-# Startup lock and failure cooldown
-_startup_lock: asyncio.Lock | None = None
-_last_failure_time: float = 0
-FAILURE_COOLDOWN = 180
-
-# OAuth2 token cache
-_access_token: str | None = None
-_token_expiry: float = 0
-
-
-# --- Usage Stats ---
-
-def load_stats() -> dict:
-    try:
-        with open(STATS_FILE) as f:
-            return json.load(f)
-    except (FileNotFoundError, json.JSONDecodeError):
-        return {"gpu_seconds": 0, "month": time.strftime("%Y-%m"), "requests": 0, "last_start": 0}
-
-
-def save_stats(stats: dict):
-    with open(STATS_FILE, "w") as f:
-        json.dump(stats, f, indent=2)
-
-
-def track_gpu_time():
-    stats = load_stats()
-    current_month = time.strftime("%Y-%m")
-    if stats.get("month") != current_month:
-        stats = {"gpu_seconds": 0, "month": current_month, "requests": 0, "last_start": 0}
-    if stats.get("last_start", 0) > 0:
-        elapsed = time.time() - stats["last_start"]
-        stats["gpu_seconds"] += elapsed
-    stats["last_start"] = 0
-    save_stats(stats)
-
-
-def check_budget() -> tuple[bool, float]:
-    stats = load_stats()
-    current_month = time.strftime("%Y-%m")
-    if stats.get("month") != current_month:
-        return True, 0.0
-    hours_used = stats.get("gpu_seconds", 0) / 3600
-    return hours_used < MONTHLY_LIMIT_HOURS, hours_used
-
-
-# --- GCP Auth ---
-
-async def get_access_token() -> str:
-    global _access_token, _token_expiry
-    if _access_token and time.time() < _token_expiry - 60:
-        return _access_token
-    with open(CREDS_FILE) as f:
-        creds = json.load(f)
-    cred_type = creds.get("type", "authorized_user")
-    async with httpx.AsyncClient() as client:
-        if cred_type == "service_account":
-            now = int(time.time())
-            payload = {
-                "iss": creds["client_email"],
-                "scope": "https://www.googleapis.com/auth/compute",
-                "aud": "https://oauth2.googleapis.com/token",
-                "iat": now,
-                "exp": now + 3600,
-            }
-            signed = pyjwt.encode(payload, creds["private_key"], algorithm="RS256")
-            resp = await client.post(
-                "https://oauth2.googleapis.com/token",
-                data={
-                    "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
-                    "assertion": signed,
-                },
-            )
-        else:
-            resp = await client.post(
-                "https://oauth2.googleapis.com/token",
-                data={
-                    "client_id": creds["client_id"],
-                    "client_secret": creds["client_secret"],
-                    "refresh_token": creds["refresh_token"],
-                    "grant_type": "refresh_token",
-                },
-            )
-        resp.raise_for_status()
-        data = resp.json()
-        _access_token = data["access_token"]
-        _token_expiry = time.time() + data.get("expires_in", 3600)
-        log.info(f"Refreshed GCP access token ({cred_type})")
-        return _access_token
-
-
-# --- GCP Compute API ---
-
-COMPUTE_BASE = "https://compute.googleapis.com/compute/v1"
-
-
-async def gcp_api(method: str, url: str, **kwargs) -> httpx.Response:
-    token = await get_access_token()
-    async with httpx.AsyncClient(timeout=60) as client:
-        resp = await client.request(
-            method, url,
-            headers={"Authorization": f"Bearer {token}"},
-            **kwargs,
-        )
-        return resp
-
-
-async def get_instance_info(zone: str, instance: str) -> dict | None:
-    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
-    resp = await gcp_api("GET", url)
-    if resp.status_code == 404:
-        return None
-    if resp.status_code >= 400:
-        log.error(f"GCP API error {resp.status_code}: {resp.text}")
-        return None
-    return resp.json()
-
-
-def extract_ip(instance_data: dict) -> str:
-    interfaces = instance_data.get("networkInterfaces", [])
-    if interfaces:
-        access = interfaces[0].get("accessConfigs", [])
-        if access:
-            return access[0].get("natIP", "")
-    return ""
-
-
-async def start_instance_in_zone(zone: str, instance: str) -> bool:
-    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/start"
-    resp = await gcp_api("POST", url)
-    if resp.status_code < 400:
-        log.info(f"Start requested: {instance} in {zone}")
-        return True
-    log.warning(f"Failed to start {instance} in {zone}: {resp.status_code} {resp.text}")
-    return False
-
-
-async def stop_instance_in_zone(zone: str, instance: str):
-    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/stop"
-    resp = await gcp_api("POST", url)
-    if resp.status_code < 400:
-        log.info(f"Stop requested: {instance} in {zone}")
-    else:
-        log.error(f"Failed to stop {instance} in {zone}: {resp.status_code}")
-
-
-async def create_instance_from_snapshot(config: dict) -> bool:
-    zone = config["zone"]
-    instance = config["instance"]
-    machine = config["machine_type"]
-    accel = config["accelerator"]
-    accel_count = config["accel_count"]
-
-    log.info(f"Creating {instance} in {zone} from snapshot...")
-
-    body = {
-        "name": instance,
-        "machineType": f"zones/{zone}/machineTypes/{machine}",
-        "disks": [{
-            "boot": True,
-            "autoDelete": True,
-            "initializeParams": {
-                "diskSizeGb": "50",
-                "diskType": f"zones/{zone}/diskTypes/pd-ssd",
-                "sourceSnapshot": f"global/snapshots/{SNAPSHOT_NAME}",
-            },
-        }],
-        "networkInterfaces": [{
-            "network": "global/networks/default",
-            "accessConfigs": [{"type": "ONE_TO_ONE_NAT", "name": "External NAT"}],
-        }],
-        "guestAccelerators": [{
-            "acceleratorType": f"zones/{zone}/acceleratorTypes/{accel}",
-            "acceleratorCount": accel_count,
-        }],
-        "scheduling": {
-            "onHostMaintenance": "TERMINATE",
-            "automaticRestart": False,
-        },
-        "tags": {"items": ["whisperx-gpu"]},
-        "metadata": {
-            "items": [{"key": "startup-script", "value": STARTUP_SCRIPT}],
-        },
-    }
-
-    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances"
-    resp = await gcp_api("POST", url, json=body)
-
-    if resp.status_code < 400:
-        log.info(f"Created {instance} in {zone}")
-        return True
-
-    error_text = resp.text
-    if "ZONE_RESOURCE_POOL_EXHAUSTED" in error_text:
-        log.warning(f"No capacity in {zone} -- skipping")
-    elif "QUOTA" in error_text.upper():
-        log.warning(f"Quota exceeded for {zone}: {error_text[:200]}")
-    else:
-        log.error(f"Failed to create in {zone}: {resp.status_code} {error_text[:200]}")
-    return False
-
-
-# --- Core Logic ---
-
-async def wait_for_running(zone: str, instance: str, timeout: int = 120, grace: int = 15) -> bool:
-    gone_count = 0
-    start_time = time.time()
-    for _ in range(timeout // 5):
-        info = await get_instance_info(zone, instance)
-        if info and info.get("status") == "RUNNING":
-            return True
-        status = info.get("status", "UNKNOWN") if info else "GONE"
-        elapsed = time.time() - start_time
-        if status == "GONE":
-            gone_count += 1
-            if gone_count >= 2:
-                log.warning(f"{instance} in {zone}: instance disappeared (no capacity)")
-                return False
-        if status in ("STOPPING",):
-            log.warning(f"{instance} in {zone}: status {status} (no capacity)")
-            return False
-        if status in ("TERMINATED", "STOPPED") and elapsed > grace:
-            log.warning(f"{instance} in {zone}: status {status} after {elapsed:.0f}s (no capacity)")
-            return False
-        await asyncio.sleep(5)
-    return False
-
-
-async def delete_instance(zone: str, instance: str):
-    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
-    resp = await gcp_api("DELETE", url)
-    if resp.status_code < 400:
-        log.info(f"Deleted {instance} in {zone} to free quota")
-    elif resp.status_code == 404:
-        pass
-    else:
-        log.warning(f"Failed to delete {instance} in {zone}: {resp.status_code}")
-
-
-async def ensure_gpu_running() -> str:
-    global gpu_ip, active_zone, _last_failure_time
-
-    if _last_failure_time > 0:
-        remaining = FAILURE_COOLDOWN - (time.time() - _last_failure_time)
-        if remaining > 0:
-            log.info(f"GPU cooldown active ({int(remaining)}s remaining), waiting...")
-            await asyncio.sleep(remaining)
-            _last_failure_time = 0
-
-    async with _startup_lock:
-        ok, hours = check_budget()
-        if not ok:
-            raise RuntimeError(f"Monthly GPU limit reached ({hours:.1f}h / {MONTHLY_LIMIT_HOURS}h)")
-
-        if active_zone:
-            info = await get_instance_info(active_zone["zone"], active_zone["instance"])
-            if info and info.get("status") == "RUNNING":
-                gpu_ip = extract_ip(info)
-                if gpu_ip:
-                    return gpu_ip
-
-        errors = []
-
-        for config in ZONE_FALLBACKS:
-            zone = config["zone"]
-            instance = config["instance"]
-            label = config["label"]
-
-            log.info(f"Trying {label}...")
-            info = await get_instance_info(zone, instance)
-
-            if info is None:
-                created = await create_instance_from_snapshot(config)
-                if not created:
-                    zone_status[label] = {
-                        "status": "no_capacity",
-                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                        "last_error": "no capacity",
-                    }
-                    errors.append(f"{label}: no capacity")
-                    continue
-                if not await wait_for_running(zone, instance, grace=30):
-                    zone_status[label] = {
-                        "status": "error",
-                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                        "last_error": "created but failed to start",
-                    }
-                    errors.append(f"{label}: created but failed to start")
-                    await delete_instance(zone, instance)
-                    await asyncio.sleep(3)
-                    continue
-            else:
-                status = info.get("status", "UNKNOWN")
-
-                if status == "RUNNING":
-                    pass
-                elif status in ("TERMINATED", "STOPPED"):
-                    zone_status[label] = {
-                        "status": "starting",
-                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                        "last_error": None,
-                    }
-                    started = await start_instance_in_zone(zone, instance)
-                    if not started:
-                        zone_status[label] = {
-                            "status": "error",
-                            "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                            "last_error": "start rejected",
-                        }
-                        errors.append(f"{label}: start rejected")
-                        continue
-                    if not await wait_for_running(zone, instance, grace=20):
-                        zone_status[label] = {
-                            "status": "error",
-                            "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                            "last_error": "didn't reach RUNNING",
-                        }
-                        errors.append(f"{label}: didn't reach RUNNING")
-                        continue
-                elif status in ("STAGING", "PROVISIONING"):
-                    zone_status[label] = {
-                        "status": "starting",
-                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                        "last_error": None,
-                    }
-                    if not await wait_for_running(zone, instance):
-                        zone_status[label] = {
-                            "status": "error",
-                            "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                            "last_error": f"stuck in {status}",
-                        }
-                        errors.append(f"{label}: stuck in {status}")
-                        continue
-                elif status == "STOPPING":
-                    log.info(f"{label}: STOPPING, deleting to free quota")
-                    await delete_instance(zone, instance)
-                    await asyncio.sleep(3)
-                    zone_status[label] = {
-                        "status": "error",
-                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                        "last_error": "was STOPPING, deleted",
-                    }
-                    errors.append(f"{label}: was STOPPING, deleted")
-                    continue
-
-            info = await get_instance_info(zone, instance)
-            if info and info.get("status") == "RUNNING":
-                gpu_ip = extract_ip(info)
-                if gpu_ip:
-                    active_zone = config
-                    _last_failure_time = 0
-                    zone_status[label] = {
-                        "status": "running",
-                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                        "last_error": None,
-                    }
-                    stats = load_stats()
-                    stats["last_start"] = time.time()
-                    stats["requests"] = stats.get("requests", 0) + 1
-                    stats["active_zone"] = label
-                    save_stats(stats)
-                    log.info(f"GPU ready in {label}, IP: {gpu_ip}")
-                    return gpu_ip
-
-            zone_status[label] = {
-                "status": "error",
-                "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
-                "last_error": "running but no IP",
-            }
-            errors.append(f"{label}: running but no IP")
-
-        _last_failure_time = time.time()
-        raise RuntimeError(
-            f"No GPU available in any Canadian zone. Tried: {'; '.join(errors)}"
-        )
-
-
-async def ensure_gpu_ready() -> str:
-    ip = await ensure_gpu_running()
-    url = f"http://{ip}:{WHISPERX_PORT}/health"
-    log.info(f"Waiting for WhisperX at {url}...")
-    async with httpx.AsyncClient(timeout=10) as client:
-        for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):
-            try:
-                resp = await client.get(url)
-                if resp.status_code == 200:
-                    log.info("WhisperX is healthy!")
-                    return ip
-            except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):
-                pass
-            await asyncio.sleep(HEALTH_POLL_INTERVAL)
-    raise RuntimeError("WhisperX did not become healthy in time")
-
-
-async def ensure_ollama_ready() -> str:
-    ip = await ensure_gpu_running()
-    url = f"http://{ip}:{OLLAMA_PORT}/api/tags"
-    log.info(f"Waiting for Ollama at {url}...")
-    async with httpx.AsyncClient(timeout=10) as client:
-        for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):
-            try:
-                resp = await client.get(url)
-                if resp.status_code == 200:
-                    log.info("Ollama is healthy!")
-                    return ip
-            except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):
-                pass
-            await asyncio.sleep(HEALTH_POLL_INTERVAL)
-    raise RuntimeError("Ollama did not become healthy in time")
-
-
-async def idle_shutdown_loop():
-    while True:
-        await asyncio.sleep(60)
-        if last_request_time == 0 or active_zone is None:
-            continue
-        if active_requests > 0:
-            continue
-        elapsed = time.time() - last_request_time
-        if elapsed >= IDLE_TIMEOUT:
-            try:
-                zone = active_zone["zone"]
-                instance = active_zone["instance"]
-                label = active_zone["label"]
-                info = await get_instance_info(zone, instance)
-                if info and info.get("status") == "RUNNING":
-                    log.info(f"Idle {int(elapsed)}s -- stopping {label}")
-                    await stop_instance_in_zone(zone, instance)
-                    track_gpu_time()
-            except Exception as e:
-                log.error(f"Error stopping: {e}")
-
-
-# --- Endpoints ---
-
-@app.on_event("startup")
-async def on_startup():
-    global shutdown_task, _startup_lock
-    _startup_lock = asyncio.Lock()
-    await get_access_token()
-    shutdown_task = asyncio.create_task(idle_shutdown_loop())
-    zones = ", ".join(c["label"] for c in ZONE_FALLBACKS)
-    log.info(f"DictIA ASR Proxy started. Zones: [{zones}]. Idle: {IDLE_TIMEOUT}s, limit: {MONTHLY_LIMIT_HOURS}h")
-
-
-@app.post("/asr")
-async def asr_proxy(request: Request):
-    global last_request_time, active_requests
-
-    body = await request.body()
-    headers = {
-        k: v for k, v in request.headers.items()
-        if k.lower() not in ("host", "transfer-encoding")
-    }
-
-    last_request_time = time.time()
-    active_requests += 1
-    start_time = time.time()
-    result_status = 200
-    try:
-        ip = await ensure_gpu_ready()
-        target = f"http://{ip}:{WHISPERX_PORT}/asr"
-        log.info(f"Forwarding {len(body)} bytes to {target}")
-        async with httpx.AsyncClient(timeout=httpx.Timeout(7200.0)) as client:
-            resp = await client.post(target, content=body, headers=headers)
-            last_request_time = time.time()
-            result_status = resp.status_code
-            ct = resp.headers.get("content-type", "")
-            if "application/json" in ct:
-                return JSONResponse(content=resp.json(), status_code=resp.status_code)
-            else:
-                return JSONResponse(content=resp.text, status_code=resp.status_code)
-    except httpx.ReadTimeout:
-        result_status = 504
-        return JSONResponse({"error": "Transcription timeout (2h)"}, status_code=504)
-    except Exception as e:
-        result_status = 502
-        log.error(f"Proxy error: {e}")
-        return JSONResponse({"error": str(e)}, status_code=502)
-    finally:
-        active_requests -= 1
-        last_request_time = time.time()
-        request_history.insert(0, {
-            "time": time.strftime("%Y-%m-%dT%H:%M:%S"),
-            "type": "ASR",
-            "duration_sec": round(time.time() - start_time, 1),
-            "status": result_status,
-            "zone": active_zone["label"] if active_zone else "none",
-        })
-        if len(request_history) > MAX_HISTORY:
-            request_history.pop()
-
-
-@app.get("/health")
-async def health():
-    zone_label = active_zone["label"] if active_zone else "none"
-    gpu_status = "unknown"
-    if active_zone:
-        try:
-            info = await get_instance_info(active_zone["zone"], active_zone["instance"])
-            gpu_status = info.get("status", "unknown") if info else "not_found"
-        except Exception:
-            pass
-    ok, hours = check_budget()
-    stats = load_stats()
-    return {
-        "proxy": "healthy",
-        "gpu_instance": gpu_status,
-        "gpu_zone": zone_label,
-        "active_requests": active_requests,
-        "idle_timeout": IDLE_TIMEOUT,
-        "usage": {
-            "month": stats.get("month"),
-            "gpu_hours": round(hours, 2),
-            "gpu_limit_hours": MONTHLY_LIMIT_HOURS,
-            "requests_count": stats.get("requests", 0),
-            "budget_ok": ok,
-        },
-        "gpu_ip": gpu_ip,
-        "machine_type": active_zone.get("machine_type", "unknown") if active_zone else "unknown",
-        "gpu_model": active_zone.get("accelerator", "unknown") if active_zone else "unknown",
-        "idle_seconds": round(time.time() - last_request_time) if last_request_time > 0 else 0,
-        "auto_shutdown_in": max(0, IDLE_TIMEOUT - round(time.time() - last_request_time)) if last_request_time > 0 and active_zone else None,
-        "token_expires_in": round(_token_expiry - time.time()) if _token_expiry > 0 else None,
-    }
-
-
-@app.get("/stats")
-async def get_stats():
-    stats = load_stats()
-    hours = stats.get("gpu_seconds", 0) / 3600
-    gpu_cost = hours * GPU_COST_PER_HOUR
-    total_cost = gpu_cost + FIXED_MONTHLY_COST
-    return {
-        "month": stats.get("month"),
-        "gpu_hours": round(hours, 2),
-        "gpu_minutes": round(hours * 60, 1),
-        "estimated_cost_usd": round(total_cost, 2),
-        "gpu_cost_usd": round(gpu_cost, 2),
-        "fixed_cost_usd": FIXED_MONTHLY_COST,
-        "monthly_limit_hours": MONTHLY_LIMIT_HOURS,
-        "remaining_hours": round(MONTHLY_LIMIT_HOURS - hours, 2),
-        "requests_count": stats.get("requests", 0),
-        "active_zone": stats.get("active_zone", "none"),
-        "cost_per_hour": GPU_COST_PER_HOUR,
-        "recent_requests": request_history[:10],
-        "zone_fallbacks": [
-            {
-                "label": config["label"],
-                "zone": config["zone"],
-                "machine": config["machine_type"],
-                "gpu": config["accelerator"],
-                **zone_status.get(config["label"], {"status": "unknown", "last_tried": None, "last_error": None}),
-            }
-            for config in ZONE_FALLBACKS
-        ],
-    }
-
-
-@app.post("/gpu/start")
-async def gpu_start():
-    try:
-        ip = await ensure_gpu_ready()
-        label = active_zone["label"] if active_zone else "unknown"
-        return {"status": "running", "ip": ip, "zone": label}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=503)
-
-
-@app.post("/gpu/stop")
-async def gpu_stop():
-    if not active_zone:
-        return {"status": "no active instance"}
-    try:
-        await stop_instance_in_zone(active_zone["zone"], active_zone["instance"])
-        track_gpu_time()
-        return {"status": "stopped", "zone": active_zone["label"]}
-    except Exception as e:
-        return JSONResponse({"error": str(e)}, status_code=500)
-
-
-DASHBOARD_HTML = Path(__file__).parent / "dashboard.html"
-
-
-@app.get("/", response_class=HTMLResponse)
-async def dashboard():
-    if DASHBOARD_HTML.exists():
-        return HTMLResponse(DASHBOARD_HTML.read_text(encoding="utf-8"))
-    return HTMLResponse("<h1>Dashboard not found</h1><p>Place dashboard.html next to proxy.py</p>", status_code=404)
-
-
-@app.api_route("/v1/{path:path}", methods=["POST", "GET"])
-async def llm_proxy(request: Request, path: str):
-    global last_request_time, active_requests
-
-    body = await request.body()
-    headers = {
-        k: v for k, v in request.headers.items()
-        if k.lower() not in ("host", "transfer-encoding")
-    }
-
-    last_request_time = time.time()
-    active_requests += 1
-    start_time = time.time()
-    result_status = 200
-    try:
-        ip = await ensure_ollama_ready()
-        target = f"http://{ip}:{OLLAMA_PORT}/v1/{path}"
-        log.info(f"Forwarding LLM request to {target}")
-        async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
-            resp = await client.request(request.method, target, content=body, headers=headers)
-            last_request_time = time.time()
-            result_status = resp.status_code
-            return Response(
-                content=resp.content,
-                status_code=resp.status_code,
-                media_type=resp.headers.get("content-type"),
-            )
-    except httpx.ReadTimeout:
-        result_status = 504
-        return JSONResponse({"error": "LLM timeout (5min)"}, status_code=504)
-    except Exception as e:
-        result_status = 502
-        log.error(f"LLM proxy error: {e}")
-        return JSONResponse({"error": str(e)}, status_code=502)
-    finally:
-        active_requests -= 1
-        last_request_time = time.time()
-        request_history.insert(0, {
-            "time": time.strftime("%Y-%m-%dT%H:%M:%S"),
-            "type": "LLM",
-            "duration_sec": round(time.time() - start_time, 1),
-            "status": result_status,
-            "zone": active_zone["label"] if active_zone else "none",
-        })
-        if len(request_history) > MAX_HISTORY:
-            request_history.pop()
-
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=9090)
diff --git a/deployment/asr-proxy/requirements.txt b/deployment/asr-proxy/requirements.txt
deleted file mode 100644
index f301f93..0000000
--- a/deployment/asr-proxy/requirements.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-fastapi==0.115.0
-uvicorn==0.30.0
-httpx==0.27.0
-PyJWT==2.9.0
-cryptography>=43.0.0
diff --git a/deployment/asr-proxy/setup.sh b/deployment/asr-proxy/setup.sh
deleted file mode 100644
index f0d88f8..0000000
--- a/deployment/asr-proxy/setup.sh
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env bash
-# DictIA ASR Proxy — Setup script
-# Installs the GCP GPU proxy for cloud deployments.
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-INSTALL_DIR="${ASR_PROXY_DIR:-$SCRIPT_DIR}"
-SERVICE_USER="${ASR_PROXY_USER:-$(whoami)}"
-
-echo "=== DictIA ASR Proxy Setup ==="
-echo "Install directory: $INSTALL_DIR"
-echo "Service user: $SERVICE_USER"
-echo
-
-# 1. Create virtual environment
-if [ ! -d "$INSTALL_DIR/venv" ]; then
-    echo "[1/4] Creating Python virtual environment..."
-    python3 -m venv "$INSTALL_DIR/venv"
-else
-    echo "[1/4] Virtual environment already exists."
-fi
-
-# 2. Install dependencies
-echo "[2/4] Installing Python dependencies..."
-"$INSTALL_DIR/venv/bin/pip" install --quiet --upgrade pip
-"$INSTALL_DIR/venv/bin/pip" install --quiet -r "$INSTALL_DIR/requirements.txt"
-
-# 3. GCP credentials
-if [ ! -f "$INSTALL_DIR/gcp-credentials.json" ]; then
-    echo "[3/4] GCP credentials not found."
-    echo "  Place your GCP service account or OAuth credentials at:"
-    echo "  $INSTALL_DIR/gcp-credentials.json"
-    echo
-    echo "  For service account: download JSON from GCP Console > IAM > Service Accounts"
-    echo "  For user credentials: run 'gcloud auth application-default login' and copy the file"
-    echo
-    read -rp "  Path to credentials file (or press Enter to skip): " CREDS_PATH
-    if [ -n "$CREDS_PATH" ] && [ -f "$CREDS_PATH" ]; then
-        cp "$CREDS_PATH" "$INSTALL_DIR/gcp-credentials.json"
-        chmod 600 "$INSTALL_DIR/gcp-credentials.json"
-        echo "  Credentials copied."
-    else
-        echo "  Skipped. You must add credentials before starting the proxy."
-    fi
-else
-    echo "[3/4] GCP credentials found."
-fi
-
-# 4. Install systemd service
-echo "[4/4] Installing systemd service..."
-SERVICE_FILE="/etc/systemd/system/asr-proxy.service"
-
-cat > /tmp/asr-proxy.service <<UNIT
-[Unit]
-Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
-After=network.target
-
-[Service]
-Type=simple
-User=$SERVICE_USER
-Restart=always
-RestartSec=10
-WorkingDirectory=$INSTALL_DIR
-ExecStart=$INSTALL_DIR/venv/bin/python proxy.py
-Environment=GOOGLE_APPLICATION_CREDENTIALS=$INSTALL_DIR/gcp-credentials.json
-Environment=STATS_FILE=$INSTALL_DIR/usage-stats.json
-
-[Install]
-WantedBy=multi-user.target
-UNIT
-
-if [ "$(id -u)" -eq 0 ]; then
-    cp /tmp/asr-proxy.service "$SERVICE_FILE"
-    systemctl daemon-reload
-    systemctl enable asr-proxy.service
-    echo "  Service installed and enabled."
-    echo "  Start with: systemctl start asr-proxy"
-else
-    echo "  Run as root to install systemd service, or copy manually:"
-    echo "  sudo cp /tmp/asr-proxy.service $SERVICE_FILE"
-    echo "  sudo systemctl daemon-reload && sudo systemctl enable asr-proxy"
-fi
-
-echo
-echo "=== Setup complete ==="
-echo "Dashboard: http://localhost:9090"
-echo "Health:    http://localhost:9090/health"
diff --git a/deployment/config/nginx/dictia.conf b/deployment/config/nginx/dictia.conf
deleted file mode 100644
index af42aaf..0000000
--- a/deployment/config/nginx/dictia.conf
+++ /dev/null
@@ -1,83 +0,0 @@
-# DictIA — Nginx reverse proxy configuration
-#
-# Alternative to Tailscale Serve for exposing DictIA over HTTPS.
-# Replace YOUR_DOMAIN with your actual domain name.
-#
-# Install: sudo cp dictia.conf /etc/nginx/sites-available/dictia
-#          sudo ln -s /etc/nginx/sites-available/dictia /etc/nginx/sites-enabled/
-#          sudo nginx -t && sudo systemctl reload nginx
-#
-# For HTTPS with Let's Encrypt:
-#   sudo certbot --nginx -d YOUR_DOMAIN
-
-upstream dictia_app {
-    server 127.0.0.1:8899;
-}
-
-upstream asr_proxy {
-    server 127.0.0.1:9090;
-}
-
-server {
-    listen 80;
-    server_name YOUR_DOMAIN;
-
-    # Redirect HTTP to HTTPS (uncomment after certbot setup)
-    # return 301 https://$host$request_uri;
-
-    client_max_body_size 500M;
-
-    # DictIA app
-    location / {
-        proxy_pass http://dictia_app;
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-
-        # WebSocket support (for real-time features)
-        proxy_http_version 1.1;
-        proxy_set_header Upgrade $http_upgrade;
-        proxy_set_header Connection "upgrade";
-
-        # Long timeouts for transcription uploads
-        proxy_read_timeout 3600s;
-        proxy_send_timeout 3600s;
-        proxy_connect_timeout 60s;
-    }
-
-    # ASR Proxy dashboard (optional, restrict access)
-    location /asr-proxy/ {
-        proxy_pass http://asr_proxy/;
-        proxy_set_header Host $host;
-        proxy_set_header X-Real-IP $remote_addr;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-    }
-}
-
-# HTTPS server block (managed by certbot, uncomment after setup)
-# server {
-#     listen 443 ssl;
-#     server_name YOUR_DOMAIN;
-#
-#     ssl_certificate /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem;
-#     ssl_certificate_key /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem;
-#     include /etc/letsencrypt/options-ssl-nginx.conf;
-#     ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
-#
-#     client_max_body_size 500M;
-#
-#     location / {
-#         proxy_pass http://dictia_app;
-#         proxy_set_header Host $host;
-#         proxy_set_header X-Real-IP $remote_addr;
-#         proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-#         proxy_set_header X-Forwarded-Proto $scheme;
-#         proxy_http_version 1.1;
-#         proxy_set_header Upgrade $http_upgrade;
-#         proxy_set_header Connection "upgrade";
-#         proxy_read_timeout 3600s;
-#         proxy_send_timeout 3600s;
-#     }
-# }
diff --git a/deployment/config/systemd/dictia.service b/deployment/config/systemd/dictia.service
deleted file mode 100644
index b6cb36d..0000000
--- a/deployment/config/systemd/dictia.service
+++ /dev/null
@@ -1,15 +0,0 @@
-[Unit]
-Description=DictIA - Docker Compose Application
-After=docker.service
-Requires=docker.service
-
-[Service]
-Type=oneshot
-RemainAfterExit=yes
-WorkingDirectory=/opt/dictia
-ExecStart=/usr/bin/docker compose -f deployment/docker/docker-compose.cloud.yml up -d
-ExecStop=/usr/bin/docker compose -f deployment/docker/docker-compose.cloud.yml down
-TimeoutStartSec=120
-
-[Install]
-WantedBy=multi-user.target
diff --git a/deployment/config/tailscale/setup-serve.sh b/deployment/config/tailscale/setup-serve.sh
deleted file mode 100644
index e05f4ce..0000000
--- a/deployment/config/tailscale/setup-serve.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Tailscale Serve/Funnel setup
-#
-# Exposes DictIA and ASR Proxy dashboard via Tailscale HTTPS.
-# Based on the VPS production configuration.
-#
-# Usage:
-#   bash setup-serve.sh [serve|funnel]
-#     serve  — accessible only within your tailnet (default)
-#     funnel — accessible from the public internet
-set -euo pipefail
-
-MODE="${1:-serve}"
-
-echo "=== DictIA Tailscale Setup ==="
-echo "Mode: $MODE"
-echo
-
-# Verify Tailscale is connected
-if ! tailscale status >/dev/null 2>&1; then
-    echo "ERROR: Tailscale is not running or not connected."
-    echo "  Install: curl -fsSL https://tailscale.com/install.sh | sh"
-    echo "  Connect: sudo tailscale up"
-    exit 1
-fi
-
-HOSTNAME=$(tailscale status --json | python3 -c "import sys,json; print(json.load(sys.stdin)['Self']['DNSName'].rstrip('.'))" 2>/dev/null || echo "unknown")
-echo "Tailscale hostname: $HOSTNAME"
-echo
-
-# DictIA app on :443 → localhost:8899
-echo "[1/2] Setting up DictIA app (port 443 → 8899)..."
-if [ "$MODE" = "funnel" ]; then
-    tailscale funnel --bg --https=443 http://localhost:8899
-else
-    tailscale serve --bg --https=443 http://localhost:8899
-fi
-
-# ASR Proxy dashboard on :9443 → localhost:9090
-echo "[2/2] Setting up ASR Proxy dashboard (port 9443 → 9090)..."
-if [ "$MODE" = "funnel" ]; then
-    tailscale funnel --bg --https=9443 http://localhost:9090
-else
-    tailscale serve --bg --https=9443 http://localhost:9090
-fi
-
-echo
-echo "=== Setup complete ==="
-echo "DictIA:        https://$HOSTNAME/"
-echo "ASR Dashboard: https://$HOSTNAME:9443/"
-echo
-echo "Verify with: tailscale serve status"
diff --git a/deployment/docker/.env.example b/deployment/docker/.env.example
deleted file mode 100644
index fc204f9..0000000
--- a/deployment/docker/.env.example
+++ /dev/null
@@ -1,124 +0,0 @@
-# =============================================================================
-# DictIA — Unified Environment Configuration
-# =============================================================================
-#
-# Copy this file to the project root as .env and edit the values.
-#   cp deployment/docker/.env.example .env
-#
-# This template combines upstream settings with DictIA deployment vars.
-# See: config/env.transcription.example for full upstream documentation.
-
-# =============================================================================
-# FLASK SECRET KEY (REQUIRED — auto-generated by setup.sh)
-# =============================================================================
-SECRET_KEY=change-me-to-a-random-string
-
-# =============================================================================
-# DEPLOYMENT PROFILE (used by deployment scripts)
-# =============================================================================
-# Options: cloud, local-cpu, local-gpu
-DICTIA_PROFILE=cloud
-
-# =============================================================================
-# TEXT GENERATION MODEL (REQUIRED for summaries, titles, chat)
-# =============================================================================
-TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
-TEXT_MODEL_API_KEY=your_openrouter_api_key
-TEXT_MODEL_NAME=openai/gpt-4o-mini
-
-# =============================================================================
-# TRANSCRIPTION CONFIGURATION
-# =============================================================================
-# For cloud profile (ASR Proxy → GCP GPU):
-#   ASR_BASE_URL is set automatically in docker-compose.cloud.yml
-#   No need to set it here.
-#
-# For local profiles (WhisperX sidecar):
-#   ASR_BASE_URL is set automatically in docker-compose.local-*.yml
-#   No need to set it here.
-#
-# For OpenAI API instead of self-hosted ASR:
-# TRANSCRIPTION_API_KEY=sk-your_openai_api_key
-# TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize
-
-# ASR model (for local WhisperX profiles)
-ASR_MODEL=large-v3
-
-# HuggingFace token (required for diarization with pyannote)
-# Get yours at: https://huggingface.co/settings/tokens
-# Must accept: https://huggingface.co/pyannote/speaker-diarization-3.1
-HF_TOKEN=
-
-# =============================================================================
-# ASR PROXY — CLOUD PROFILE ONLY
-# =============================================================================
-# GCP project for GPU instances
-# GCP_PROJECT=your-gcp-project
-
-# Monthly GPU budget limit in hours (default: 50)
-# MONTHLY_LIMIT_HOURS=50
-
-# Idle timeout before auto-stopping GPU (seconds, default: 300)
-# IDLE_TIMEOUT=300
-
-# =============================================================================
-# APPLICATION SETTINGS
-# =============================================================================
-ADMIN_USERNAME=admin
-ADMIN_EMAIL=admin@example.com
-ADMIN_PASSWORD=changeme
-
-ALLOW_REGISTRATION=false
-TIMEZONE="America/Toronto"
-LOG_LEVEL=ERROR
-LOCALE=fr_CA
-DEFAULT_LANGUAGE=fr
-SHOW_USERNAMES_IN_UI=true
-SESSION_COOKIE_HTTPONLY=true
-SESSION_COOKIE_SAMESITE=Lax
-SESSION_COOKIE_SECURE=true
-
-# =============================================================================
-# OPTIONAL FEATURES
-# =============================================================================
-ENABLE_INQUIRE_MODE=false
-ENABLE_AUTO_PROCESSING=false
-ENABLE_AUTO_EXPORT=false
-ENABLE_AUTO_DELETION=false
-ENABLE_INTERNAL_SHARING=true
-ENABLE_PUBLIC_SHARING=true
-ENABLE_FOLDERS=true
-VIDEO_RETENTION=true
-USERS_CAN_DELETE=true
-
-# =============================================================================
-# BACKGROUND PROCESSING
-# =============================================================================
-JOB_QUEUE_WORKERS=4
-SUMMARY_QUEUE_WORKERS=4
-JOB_MAX_RETRIES=3
-MAX_CONCURRENT_UPLOADS=3
-
-# =============================================================================
-# TRANSCRIPTION SETTINGS
-# =============================================================================
-TRANSCRIPTION_CONNECTOR=asr_endpoint
-USE_NEW_TRANSCRIPTION_ARCHITECTURE=true
-ENABLE_CHUNKING=true
-CHUNK_LIMIT=2400s
-CHUNK_OVERLAP_SECONDS=5
-
-# =============================================================================
-# LLM / SUMMARY SETTINGS
-# =============================================================================
-SUMMARY_LANGUAGE=fr
-SUMMARY_MAX_TOKENS=16000
-CHAT_MAX_TOKENS=12000
-ENABLE_STREAM_OPTIONS=false
-ENABLE_THINKING=false
-
-# =============================================================================
-# DOCKER/DATABASE
-# =============================================================================
-SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
-UPLOAD_FOLDER=/data/uploads
diff --git a/deployment/docker/docker-compose.cloud.yml b/deployment/docker/docker-compose.cloud.yml
deleted file mode 100644
index d4ae233..0000000
--- a/deployment/docker/docker-compose.cloud.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-# DictIA — Cloud deployment (VPS + ASR Proxy GCP GPU)
-#
-# Usage:
-#   docker compose -f deployment/docker/docker-compose.cloud.yml up -d
-#
-# ASR is handled by the external asr-proxy (port 9090) which auto-starts
-# a GCP GPU instance on demand. DictIA connects via host.docker.internal.
-
-services:
-  dictia:
-    build:
-      context: ../..
-      dockerfile: Dockerfile
-    image: innova-ai/dictia:latest
-    container_name: dictia
-    restart: unless-stopped
-    ports:
-      - "8899:8899"
-    env_file:
-      - ../../.env
-    environment:
-      - LOG_LEVEL=${LOG_LEVEL:-ERROR}
-      - ASR_BASE_URL=http://host.docker.internal:9090
-    volumes:
-      - ../../data/uploads:/data/uploads
-      - ../../data/instance:/data/instance
-    extra_hosts:
-      - "host.docker.internal:host-gateway"
-    healthcheck:
-      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 30s
-    networks:
-      - dictia-network
-
-networks:
-  dictia-network:
-    driver: bridge
diff --git a/deployment/docker/docker-compose.local-cpu.yml b/deployment/docker/docker-compose.local-cpu.yml
deleted file mode 100644
index 0a0f060..0000000
--- a/deployment/docker/docker-compose.local-cpu.yml
+++ /dev/null
@@ -1,64 +0,0 @@
-# DictIA — Local CPU deployment (WhisperX on CPU + DictIA)
-#
-# Usage:
-#   docker compose -f deployment/docker/docker-compose.local-cpu.yml up -d
-#
-# Warning: CPU transcription is significantly slower than GPU.
-# Expect ~10x real-time for large files (e.g., 1h audio = ~10h processing).
-
-services:
-  whisperx-asr:
-    image: ghcr.io/jim60105/whisperx-asr:latest
-    container_name: whisperx-asr
-    restart: unless-stopped
-    ports:
-      - "9000:9000"
-    environment:
-      - ASR_MODEL=${ASR_MODEL:-large-v3}
-      - ASR_ENGINE=whisperx
-      - DEVICE=cpu
-      - COMPUTE_TYPE=float32
-      - HF_TOKEN=${HF_TOKEN:-}
-    volumes:
-      - whisperx-cache:/root/.cache
-    deploy:
-      resources:
-        limits:
-          memory: 18G
-    networks:
-      - dictia-network
-
-  dictia:
-    build:
-      context: ../..
-      dockerfile: Dockerfile
-    image: innova-ai/dictia:latest
-    container_name: dictia
-    restart: unless-stopped
-    ports:
-      - "8899:8899"
-    env_file:
-      - ../../.env
-    environment:
-      - LOG_LEVEL=${LOG_LEVEL:-ERROR}
-      - ASR_BASE_URL=http://whisperx-asr:9000
-    volumes:
-      - ../../data/uploads:/data/uploads
-      - ../../data/instance:/data/instance
-    depends_on:
-      - whisperx-asr
-    healthcheck:
-      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 30s
-    networks:
-      - dictia-network
-
-volumes:
-  whisperx-cache:
-
-networks:
-  dictia-network:
-    driver: bridge
diff --git a/deployment/docker/docker-compose.local-gpu.yml b/deployment/docker/docker-compose.local-gpu.yml
deleted file mode 100644
index 488fd74..0000000
--- a/deployment/docker/docker-compose.local-gpu.yml
+++ /dev/null
@@ -1,69 +0,0 @@
-# DictIA — Local GPU deployment (WhisperX on NVIDIA GPU + DictIA)
-#
-# Usage:
-#   docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d
-#
-# Prerequisites:
-#   - NVIDIA GPU with CUDA support
-#   - nvidia-container-toolkit installed
-#   - Docker configured with nvidia runtime
-
-services:
-  whisperx-asr:
-    image: ghcr.io/jim60105/whisperx-asr:latest-cuda
-    container_name: whisperx-asr
-    restart: unless-stopped
-    ports:
-      - "9000:9000"
-    environment:
-      - ASR_MODEL=${ASR_MODEL:-large-v3}
-      - ASR_ENGINE=whisperx
-      - DEVICE=cuda
-      - COMPUTE_TYPE=float16
-      - HF_TOKEN=${HF_TOKEN:-}
-    volumes:
-      - whisperx-cache:/root/.cache
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    networks:
-      - dictia-network
-
-  dictia:
-    build:
-      context: ../..
-      dockerfile: Dockerfile
-    image: innova-ai/dictia:latest
-    container_name: dictia
-    restart: unless-stopped
-    ports:
-      - "8899:8899"
-    env_file:
-      - ../../.env
-    environment:
-      - LOG_LEVEL=${LOG_LEVEL:-ERROR}
-      - ASR_BASE_URL=http://whisperx-asr:9000
-    volumes:
-      - ../../data/uploads:/data/uploads
-      - ../../data/instance:/data/instance
-    depends_on:
-      - whisperx-asr
-    healthcheck:
-      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
-      interval: 30s
-      timeout: 10s
-      retries: 3
-      start_period: 30s
-    networks:
-      - dictia-network
-
-volumes:
-  whisperx-cache:
-
-networks:
-  dictia-network:
-    driver: bridge
diff --git a/deployment/docs/LOCAL-SETUP.md b/deployment/docs/LOCAL-SETUP.md
deleted file mode 100644
index f534972..0000000
--- a/deployment/docs/LOCAL-SETUP.md
+++ /dev/null
@@ -1,118 +0,0 @@
-# Setup Local — DictIA
-
-Guide pour deployer DictIA localement avec GPU NVIDIA ou CPU.
-
-## Profil local-gpu
-
-### Prerequis
-
-- NVIDIA GPU avec support CUDA
-- [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
-- Docker + Docker Compose V2
-- 8GB+ RAM (16GB recommande)
-- Token HuggingFace (pour la diarisation)
-
-### Installation nvidia-container-toolkit
-
-```bash
-# Ubuntu/Debian
-curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
-    sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
-curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
-    sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
-    sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
-sudo apt-get update
-sudo apt-get install -y nvidia-container-toolkit
-sudo nvidia-ctk runtime configure --runtime=docker
-sudo systemctl restart docker
-
-# Verifier
-docker run --rm --gpus all ubuntu nvidia-smi
-```
-
-### Setup DictIA
-
-```bash
-cd dictia
-bash deployment/setup.sh --profile local-gpu
-```
-
-Le setup va verifier:
-- nvidia-container-toolkit installe
-- GPU accessible depuis Docker
-- Assez de RAM disponible
-
-### Configuration du modele
-
-Par defaut, WhisperX utilise `large-v3`. Pour changer:
-
-```bash
-# Editer .env
-ASR_MODEL=large-v3      # Meilleure qualite
-# ASR_MODEL=medium       # Plus rapide, qualite correcte
-# ASR_MODEL=small        # Tres rapide, qualite reduite
-```
-
----
-
-## Profil local-cpu
-
-### Prerequis
-
-- Docker + Docker Compose V2
-- 18GB+ RAM (WhisperX CPU est gourmand)
-- Patience (transcription ~10x temps reel)
-
-### Setup
-
-```bash
-cd dictia
-bash deployment/setup.sh --profile local-cpu
-```
-
-### Limitations
-
-- Transcription lente: 1h d'audio prend ~10h
-- Utilise float32 (pas de GPU acceleration)
-- Limite memoire a 18GB par defaut
-- Recommande pour: tests, petits fichiers, demos
-
-Pour reduire l'utilisation memoire, utiliser un modele plus petit:
-
-```bash
-# Editer .env
-ASR_MODEL=small    # ou medium, base, tiny
-```
-
----
-
-## Verification
-
-```bash
-# Health check
-bash deployment/tools/health-check.sh
-
-# Test rapide: ouvrir le navigateur
-open http://localhost:8899   # Linux: xdg-open http://localhost:8899
-
-# Verifier WhisperX
-curl http://localhost:9000/health
-```
-
-## Gestion des containers
-
-```bash
-COMPOSE_FILE=deployment/docker/docker-compose.local-gpu.yml  # ou local-cpu
-
-# Logs
-docker compose -f $COMPOSE_FILE logs -f
-
-# Redemarrer
-docker compose -f $COMPOSE_FILE restart
-
-# Arreter
-docker compose -f $COMPOSE_FILE down
-
-# Voir l'utilisation GPU
-nvidia-smi  # (profil GPU seulement)
-```
diff --git a/deployment/docs/MAINTENANCE.md b/deployment/docs/MAINTENANCE.md
deleted file mode 100644
index f43b963..0000000
--- a/deployment/docs/MAINTENANCE.md
+++ /dev/null
@@ -1,136 +0,0 @@
-# Maintenance — DictIA
-
-## Backup
-
-```bash
-# Backup complet (data, .env, volumes, stats ASR)
-bash deployment/tools/backup.sh
-
-# Backup dans un repertoire specifique
-bash deployment/tools/backup.sh /mnt/backups
-```
-
-Les backups sont sauvegardes dans `backups/` avec rotation automatique (garde les 5 derniers).
-
-Contenu d'un backup:
-- `data/` — uploads et base de donnees SQLite
-- `dot-env` — fichier de configuration
-- `asr-usage-stats.json` — stats d'utilisation GPU
-- `whisperx-cache.tar.gz` — cache modeles (si volume Docker)
-- `manifest.json` — metadonnees du backup
-
-### Schedule recommande
-
-| Frequence | Action |
-|-----------|--------|
-| Quotidien | `bash deployment/tools/backup.sh` |
-| Hebdomadaire | Copier le backup sur un stockage externe |
-| Mensuel | Verifier la restauration sur un environnement de test |
-
-Pour automatiser avec cron:
-
-```bash
-# Backup quotidien a 3h du matin
-0 3 * * * /opt/dictia/deployment/tools/backup.sh >> /var/log/dictia-backup.log 2>&1
-```
-
-## Restore
-
-```bash
-# Lister les backups disponibles
-ls -la backups/
-
-# Restaurer un backup
-bash deployment/tools/restore.sh backups/dictia-20260211-030000.tar.gz
-```
-
-Le script:
-1. Valide l'archive (presence du manifest)
-2. Demande confirmation
-3. Arrete les containers
-4. Restaure les fichiers
-5. Redemarre les containers
-
-## Mise a jour
-
-```bash
-# Mise a jour complete (git pull + rebuild + restart)
-bash deployment/tools/update.sh
-
-# Rebuild seulement (sans git pull)
-bash deployment/tools/update.sh --no-pull
-
-# Git pull seulement (sans rebuild)
-bash deployment/tools/update.sh --no-build
-```
-
-Le script:
-1. Detecte le profil actif automatiquement
-2. `git pull origin dictia-branding`
-3. `docker build -t innova-ai/dictia:latest .`
-4. Pull WhisperX upstream (profils locaux)
-5. `docker compose down && docker compose up -d`
-6. Attend le health check
-7. Nettoie les images dangling
-
-## Monitoring
-
-### Health check
-
-```bash
-# Diagnostic complet (humain)
-bash deployment/tools/health-check.sh
-
-# JSON (pour alertes/scripts)
-bash deployment/tools/health-check.sh --json
-
-# Code de sortie seulement (0=ok, 1=probleme)
-bash deployment/tools/health-check.sh --quiet
-```
-
-### Logs
-
-```bash
-# DictIA
-docker logs dictia -f --tail 100
-
-# WhisperX (profils locaux)
-docker logs whisperx-asr -f --tail 100
-
-# ASR Proxy (profil cloud)
-journalctl -u asr-proxy -f
-```
-
-### Dashboard GPU (profil cloud)
-
-Le dashboard de monitoring GPU est accessible a:
-- `http://localhost:9090` (local)
-- `https://votre-hostname.tailnet.ts.net:9443` (Tailscale)
-
-Affiche: statut GPU, cout mensuel, historique des requetes, zones de fallback.
-
-### Metriques cles
-
-```bash
-# Espace disque (les transcriptions grossissent)
-df -h /opt/dictia/data/
-
-# Utilisation memoire (WhisperX est gourmand)
-docker stats --no-stream
-
-# Stats GPU (profil cloud)
-curl -s http://localhost:9090/stats | python3 -m json.tool
-```
-
-## Maintenance Docker
-
-```bash
-# Nettoyer les images orphelines
-docker image prune -f
-
-# Nettoyer tout (attention: supprime les volumes non utilises)
-# docker system prune -a --volumes
-
-# Verifier l'espace Docker
-docker system df
-```
diff --git a/deployment/docs/QUICKSTART.md b/deployment/docs/QUICKSTART.md
deleted file mode 100644
index f057175..0000000
--- a/deployment/docs/QUICKSTART.md
+++ /dev/null
@@ -1,90 +0,0 @@
-# Quickstart — DictIA
-
-## Prerequis communs
-
-- Docker + Docker Compose V2
-- Git
-- 2GB+ RAM disponible
-
-```bash
-git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
-cd dictia
-git checkout dictia-branding
-```
-
----
-
-## Profil Cloud (VPS + GCP GPU)
-
-Le GPU demarre automatiquement quand quelqu'un transcrit, et s'arrete apres 5 min d'inactivite.
-
-```bash
-# 1. Setup interactif
-bash deployment/setup.sh --profile cloud
-
-# 2. Setup ASR Proxy (GCP credentials requises)
-bash deployment/asr-proxy/setup.sh
-
-# 3. Optionnel: Tailscale Serve pour HTTPS
-bash deployment/config/tailscale/setup-serve.sh
-```
-
-**Requis**: credentials GCP (service account ou OAuth) dans `deployment/asr-proxy/gcp-credentials.json`.
-
----
-
-## Profil Local GPU
-
-Transcription locale sur GPU NVIDIA. Le plus rapide.
-
-```bash
-# Prerequis: nvidia-container-toolkit
-# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
-
-# Setup
-bash deployment/setup.sh --profile local-gpu
-```
-
-**Requis**: token HuggingFace pour la diarisation (pyannote).
-
----
-
-## Profil Local CPU
-
-Transcription sur CPU. Lent mais fonctionnel pour tester.
-
-```bash
-bash deployment/setup.sh --profile local-cpu
-```
-
-Prevoir ~10x le temps reel (1h audio = ~10h de traitement).
-
----
-
-## Apres l'installation
-
-```bash
-# Verifier que tout fonctionne
-bash deployment/tools/health-check.sh
-
-# Ouvrir DictIA
-open http://localhost:8899   # Linux: xdg-open http://localhost:8899
-```
-
-Se connecter avec les identifiants admin configures pendant le setup.
-
-## Commandes utiles
-
-```bash
-# Logs en temps reel
-docker compose -f deployment/docker/docker-compose.<profil>.yml logs -f
-
-# Redemarrer
-docker compose -f deployment/docker/docker-compose.<profil>.yml restart
-
-# Mise a jour
-bash deployment/tools/update.sh
-
-# Backup
-bash deployment/tools/backup.sh
-```
diff --git a/deployment/docs/TROUBLESHOOTING.md b/deployment/docs/TROUBLESHOOTING.md
deleted file mode 100644
index d733a7c..0000000
--- a/deployment/docs/TROUBLESHOOTING.md
+++ /dev/null
@@ -1,177 +0,0 @@
-# Troubleshooting — DictIA
-
-## WhisperX OOM (Out of Memory)
-
-**Symptome**: Container `whisperx-asr` crash ou restart en boucle.
-
-**Cause**: Modele trop gros pour la RAM/VRAM disponible.
-
-**Solutions**:
-```bash
-# Utiliser un modele plus petit dans .env
-ASR_MODEL=medium   # au lieu de large-v3
-
-# Augmenter la limite memoire (local-cpu)
-# Editer docker-compose.local-cpu.yml
-deploy:
-  resources:
-    limits:
-      memory: 24G   # au lieu de 18G
-```
-
-## Diarisation 403 Forbidden
-
-**Symptome**: Erreur 403 lors de la transcription avec diarisation.
-
-**Cause**: Token HuggingFace manquant ou conditions non acceptees.
-
-**Solution**:
-1. Creer un token: https://huggingface.co/settings/tokens
-2. Accepter les conditions: https://huggingface.co/pyannote/speaker-diarization-3.1
-3. Ajouter dans `.env`:
-```bash
-HF_TOKEN=hf_votre_token
-```
-4. Redemarrer: `docker compose -f deployment/docker/docker-compose.<profil>.yml restart`
-
-## GPU non detecte (local-gpu)
-
-**Symptome**: `nvidia-smi` fonctionne mais Docker ne voit pas le GPU.
-
-**Solution**:
-```bash
-# Installer nvidia-container-toolkit
-sudo apt install -y nvidia-container-toolkit
-sudo nvidia-ctk runtime configure --runtime=docker
-sudo systemctl restart docker
-
-# Verifier
-docker run --rm --gpus all ubuntu nvidia-smi
-```
-
-## Upload echoue (fichiers volumineux)
-
-**Symptome**: Upload de gros fichiers (>100MB) echoue.
-
-**Causes possibles**:
-- Timeout Nginx/reverse proxy
-- Limite upload trop basse
-
-**Solutions**:
-```bash
-# Si Nginx: verifier client_max_body_size dans dictia.conf
-client_max_body_size 500M;
-
-# Si Tailscale Serve: pas de limite cote Tailscale
-
-# Timeout gunicorn (dans le Dockerfile, deja a 600s)
-# Pour des fichiers tres longs, augmenter dans docker-compose:
-environment:
-  - GUNICORN_TIMEOUT=1200
-```
-
-## Container dictia "unhealthy"
-
-**Symptome**: `docker ps` montre "unhealthy" pour le container dictia.
-
-**Diagnostic**:
-```bash
-# Voir les logs
-docker logs dictia --tail 50
-
-# Tester manuellement
-docker exec dictia python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"
-```
-
-**Causes courantes**:
-- `.env` mal configure (SECRET_KEY manquant)
-- Base de donnees corrompue (restaurer backup)
-- Port 8899 deja utilise
-
-## ASR Proxy: "No GPU available"
-
-**Symptome**: Transcription echoue avec "No GPU available in any Canadian zone".
-
-**Causes**:
-- GCP n'a pas de GPU disponible (capacite epuisee)
-- Credentials GCP expirees
-- Budget mensuel atteint
-
-**Diagnostic**:
-```bash
-# Verifier le statut du proxy
-curl -s http://localhost:9090/health | python3 -m json.tool
-
-# Verifier les stats (budget)
-curl -s http://localhost:9090/stats | python3 -m json.tool
-
-# Voir les logs
-journalctl -u asr-proxy --since "1 hour ago"
-```
-
-**Solutions**:
-- Attendre (GCP libere des GPUs regulierement)
-- Le proxy reessaie automatiquement apres un cooldown de 3 minutes
-- Verifier le dashboard: http://localhost:9090
-
-## Build Docker lent/echoue
-
-**Symptome**: `docker build` prend trop de temps ou echoue.
-
-**Solutions**:
-```bash
-# Limiter les ressources si le VPS est petit
-docker build --memory=2g --cpus=2 -t innova-ai/dictia:latest .
-
-# Nettoyer le cache Docker si le disque est plein
-docker builder prune -f
-docker image prune -f
-```
-
-## Base de donnees corrompue
-
-**Symptome**: Erreur SQLite au demarrage.
-
-**Solution**:
-```bash
-# Restaurer le dernier backup
-bash deployment/tools/restore.sh backups/dictia-LATEST.tar.gz
-
-# Ou recreer la base (perd les donnees)
-rm data/instance/transcriptions.db
-docker compose -f deployment/docker/docker-compose.<profil>.yml restart
-```
-
-## Port 8899 deja utilise
-
-```bash
-# Trouver qui utilise le port
-sudo lsof -i :8899
-# ou
-sudo ss -tlnp | grep 8899
-
-# Arreter le processus ou changer le port dans docker-compose
-ports:
-  - "8900:8899"   # utiliser 8900 a la place
-```
-
-## Mise a jour qui casse tout
-
-```bash
-# Rollback: revenir au commit precedent
-cd dictia
-git log --oneline -5  # trouver le bon commit
-git checkout <commit-hash>
-
-# Rebuild et redemarrer
-docker build -t innova-ai/dictia:latest .
-docker compose -f deployment/docker/docker-compose.<profil>.yml down
-docker compose -f deployment/docker/docker-compose.<profil>.yml up -d
-```
-
-## Commande de diagnostic rapide
-
-```bash
-# Tout verifier d'un coup
-bash deployment/tools/health-check.sh --json | python3 -m json.tool
-```
diff --git a/deployment/docs/VPS-SETUP.md b/deployment/docs/VPS-SETUP.md
deleted file mode 100644
index deff17d..0000000
--- a/deployment/docs/VPS-SETUP.md
+++ /dev/null
@@ -1,148 +0,0 @@
-# Setup VPS from scratch — DictIA
-
-Guide complet pour deployer DictIA sur un VPS Ubuntu.
-Teste sur OVH VPS avec Ubuntu 22.04/24.04.
-
-## 1. Preparation du VPS
-
-```bash
-# Mise a jour systeme
-sudo apt update && sudo apt upgrade -y
-
-# Installer les essentiels
-sudo apt install -y curl git
-```
-
-## 2. Docker
-
-```bash
-# Installer Docker (methode officielle)
-curl -fsSL https://get.docker.com | sh
-
-# Ajouter l'utilisateur au groupe docker
-sudo usermod -aG docker $USER
-
-# Se reconnecter pour appliquer le groupe
-exit
-# (reconnecter via SSH)
-
-# Verifier
-docker --version
-docker compose version
-```
-
-## 3. Tailscale (recommande)
-
-Tailscale fournit un VPN mesh pour acceder au VPS sans exposer de ports publics.
-
-```bash
-# Installer Tailscale
-curl -fsSL https://tailscale.com/install.sh | sh
-
-# Connecter au tailnet
-sudo tailscale up
-
-# Verifier
-tailscale status
-```
-
-## 4. DictIA
-
-```bash
-# Cloner le repo
-cd ~
-git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
-cd dictia
-git checkout dictia-branding
-
-# Lancer le setup
-bash deployment/setup.sh --profile cloud
-```
-
-Le setup va:
-- Generer le `.env` avec vos identifiants
-- Creer les repertoires de donnees
-- Builder l'image Docker
-- Demarrer les containers
-
-## 5. ASR Proxy (GCP GPU)
-
-```bash
-# Installer le proxy
-bash deployment/asr-proxy/setup.sh
-
-# Ajouter les credentials GCP
-# Copier votre fichier de credentials dans:
-cp ~/gcp-credentials.json deployment/asr-proxy/gcp-credentials.json
-
-# Demarrer le service
-sudo systemctl start asr-proxy
-sudo systemctl status asr-proxy
-```
-
-## 6. Securite
-
-```bash
-# Docker daemon config (log rotation)
-sudo cp deployment/security/docker-daemon.json /etc/docker/daemon.json
-sudo systemctl restart docker
-
-# Firewall iptables (bloque trafic non-Tailscale)
-sudo bash deployment/security/iptables-rules.sh
-
-# Service systemd pour les regles au boot
-sudo cp deployment/security/docker-iptables.service /etc/systemd/system/
-sudo systemctl daemon-reload
-sudo systemctl enable docker-iptables
-```
-
-## 7. Tailscale Serve (HTTPS)
-
-```bash
-# Expose DictIA et le dashboard ASR via Tailscale HTTPS
-bash deployment/config/tailscale/setup-serve.sh
-
-# Verifier
-tailscale serve status
-```
-
-DictIA sera accessible a `https://votre-hostname.tailnet.ts.net/`.
-
-## 8. Service systemd (auto-start)
-
-```bash
-# Adapter le chemin dans le fichier si necessaire
-sudo cp deployment/config/systemd/dictia.service /etc/systemd/system/
-sudo systemctl daemon-reload
-sudo systemctl enable dictia
-```
-
-## 9. Verification
-
-```bash
-# Health check complet
-bash deployment/tools/health-check.sh
-
-# Verifier les endpoints
-curl -s http://localhost:8899/health
-curl -s http://localhost:9090/health
-```
-
-## 10. Premier backup
-
-```bash
-bash deployment/tools/backup.sh
-```
-
----
-
-## Checklist post-installation
-
-- [ ] DictIA repond sur :8899
-- [ ] ASR Proxy repond sur :9090
-- [ ] Tailscale Serve configure
-- [ ] iptables: seul Tailscale peut acceder
-- [ ] Docker: log rotation configuree
-- [ ] Service systemd enable (auto-start au boot)
-- [ ] Premier backup effectue
-- [ ] Identifiants admin testes
diff --git a/deployment/profiles/docker-compose.dictia16.yml b/deployment/profiles/docker-compose.dictia16.yml
deleted file mode 100644
index a553bb7..0000000
--- a/deployment/profiles/docker-compose.dictia16.yml
+++ /dev/null
@@ -1,101 +0,0 @@
-# =============================================================================
-# DictIA 16 — Docker Compose
-# GPU : RTX 5070 Ti (16 Go VRAM)
-# =============================================================================
-#
-# Services :
-#   - dictia        : Application principale DictIA
-#   - whisperx-asr  : Service de transcription WhisperX Large-v3
-#   - ollama        : LLM local Mistral 7B (résumés, chat, Q&A)
-#
-# Démarrage :
-#   1. cp config/env.dictia16.example .env
-#   2. docker compose -f config/docker-compose.dictia16.yml up -d
-#   3. Télécharger Mistral : docker exec ollama ollama pull mistral
-#
-# Note : Aucune clé API nécessaire — tout tourne en local (100% privé).
-# =============================================================================
-
-services:
-
-  # ---------------------------------------------------------------------------
-  # Application DictIA
-  # ---------------------------------------------------------------------------
-  dictia:
-    image: dictia:latest
-    container_name: dictia
-    restart: unless-stopped
-    ports:
-      - "8899:8899"
-    env_file:
-      - ../.env
-    environment:
-      - LOG_LEVEL=ERROR
-    volumes:
-      - ../uploads:/data/uploads
-      - ../instance:/data/instance
-      # Décommenter pour l'export automatique :
-      # - ../exports:/data/exports
-      # Décommenter pour le traitement automatique :
-      # - ../auto-process:/data/auto-process
-    depends_on:
-      - whisperx-asr
-      - ollama
-    networks:
-      - dictia-net
-
-  # ---------------------------------------------------------------------------
-  # WhisperX ASR — Transcription locale (WhisperX Large-v3)
-  # RTX 5070 Ti : BATCH_SIZE=32, COMPUTE_TYPE=float16
-  # ---------------------------------------------------------------------------
-  whisperx-asr:
-    image: murtazanasir/whisperx-asr-service:latest
-    container_name: whisperx-asr
-    restart: unless-stopped
-    environment:
-      - HF_TOKEN=${HF_TOKEN}
-      - DEVICE=cuda
-      - COMPUTE_TYPE=float16
-      - BATCH_SIZE=32
-      - DEFAULT_MODEL=large-v3
-    volumes:
-      - whisperx-models:/root/.cache
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    networks:
-      - dictia-net
-
-  # ---------------------------------------------------------------------------
-  # Ollama — LLM local Mistral 7B
-  # Résumés, points d'action, Q&A — 100% local, aucune donnée externe
-  # ---------------------------------------------------------------------------
-  ollama:
-    image: ollama/ollama:latest
-    container_name: ollama
-    restart: unless-stopped
-    volumes:
-      - ollama-models:/root/.ollama
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    networks:
-      - dictia-net
-
-networks:
-  dictia-net:
-    driver: bridge
-
-volumes:
-  whisperx-models:
-    driver: local
-  ollama-models:
-    driver: local
diff --git a/deployment/profiles/docker-compose.dictia8.yml b/deployment/profiles/docker-compose.dictia8.yml
deleted file mode 100644
index dc4c6ed..0000000
--- a/deployment/profiles/docker-compose.dictia8.yml
+++ /dev/null
@@ -1,75 +0,0 @@
-# =============================================================================
-# DictIA 8 — Docker Compose
-# GPU : RTX 5060 (8 Go VRAM)
-# =============================================================================
-#
-# Services :
-#   - dictia        : Application principale DictIA
-#   - whisperx-asr  : Service de transcription WhisperX Large-v3
-#
-# Démarrage :
-#   1. cp config/env.dictia8.example .env
-#   2. Remplir TEXT_MODEL_API_KEY dans .env
-#   3. docker compose -f config/docker-compose.dictia8.yml up -d
-# =============================================================================
-
-services:
-
-  # ---------------------------------------------------------------------------
-  # Application DictIA
-  # ---------------------------------------------------------------------------
-  dictia:
-    image: dictia:latest
-    container_name: dictia
-    restart: unless-stopped
-    ports:
-      - "8899:8899"
-    env_file:
-      - ../.env
-    environment:
-      - LOG_LEVEL=ERROR
-    volumes:
-      - ../uploads:/data/uploads
-      - ../instance:/data/instance
-      # Décommenter pour l'export automatique :
-      # - ../exports:/data/exports
-      # Décommenter pour le traitement automatique :
-      # - ../auto-process:/data/auto-process
-    depends_on:
-      - whisperx-asr
-    networks:
-      - dictia-net
-
-  # ---------------------------------------------------------------------------
-  # WhisperX ASR — Transcription locale (WhisperX Large-v3)
-  # RTX 5060 : BATCH_SIZE=16, COMPUTE_TYPE=float16
-  # ---------------------------------------------------------------------------
-  whisperx-asr:
-    image: murtazanasir/whisperx-asr-service:latest
-    container_name: whisperx-asr
-    restart: unless-stopped
-    environment:
-      - HF_TOKEN=${HF_TOKEN}
-      - DEVICE=cuda
-      - COMPUTE_TYPE=float16
-      - BATCH_SIZE=16
-      - DEFAULT_MODEL=large-v3
-    volumes:
-      - whisperx-models:/root/.cache
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    networks:
-      - dictia-net
-
-networks:
-  dictia-net:
-    driver: bridge
-
-volumes:
-  whisperx-models:
-    driver: local
diff --git a/deployment/profiles/env.dictia16.example b/deployment/profiles/env.dictia16.example
deleted file mode 100644
index 8335fe1..0000000
--- a/deployment/profiles/env.dictia16.example
+++ /dev/null
@@ -1,134 +0,0 @@
-# =============================================================================
-# DictIA 16 — Configuration (.env)
-# GPU : RTX 5070 Ti (16 Go VRAM)
-# =============================================================================
-#
-# Architecture :
-#   - Transcription  : WhisperX Large-v3 (local, ~5,5 Go VRAM)
-#   - LLM (résumés)  : Mistral 7B local via Ollama (~6,4 Go VRAM)
-#   - Mode           : Séquentiel (transcription puis résumé)
-#   - Total VRAM     : ~11,9 Go / 16 Go (marge ~4,1 Go)
-#
-# Démarrage rapide :
-#   1. cp config/env.dictia16.example .env
-#   2. Aucune clé API nécessaire — tout tourne en local
-#   3. docker compose -f config/docker-compose.dictia16.yml up -d
-# =============================================================================
-
-# =============================================================================
-# MODÈLE DE TEXTE — Résumés, titres, chat (LLM LOCAL)
-# =============================================================================
-# DictIA 16 utilise Mistral 7B en local via Ollama.
-# Aucune donnée ne quitte le serveur — 100% privé.
-
-TEXT_MODEL_BASE_URL=http://ollama:11434/v1
-TEXT_MODEL_API_KEY=not-required
-TEXT_MODEL_NAME=mistral
-
-# --- Modèle de chat séparé (optionnel) ---
-# Même modèle par défaut, mais peut être changé pour un modèle plus rapide.
-# CHAT_MODEL_API_KEY=not-required
-# CHAT_MODEL_BASE_URL=http://ollama:11434/v1
-# CHAT_MODEL_NAME=mistral
-
-# =============================================================================
-# TRANSCRIPTION — WhisperX ASR local (REQUIS)
-# =============================================================================
-# WhisperX tourne en local dans un conteneur Docker séparé.
-# Le service ASR est défini dans docker-compose.dictia16.yml.
-
-ASR_BASE_URL=http://whisperx-asr:9000
-
-# Diarisation (identification automatique des locuteurs) — recommandé
-ASR_DIARIZE=true
-ASR_RETURN_SPEAKER_EMBEDDINGS=true
-
-# Nombre de locuteurs attendus (optionnel — aide la précision)
-# ASR_MIN_SPEAKERS=1
-# ASR_MAX_SPEAKERS=6
-
-# =============================================================================
-# PARAMÈTRES ADMINISTRATEUR
-# =============================================================================
-ADMIN_USERNAME=admin
-ADMIN_EMAIL=admin@votreentreprise.com
-ADMIN_PASSWORD=changeme
-
-# =============================================================================
-# ACCÈS ET INSCRIPTION
-# =============================================================================
-# Désactiver l'inscription publique (accès sur invitation uniquement)
-ALLOW_REGISTRATION=false
-
-# Restreindre l'inscription aux domaines autorisés
-# Exemple : REGISTRATION_ALLOWED_DOMAINS=votreentreprise.com
-REGISTRATION_ALLOWED_DOMAINS=
-
-# =============================================================================
-# FUSEAU HORAIRE
-# =============================================================================
-# Exemples : America/Toronto, America/Montreal, America/New_York, UTC
-TIMEZONE="America/Toronto"
-
-# =============================================================================
-# LIMITES DE TOKENS
-# =============================================================================
-SUMMARY_MAX_TOKENS=8000
-CHAT_MAX_TOKENS=5000
-
-# =============================================================================
-# COMPRESSION AUDIO
-# =============================================================================
-AUDIO_COMPRESS_UPLOADS=true
-AUDIO_CODEC=mp3
-AUDIO_BITRATE=128k
-
-# =============================================================================
-# FONCTIONNALITÉS OPTIONNELLES
-# =============================================================================
-
-# Inquire Mode — recherche IA sur tous les enregistrements
-# Peut être activé sur DictIA 16 (plus de VRAM disponible)
-ENABLE_INQUIRE_MODE=false
-
-# Traitement automatique de fichiers (dossier surveillé)
-ENABLE_AUTO_PROCESSING=false
-# AUTO_PROCESS_MODE=admin_only
-# AUTO_PROCESS_WATCH_DIR=/data/auto-process
-
-# Export automatique
-ENABLE_AUTO_EXPORT=false
-# AUTO_EXPORT_DIR=/data/exports
-# AUTO_EXPORT_TRANSCRIPTION=true
-# AUTO_EXPORT_SUMMARY=true
-
-# Suppression automatique / rétention
-ENABLE_AUTO_DELETION=false
-# GLOBAL_RETENTION_DAYS=90
-# DELETION_MODE=audio_only
-
-# =============================================================================
-# PARTAGE
-# =============================================================================
-ENABLE_INTERNAL_SHARING=false
-ENABLE_PUBLIC_SHARING=true
-USERS_CAN_DELETE=true
-
-# =============================================================================
-# FILES D'ATTENTE DE TRAITEMENT
-# =============================================================================
-JOB_QUEUE_WORKERS=2
-SUMMARY_QUEUE_WORKERS=2
-JOB_MAX_RETRIES=3
-
-# =============================================================================
-# BASE DE DONNÉES ET STOCKAGE
-# =============================================================================
-SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
-UPLOAD_FOLDER=/data/uploads
-
-# =============================================================================
-# JOURNALISATION
-# =============================================================================
-# ERROR = production (minimal), INFO = débogage, DEBUG = développement
-LOG_LEVEL=ERROR
diff --git a/deployment/profiles/env.dictia8.example b/deployment/profiles/env.dictia8.example
deleted file mode 100644
index 3efbbe5..0000000
--- a/deployment/profiles/env.dictia8.example
+++ /dev/null
@@ -1,126 +0,0 @@
-# =============================================================================
-# DictIA 8 — Configuration (.env)
-# GPU : RTX 5060 (8 Go VRAM)
-# =============================================================================
-#
-# Architecture :
-#   - Transcription  : WhisperX Large-v3 (local, ~5,5 Go VRAM)
-#   - LLM (résumés)  : API cloud via OpenRouter (VRAM insuffisante pour LLM local)
-#
-# Démarrage rapide :
-#   1. cp config/env.dictia8.example .env
-#   2. Remplir TEXT_MODEL_API_KEY (seule clé API requise — la transcription est locale)
-#   3. docker compose -f config/docker-compose.dictia8.yml up -d
-# =============================================================================
-
-# =============================================================================
-# MODÈLE DE TEXTE — Résumés, titres, chat (REQUIS)
-# =============================================================================
-# DictIA 8 utilise un LLM cloud via OpenRouter (VRAM insuffisante pour LLM local).
-# Inscrivez-vous sur https://openrouter.ai pour obtenir une clé API.
-
-TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
-TEXT_MODEL_API_KEY=votre_cle_openrouter
-TEXT_MODEL_NAME=openai/gpt-4o-mini
-
-# =============================================================================
-# TRANSCRIPTION — WhisperX ASR local (REQUIS)
-# =============================================================================
-# WhisperX tourne en local dans un conteneur Docker séparé.
-# Le service ASR est défini dans docker-compose.dictia8.yml.
-
-ASR_BASE_URL=http://whisperx-asr:9000
-
-# Diarisation (identification automatique des locuteurs) — recommandé
-ASR_DIARIZE=true
-ASR_RETURN_SPEAKER_EMBEDDINGS=true
-
-# Nombre de locuteurs attendus (optionnel — aide la précision)
-# ASR_MIN_SPEAKERS=1
-# ASR_MAX_SPEAKERS=6
-
-# =============================================================================
-# PARAMÈTRES ADMINISTRATEUR
-# =============================================================================
-ADMIN_USERNAME=admin
-ADMIN_EMAIL=admin@votreentreprise.com
-ADMIN_PASSWORD=changeme
-
-# =============================================================================
-# ACCÈS ET INSCRIPTION
-# =============================================================================
-# Désactiver l'inscription publique (accès sur invitation uniquement)
-ALLOW_REGISTRATION=false
-
-# Restreindre l'inscription aux domaines autorisés
-# Exemple : REGISTRATION_ALLOWED_DOMAINS=votreentreprise.com
-REGISTRATION_ALLOWED_DOMAINS=
-
-# =============================================================================
-# FUSEAU HORAIRE
-# =============================================================================
-# Exemples : America/Toronto, America/Montreal, America/New_York, UTC
-TIMEZONE="America/Toronto"
-
-# =============================================================================
-# LIMITES DE TOKENS
-# =============================================================================
-SUMMARY_MAX_TOKENS=8000
-CHAT_MAX_TOKENS=5000
-
-# =============================================================================
-# COMPRESSION AUDIO
-# =============================================================================
-AUDIO_COMPRESS_UPLOADS=true
-AUDIO_CODEC=mp3
-AUDIO_BITRATE=128k
-
-# =============================================================================
-# FONCTIONNALITÉS OPTIONNELLES
-# =============================================================================
-
-# Inquire Mode — recherche IA sur tous les enregistrements
-# Désactivé sur DictIA 8 (VRAM insuffisante pour embeddings locaux)
-ENABLE_INQUIRE_MODE=false
-
-# Traitement automatique de fichiers (dossier surveillé)
-ENABLE_AUTO_PROCESSING=false
-# AUTO_PROCESS_MODE=admin_only
-# AUTO_PROCESS_WATCH_DIR=/data/auto-process
-
-# Export automatique
-ENABLE_AUTO_EXPORT=false
-# AUTO_EXPORT_DIR=/data/exports
-# AUTO_EXPORT_TRANSCRIPTION=true
-# AUTO_EXPORT_SUMMARY=true
-
-# Suppression automatique / rétention
-ENABLE_AUTO_DELETION=false
-# GLOBAL_RETENTION_DAYS=90
-# DELETION_MODE=audio_only
-
-# =============================================================================
-# PARTAGE
-# =============================================================================
-ENABLE_INTERNAL_SHARING=false
-ENABLE_PUBLIC_SHARING=true
-USERS_CAN_DELETE=true
-
-# =============================================================================
-# FILES D'ATTENTE DE TRAITEMENT
-# =============================================================================
-JOB_QUEUE_WORKERS=2
-SUMMARY_QUEUE_WORKERS=2
-JOB_MAX_RETRIES=3
-
-# =============================================================================
-# BASE DE DONNÉES ET STOCKAGE
-# =============================================================================
-SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
-UPLOAD_FOLDER=/data/uploads
-
-# =============================================================================
-# JOURNALISATION
-# =============================================================================
-# ERROR = production (minimal), INFO = débogage, DEBUG = développement
-LOG_LEVEL=ERROR
diff --git a/deployment/security/docker-daemon.json b/deployment/security/docker-daemon.json
deleted file mode 100644
index 217a460..0000000
--- a/deployment/security/docker-daemon.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-    "log-driver": "json-file",
-    "log-opts": {
-        "max-size": "10m",
-        "max-file": "3"
-    },
-    "storage-driver": "overlay2"
-}
diff --git a/deployment/security/docker-iptables.service b/deployment/security/docker-iptables.service
deleted file mode 100644
index 5a78b28..0000000
--- a/deployment/security/docker-iptables.service
+++ /dev/null
@@ -1,12 +0,0 @@
-[Unit]
-Description=DictIA Docker iptables rules
-After=docker.service tailscaled.service
-Requires=docker.service
-
-[Service]
-Type=oneshot
-RemainAfterExit=yes
-ExecStart=/bin/bash /opt/dictia/deployment/security/iptables-rules.sh
-
-[Install]
-WantedBy=multi-user.target
diff --git a/deployment/security/iptables-rules.sh b/deployment/security/iptables-rules.sh
deleted file mode 100644
index 376cd7c..0000000
--- a/deployment/security/iptables-rules.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — iptables rules for cloud VPS
-#
-# Allows Docker internal traffic to reach the ASR proxy on port 9090.
-# Blocks direct external access to Docker container IPs.
-# Tailscale + UFW handle the main firewall — this script adds Docker-specific rules.
-#
-# Usage: sudo bash iptables-rules.sh
-set -euo pipefail
-
-echo "=== DictIA iptables rules ==="
-
-# Allow Docker containers (172.16.0.0/12) to reach ASR proxy on port 9090
-# This rule goes BEFORE the default DROP policy so containers can talk to the proxy
-iptables -C INPUT -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT 2>/dev/null \
-    || iptables -I INPUT 1 -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT
-
-# Block direct external access to Docker container IPs (raw table, before conntrack)
-# Protects containers on non-default bridge networks (e.g., dictia-network)
-for NETWORK_ID in $(docker network ls --filter driver=bridge --format '{{.ID}}' 2>/dev/null); do
-    BRIDGE=$(docker network inspect "$NETWORK_ID" --format '{{.Options.com.docker.network.bridge.name}}' 2>/dev/null || echo "")
-    [ -z "$BRIDGE" ] && continue
-    [ "$BRIDGE" = "docker0" ] && continue
-
-    for CONTAINER_IP in $(docker network inspect "$NETWORK_ID" \
-        --format '{{range .Containers}}{{.IPv4Address}} {{end}}' 2>/dev/null); do
-        IP="${CONTAINER_IP%/*}"
-        [ -z "$IP" ] && continue
-        iptables -t raw -C PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP 2>/dev/null \
-            || iptables -t raw -A PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP
-        echo "  Protected $IP on $BRIDGE"
-    done
-done
-
-echo "Rules applied. Tailscale + Docker internal traffic allowed."
-echo "Verify with: sudo iptables -L -n -t raw"
diff --git a/deployment/setup.sh b/deployment/setup.sh
deleted file mode 100755
index dbf7fe3..0000000
--- a/deployment/setup.sh
+++ /dev/null
@@ -1,300 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Main setup script
-#
-# Interactive installer that detects hardware and configures the appropriate
-# deployment profile (cloud, local-cpu, local-gpu).
-#
-# Usage:
-#   bash deployment/setup.sh                    # Interactive mode
-#   bash deployment/setup.sh --profile cloud    # Non-interactive
-#   bash deployment/setup.sh --profile local-gpu
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
-PROFILE=""
-
-for arg in "$@"; do
-    case "$arg" in
-        --profile=*) PROFILE="${arg#*=}" ;;
-        --profile)   shift_next=true ;;
-        *)
-            if [ "${shift_next:-false}" = true ]; then
-                PROFILE="$arg"
-                shift_next=false
-            fi
-            ;;
-    esac
-done
-
-# --- Colors ---
-RED='\033[0;31m'
-GREEN='\033[0;32m'
-YELLOW='\033[1;33m'
-CYAN='\033[0;36m'
-NC='\033[0m'
-
-info()  { echo -e "${CYAN}[INFO]${NC} $*"; }
-ok()    { echo -e "${GREEN}[OK]${NC} $*"; }
-warn()  { echo -e "${YELLOW}[WARN]${NC} $*"; }
-err()   { echo -e "${RED}[ERROR]${NC} $*"; }
-
-echo
-echo -e "${CYAN}========================================${NC}"
-echo -e "${CYAN}  DictIA — Setup${NC}"
-echo -e "${CYAN}========================================${NC}"
-echo
-
-# ==========================================================================
-# 1. Hardware Detection
-# ==========================================================================
-info "Detecting hardware..."
-
-# Docker
-if command -v docker &>/dev/null && docker info &>/dev/null; then
-    DOCKER_VERSION=$(docker --version | grep -oP '\d+\.\d+\.\d+' | head -1)
-    ok "Docker $DOCKER_VERSION"
-else
-    err "Docker not found or not running."
-    echo "  Install Docker: https://docs.docker.com/engine/install/"
-    exit 1
-fi
-
-# Docker Compose
-if docker compose version &>/dev/null; then
-    COMPOSE_VERSION=$(docker compose version --short 2>/dev/null || echo "unknown")
-    ok "Docker Compose $COMPOSE_VERSION"
-else
-    err "Docker Compose not found."
-    echo "  Docker Compose V2 is required (comes with Docker Desktop or docker-compose-plugin)"
-    exit 1
-fi
-
-# GPU
-HAS_GPU=false
-if command -v nvidia-smi &>/dev/null; then
-    GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 || echo "")
-    if [ -n "$GPU_NAME" ]; then
-        HAS_GPU=true
-        ok "NVIDIA GPU: $GPU_NAME"
-        # Check nvidia-container-toolkit
-        if docker info 2>/dev/null | grep -qi nvidia; then
-            ok "nvidia-container-toolkit detected"
-        else
-            warn "nvidia-container-toolkit not detected. Required for local-gpu profile."
-            echo "  Install: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
-        fi
-    fi
-else
-    info "No NVIDIA GPU detected"
-fi
-
-# RAM
-if command -v free &>/dev/null; then
-    RAM_GB=$(free -g | awk '/Mem:/{print $2}')
-    info "RAM: ${RAM_GB}GB"
-fi
-
-# Disk
-DISK_AVAIL=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $4}')
-info "Disk available: $DISK_AVAIL"
-
-echo
-
-# ==========================================================================
-# 2. Profile Selection
-# ==========================================================================
-if [ -z "$PROFILE" ]; then
-    echo -e "${CYAN}Select deployment profile:${NC}"
-    echo
-    echo "  1) cloud      — VPS with ASR Proxy (GCP GPU on demand)"
-    echo "                   Best for: remote servers, pay-per-use GPU"
-    echo
-    echo "  2) local-gpu  — Local NVIDIA GPU for transcription"
-    echo "                   Best for: dedicated GPU server, fastest"
-    if [ "$HAS_GPU" = false ]; then
-        echo -e "                   ${YELLOW}(No GPU detected on this machine)${NC}"
-    fi
-    echo
-    echo "  3) local-cpu  — CPU-only transcription (slow)"
-    echo "                   Best for: testing, low-volume usage"
-    echo
-    read -rp "Choice [1-3]: " CHOICE
-    case "$CHOICE" in
-        1) PROFILE="cloud" ;;
-        2) PROFILE="local-gpu" ;;
-        3) PROFILE="local-cpu" ;;
-        *) err "Invalid choice"; exit 1 ;;
-    esac
-fi
-
-COMPOSE_FILE="$SCRIPT_DIR/docker/docker-compose.$PROFILE.yml"
-if [ ! -f "$COMPOSE_FILE" ]; then
-    err "Compose file not found: $COMPOSE_FILE"
-    exit 1
-fi
-
-ok "Profile: $PROFILE"
-echo
-
-# ==========================================================================
-# 3. Generate .env
-# ==========================================================================
-ENV_FILE="$PROJECT_DIR/.env"
-
-if [ -f "$ENV_FILE" ]; then
-    warn ".env already exists. Keeping existing configuration."
-    echo "  To reconfigure, delete .env and re-run setup."
-else
-    info "Generating .env..."
-
-    # Generate secret key
-    SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_hex(32))" 2>/dev/null \
-        || openssl rand -hex 32 2>/dev/null \
-        || head -c 64 /dev/urandom | xxd -p | head -c 64)
-
-    # Prompt for admin credentials
-    read -rp "Admin username [admin]: " ADMIN_USER
-    ADMIN_USER="${ADMIN_USER:-admin}"
-    read -rp "Admin email [admin@example.com]: " ADMIN_EMAIL
-    ADMIN_EMAIL="${ADMIN_EMAIL:-admin@example.com}"
-    read -rsp "Admin password: " ADMIN_PASS
-    echo
-    ADMIN_PASS="${ADMIN_PASS:-changeme}"
-
-    # Prompt for text model API key
-    echo
-    info "DictIA needs a text/LLM API key for summaries, titles, and chat."
-    echo "  Recommended: OpenRouter (https://openrouter.ai) — access to many models"
-    read -rp "Text model API key (or press Enter to skip): " TEXT_API_KEY
-    TEXT_API_KEY="${TEXT_API_KEY:-your_openrouter_api_key}"
-
-    # HuggingFace token for diarization
-    if [ "$PROFILE" != "cloud" ]; then
-        echo
-        info "For speaker diarization, a HuggingFace token is needed."
-        echo "  Get one at: https://huggingface.co/settings/tokens"
-        echo "  Accept model: https://huggingface.co/pyannote/speaker-diarization-3.1"
-        read -rp "HuggingFace token (or press Enter to skip): " HF_TOKEN
-        HF_TOKEN="${HF_TOKEN:-}"
-    else
-        HF_TOKEN=""
-    fi
-
-    # Write .env
-    cp "$SCRIPT_DIR/docker/.env.example" "$ENV_FILE"
-    sed -i "s|SECRET_KEY=.*|SECRET_KEY=$SECRET_KEY|" "$ENV_FILE"
-    sed -i "s|DICTIA_PROFILE=.*|DICTIA_PROFILE=$PROFILE|" "$ENV_FILE"
-    sed -i "s|ADMIN_USERNAME=.*|ADMIN_USERNAME=$ADMIN_USER|" "$ENV_FILE"
-    sed -i "s|ADMIN_EMAIL=.*|ADMIN_EMAIL=$ADMIN_EMAIL|" "$ENV_FILE"
-    sed -i "s|ADMIN_PASSWORD=.*|ADMIN_PASSWORD=$ADMIN_PASS|" "$ENV_FILE"
-    sed -i "s|TEXT_MODEL_API_KEY=.*|TEXT_MODEL_API_KEY=$TEXT_API_KEY|" "$ENV_FILE"
-    sed -i "s|HF_TOKEN=.*|HF_TOKEN=$HF_TOKEN|" "$ENV_FILE"
-
-    ok ".env generated"
-fi
-echo
-
-# ==========================================================================
-# 4. Create data directories
-# ==========================================================================
-info "Creating data directories..."
-mkdir -p "$PROJECT_DIR/data/uploads" "$PROJECT_DIR/data/instance"
-ok "data/uploads and data/instance created"
-echo
-
-# ==========================================================================
-# 5. Profile-specific setup
-# ==========================================================================
-case "$PROFILE" in
-    cloud)
-        info "Cloud profile — setting up ASR Proxy..."
-        if [ -f "$SCRIPT_DIR/asr-proxy/setup.sh" ]; then
-            echo "  Run the ASR proxy setup separately:"
-            echo "    bash $SCRIPT_DIR/asr-proxy/setup.sh"
-        fi
-        echo
-        info "Setting up iptables rules..."
-        if [ -f "$SCRIPT_DIR/security/iptables-rules.sh" ] && [ "$(id -u)" -eq 0 ]; then
-            bash "$SCRIPT_DIR/security/iptables-rules.sh"
-        else
-            echo "  Run as root: sudo bash $SCRIPT_DIR/security/iptables-rules.sh"
-        fi
-        echo
-        info "Setting up Tailscale Serve..."
-        if command -v tailscale &>/dev/null; then
-            echo "  Run: bash $SCRIPT_DIR/config/tailscale/setup-serve.sh"
-        else
-            warn "Tailscale not installed."
-            echo "  Install: curl -fsSL https://tailscale.com/install.sh | sh"
-        fi
-        ;;
-    local-gpu)
-        info "Local GPU profile — verifying NVIDIA runtime..."
-        if docker info 2>/dev/null | grep -qi nvidia; then
-            ok "NVIDIA Docker runtime available"
-            # Quick GPU test
-            if docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi &>/dev/null; then
-                ok "GPU test passed"
-            else
-                warn "GPU test failed. Check nvidia-container-toolkit installation."
-            fi
-        else
-            err "NVIDIA Docker runtime not found."
-            echo "  Install nvidia-container-toolkit and restart Docker."
-            echo "  https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
-        fi
-        ;;
-    local-cpu)
-        warn "CPU-only transcription is significantly slower than GPU."
-        echo "  Expect ~10x real-time (1h audio = ~10h processing)."
-        echo "  Consider local-gpu or cloud profile for better performance."
-        ;;
-esac
-
-echo
-
-# ==========================================================================
-# 6. Build and start
-# ==========================================================================
-info "Building DictIA Docker image..."
-cd "$PROJECT_DIR"
-docker build -t innova-ai/dictia:latest .
-ok "Image built"
-
-echo
-info "Starting DictIA ($PROFILE profile)..."
-docker compose -f "$COMPOSE_FILE" up -d
-ok "Containers started"
-
-# ==========================================================================
-# 7. Health check
-# ==========================================================================
-echo
-info "Waiting for DictIA to become healthy..."
-RETRIES=30
-for i in $(seq 1 $RETRIES); do
-    if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
-        ok "DictIA is healthy!"
-        break
-    fi
-    if [ "$i" -eq "$RETRIES" ]; then
-        warn "Health check timeout. Check logs: docker compose -f $COMPOSE_FILE logs"
-    fi
-    sleep 5
-done
-
-echo
-echo -e "${GREEN}========================================${NC}"
-echo -e "${GREEN}  DictIA is ready!${NC}"
-echo -e "${GREEN}========================================${NC}"
-echo
-echo "  App:       http://localhost:8899"
-echo "  Profile:   $PROFILE"
-echo "  Compose:   $COMPOSE_FILE"
-echo
-echo "  Tools:"
-echo "    Update:       bash deployment/tools/update.sh"
-echo "    Backup:       bash deployment/tools/backup.sh"
-echo "    Health check: bash deployment/tools/health-check.sh"
-echo
diff --git a/deployment/tools/backup.sh b/deployment/tools/backup.sh
deleted file mode 100644
index 17ee50a..0000000
--- a/deployment/tools/backup.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Backup script
-#
-# Creates a timestamped backup of data, env, and Docker volumes.
-# Keeps the last N backups (default: 5).
-#
-# Usage: bash backup.sh [BACKUP_DIR]
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
-BACKUP_BASE="${1:-$PROJECT_DIR/backups}"
-KEEP_COUNT=5
-TIMESTAMP=$(date +%Y%m%d-%H%M%S)
-BACKUP_DIR="$BACKUP_BASE/dictia-$TIMESTAMP"
-
-echo "=== DictIA Backup ==="
-echo "Project: $PROJECT_DIR"
-echo "Backup:  $BACKUP_DIR"
-echo
-
-mkdir -p "$BACKUP_DIR"
-
-# 1. Data directory
-if [ -d "$PROJECT_DIR/data" ]; then
-    echo "[1/4] Backing up data/..."
-    cp -a "$PROJECT_DIR/data" "$BACKUP_DIR/data"
-else
-    echo "[1/4] No data/ directory found, skipping."
-fi
-
-# 2. Environment file
-if [ -f "$PROJECT_DIR/.env" ]; then
-    echo "[2/4] Backing up .env..."
-    cp "$PROJECT_DIR/.env" "$BACKUP_DIR/dot-env"
-else
-    echo "[2/4] No .env found, skipping."
-fi
-
-# 3. ASR Proxy stats
-ASR_STATS="$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
-if [ -f "$ASR_STATS" ]; then
-    echo "[3/4] Backing up ASR proxy stats..."
-    cp "$ASR_STATS" "$BACKUP_DIR/asr-usage-stats.json"
-else
-    echo "[3/4] No ASR proxy stats, skipping."
-fi
-
-# 4. Docker volumes (if using managed volumes)
-echo "[4/4] Checking Docker volumes..."
-if docker volume ls --format '{{.Name}}' 2>/dev/null | grep -q "whisperx-cache"; then
-    echo "  Exporting whisperx-cache volume..."
-    docker run --rm -v whisperx-cache:/source -v "$BACKUP_DIR":/backup \
-        alpine tar czf /backup/whisperx-cache.tar.gz -C /source . 2>/dev/null || true
-fi
-
-# Write manifest
-cat > "$BACKUP_DIR/manifest.json" <<MANIFEST
-{
-    "timestamp": "$TIMESTAMP",
-    "project_dir": "$PROJECT_DIR",
-    "hostname": "$(hostname)",
-    "contents": {
-        "data": $([ -d "$BACKUP_DIR/data" ] && echo "true" || echo "false"),
-        "env": $([ -f "$BACKUP_DIR/dot-env" ] && echo "true" || echo "false"),
-        "asr_stats": $([ -f "$BACKUP_DIR/asr-usage-stats.json" ] && echo "true" || echo "false"),
-        "whisperx_cache": $([ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ] && echo "true" || echo "false")
-    }
-}
-MANIFEST
-
-# Compress
-echo
-echo "Compressing backup..."
-ARCHIVE="$BACKUP_BASE/dictia-$TIMESTAMP.tar.gz"
-tar czf "$ARCHIVE" -C "$BACKUP_BASE" "dictia-$TIMESTAMP"
-rm -rf "$BACKUP_DIR"
-echo "Archive: $ARCHIVE ($(du -h "$ARCHIVE" | cut -f1))"
-
-# Rotate old backups
-BACKUP_COUNT=$(ls -1 "$BACKUP_BASE"/dictia-*.tar.gz 2>/dev/null | wc -l)
-if [ "$BACKUP_COUNT" -gt "$KEEP_COUNT" ]; then
-    echo
-    echo "Rotating backups (keeping last $KEEP_COUNT)..."
-    ls -1t "$BACKUP_BASE"/dictia-*.tar.gz | tail -n +"$((KEEP_COUNT + 1))" | xargs rm -f
-fi
-
-echo
-echo "=== Backup complete ==="
diff --git a/deployment/tools/health-check.sh b/deployment/tools/health-check.sh
deleted file mode 100644
index 8075289..0000000
--- a/deployment/tools/health-check.sh
+++ /dev/null
@@ -1,157 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Health check diagnostic
-#
-# Checks Docker, containers, endpoints, disk, RAM, and GPU.
-#
-# Usage:
-#   bash health-check.sh              # Human-readable output
-#   bash health-check.sh --json       # JSON output
-#   bash health-check.sh --quiet      # Exit code only (0=ok, 1=issue)
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
-OUTPUT="human"
-ISSUES=0
-
-for arg in "$@"; do
-    case "$arg" in
-        --json)  OUTPUT="json" ;;
-        --quiet) OUTPUT="quiet" ;;
-    esac
-done
-
-declare -A CHECKS
-
-check() {
-    local name="$1"
-    local status="$2"
-    local detail="${3:-}"
-    CHECKS["$name"]="$status|$detail"
-    if [ "$status" = "error" ] || [ "$status" = "warning" ]; then
-        ISSUES=$((ISSUES + 1))
-    fi
-}
-
-# --- Docker ---
-if command -v docker &>/dev/null && docker info &>/dev/null; then
-    check "docker" "ok" "Docker daemon running"
-else
-    check "docker" "error" "Docker not available"
-fi
-
-# --- Containers ---
-DICTIA_STATUS=$(docker inspect --format='{{.State.Health.Status}}' dictia 2>/dev/null || echo "not_found")
-if [ "$DICTIA_STATUS" = "healthy" ]; then
-    check "container_dictia" "ok" "healthy"
-elif [ "$DICTIA_STATUS" = "not_found" ]; then
-    check "container_dictia" "error" "container not found"
-else
-    check "container_dictia" "warning" "$DICTIA_STATUS"
-fi
-
-WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null || echo "not_found")
-if [ "$WHISPERX_STATUS" = "running" ]; then
-    check "container_whisperx" "ok" "running"
-elif [ "$WHISPERX_STATUS" = "not_found" ]; then
-    check "container_whisperx" "info" "not present (cloud profile?)"
-else
-    check "container_whisperx" "warning" "$WHISPERX_STATUS"
-fi
-
-# --- Endpoints ---
-if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
-    check "endpoint_dictia" "ok" "http://localhost:8899 responding"
-else
-    check "endpoint_dictia" "error" "http://localhost:8899 not responding"
-fi
-
-if curl -sf -o /dev/null -m 5 http://localhost:9000/health 2>/dev/null; then
-    check "endpoint_whisperx" "ok" "http://localhost:9000 responding"
-else
-    check "endpoint_whisperx" "info" "http://localhost:9000 not responding"
-fi
-
-if curl -sf -o /dev/null -m 5 http://localhost:9090/health 2>/dev/null; then
-    check "endpoint_asr_proxy" "ok" "http://localhost:9090 responding"
-else
-    check "endpoint_asr_proxy" "info" "http://localhost:9090 not responding"
-fi
-
-# --- Disk ---
-DISK_USED=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $5}' | tr -d '%')
-if [ -n "$DISK_USED" ]; then
-    if [ "$DISK_USED" -gt 90 ]; then
-        check "disk" "error" "${DISK_USED}% used"
-    elif [ "$DISK_USED" -gt 80 ]; then
-        check "disk" "warning" "${DISK_USED}% used"
-    else
-        check "disk" "ok" "${DISK_USED}% used"
-    fi
-fi
-
-# --- RAM ---
-if command -v free &>/dev/null; then
-    MEM_TOTAL=$(free -m | awk '/Mem:/{print $2}')
-    MEM_AVAIL=$(free -m | awk '/Mem:/{print $7}')
-    MEM_USED_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
-    if [ "$MEM_USED_PCT" -gt 90 ]; then
-        check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
-    else
-        check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
-    fi
-fi
-
-# --- GPU ---
-if command -v nvidia-smi &>/dev/null; then
-    GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader 2>/dev/null || echo "error")
-    if [ "$GPU_INFO" != "error" ]; then
-        check "gpu" "ok" "$GPU_INFO"
-    else
-        check "gpu" "warning" "nvidia-smi present but query failed"
-    fi
-fi
-
-# --- Output ---
-if [ "$OUTPUT" = "json" ]; then
-    echo "{"
-    echo "  \"timestamp\": \"$(date -Is)\","
-    echo "  \"issues\": $ISSUES,"
-    echo "  \"checks\": {"
-    FIRST=true
-    for name in "${!CHECKS[@]}"; do
-        IFS='|' read -r status detail <<< "${CHECKS[$name]}"
-        if [ "$FIRST" = true ]; then
-            FIRST=false
-        else
-            echo ","
-        fi
-        printf '    "%s": {"status": "%s", "detail": "%s"}' "$name" "$status" "$detail"
-    done
-    echo
-    echo "  }"
-    echo "}"
-elif [ "$OUTPUT" = "quiet" ]; then
-    exit $( [ "$ISSUES" -eq 0 ] && echo 0 || echo 1 )
-else
-    echo "=== DictIA Health Check ==="
-    echo
-    for name in docker container_dictia container_whisperx endpoint_dictia endpoint_whisperx endpoint_asr_proxy disk memory gpu; do
-        if [ -n "${CHECKS[$name]+x}" ]; then
-            IFS='|' read -r status detail <<< "${CHECKS[$name]}"
-            case "$status" in
-                ok)      ICON="[OK]" ;;
-                warning) ICON="[!!]" ;;
-                error)   ICON="[ERR]" ;;
-                info)    ICON="[--]" ;;
-            esac
-            printf "  %-22s %s  %s\n" "$name" "$ICON" "$detail"
-        fi
-    done
-    echo
-    if [ "$ISSUES" -eq 0 ]; then
-        echo "All checks passed."
-    else
-        echo "$ISSUES issue(s) found."
-    fi
-fi
diff --git a/deployment/tools/restore.sh b/deployment/tools/restore.sh
deleted file mode 100644
index 4c9d46a..0000000
--- a/deployment/tools/restore.sh
+++ /dev/null
@@ -1,101 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Restore script
-#
-# Restores a DictIA backup archive created by backup.sh.
-#
-# Usage: bash restore.sh <ARCHIVE_PATH> [PROJECT_DIR]
-set -euo pipefail
-
-ARCHIVE="${1:-}"
-PROJECT_DIR="${2:-$(cd "$(dirname "$0")/../.." && pwd)}"
-
-if [ -z "$ARCHIVE" ] || [ ! -f "$ARCHIVE" ]; then
-    echo "Usage: bash restore.sh <backup-archive.tar.gz> [project-dir]"
-    echo
-    echo "Available backups:"
-    ls -1t "$PROJECT_DIR/backups"/dictia-*.tar.gz 2>/dev/null | head -5 || echo "  (none found)"
-    exit 1
-fi
-
-echo "=== DictIA Restore ==="
-echo "Archive: $ARCHIVE"
-echo "Target:  $PROJECT_DIR"
-echo
-
-# Validate archive
-echo "Validating archive..."
-TMPDIR=$(mktemp -d)
-tar xzf "$ARCHIVE" -C "$TMPDIR"
-BACKUP_DIR=$(ls -1d "$TMPDIR"/dictia-* | head -1)
-
-if [ ! -f "$BACKUP_DIR/manifest.json" ]; then
-    echo "ERROR: Invalid backup archive (no manifest.json)"
-    rm -rf "$TMPDIR"
-    exit 1
-fi
-
-echo "Manifest:"
-cat "$BACKUP_DIR/manifest.json"
-echo
-echo
-
-# Confirmation
-read -rp "This will overwrite current data. Continue? [y/N] " CONFIRM
-if [ "$CONFIRM" != "y" ] && [ "$CONFIRM" != "Y" ]; then
-    echo "Aborted."
-    rm -rf "$TMPDIR"
-    exit 0
-fi
-
-# Stop services
-echo
-echo "Stopping DictIA services..."
-COMPOSE_FILE=""
-for f in cloud local-cpu local-gpu; do
-    if [ -f "$PROJECT_DIR/deployment/docker/docker-compose.$f.yml" ]; then
-        COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
-    fi
-done
-if [ -n "$COMPOSE_FILE" ]; then
-    docker compose -f "$COMPOSE_FILE" down 2>/dev/null || true
-fi
-
-# Restore data
-if [ -d "$BACKUP_DIR/data" ]; then
-    echo "Restoring data/..."
-    rm -rf "$PROJECT_DIR/data"
-    cp -a "$BACKUP_DIR/data" "$PROJECT_DIR/data"
-fi
-
-# Restore .env
-if [ -f "$BACKUP_DIR/dot-env" ]; then
-    echo "Restoring .env..."
-    cp "$BACKUP_DIR/dot-env" "$PROJECT_DIR/.env"
-fi
-
-# Restore ASR stats
-if [ -f "$BACKUP_DIR/asr-usage-stats.json" ]; then
-    echo "Restoring ASR proxy stats..."
-    cp "$BACKUP_DIR/asr-usage-stats.json" "$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
-fi
-
-# Restore Docker volumes
-if [ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ]; then
-    echo "Restoring whisperx-cache volume..."
-    docker volume create whisperx-cache 2>/dev/null || true
-    docker run --rm -v whisperx-cache:/target -v "$BACKUP_DIR":/backup \
-        alpine sh -c "cd /target && tar xzf /backup/whisperx-cache.tar.gz" 2>/dev/null || true
-fi
-
-# Cleanup
-rm -rf "$TMPDIR"
-
-# Restart services
-echo
-echo "Restarting DictIA..."
-if [ -n "$COMPOSE_FILE" ]; then
-    docker compose -f "$COMPOSE_FILE" up -d
-fi
-
-echo
-echo "=== Restore complete ==="
diff --git a/deployment/tools/update.sh b/deployment/tools/update.sh
deleted file mode 100644
index 54be4b7..0000000
--- a/deployment/tools/update.sh
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/usr/bin/env bash
-# DictIA — Update script
-#
-# Pulls latest code, rebuilds Docker image, and restarts services.
-# Detects the active deployment profile automatically.
-#
-# Usage: bash update.sh [--no-pull] [--no-build]
-set -euo pipefail
-
-SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
-PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
-NO_PULL=false
-NO_BUILD=false
-
-for arg in "$@"; do
-    case "$arg" in
-        --no-pull)  NO_PULL=true ;;
-        --no-build) NO_BUILD=true ;;
-        *)          echo "Unknown option: $arg"; exit 1 ;;
-    esac
-done
-
-echo "=== DictIA Update ==="
-echo "Project: $PROJECT_DIR"
-echo
-
-# 1. Detect active compose file
-COMPOSE_FILE=""
-PROFILE=""
-for f in cloud local-cpu local-gpu; do
-    CF="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
-    if [ -f "$CF" ] && docker compose -f "$CF" ps --quiet 2>/dev/null | grep -q .; then
-        COMPOSE_FILE="$CF"
-        PROFILE="$f"
-        break
-    fi
-done
-
-if [ -z "$COMPOSE_FILE" ]; then
-    # Fallback: check .env for profile
-    if [ -f "$PROJECT_DIR/.env" ]; then
-        PROFILE=$(grep -E '^DICTIA_PROFILE=' "$PROJECT_DIR/.env" 2>/dev/null | cut -d= -f2 || echo "cloud")
-    fi
-    PROFILE="${PROFILE:-cloud}"
-    COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$PROFILE.yml"
-fi
-
-echo "Profile: $PROFILE"
-echo "Compose: $COMPOSE_FILE"
-echo
-
-# 2. Git pull
-if [ "$NO_PULL" = false ]; then
-    echo "[1/5] Pulling latest code..."
-    cd "$PROJECT_DIR"
-    git pull origin dictia-branding
-else
-    echo "[1/5] Skipping git pull (--no-pull)"
-fi
-
-# 3. Rebuild DictIA image
-if [ "$NO_BUILD" = false ]; then
-    echo "[2/5] Building DictIA image..."
-    cd "$PROJECT_DIR"
-    docker build -t innova-ai/dictia:latest .
-else
-    echo "[2/5] Skipping build (--no-build)"
-fi
-
-# 3b. Pull upstream images (WhisperX) if local profile
-if [ "$PROFILE" != "cloud" ] && [ "$NO_BUILD" = false ]; then
-    echo "[3/5] Pulling upstream images (WhisperX)..."
-    docker compose -f "$COMPOSE_FILE" pull whisperx-asr 2>/dev/null || true
-else
-    echo "[3/5] Skipping upstream pull (cloud profile or --no-build)"
-fi
-
-# 4. Restart containers
-echo "[4/5] Restarting containers..."
-docker compose -f "$COMPOSE_FILE" down
-docker compose -f "$COMPOSE_FILE" up -d
-
-# 5. Wait for health
-echo "[5/5] Waiting for health check..."
-RETRIES=30
-for i in $(seq 1 $RETRIES); do
-    if docker compose -f "$COMPOSE_FILE" ps | grep -q "healthy"; then
-        echo "  DictIA is healthy!"
-        break
-    fi
-    if [ "$i" -eq "$RETRIES" ]; then
-        echo "  WARNING: Health check timeout. Check: docker compose -f $COMPOSE_FILE logs"
-    fi
-    sleep 5
-done
-
-# Cleanup dangling images
-echo
-echo "Cleaning up old images..."
-docker image prune -f 2>/dev/null || true
-
-echo
-echo "=== Update complete ==="
-echo "DictIA: http://localhost:8899"
-docker compose -f "$COMPOSE_FILE" ps