Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)
This commit is contained in:
105
deployment/README.md
Normal file
105
deployment/README.md
Normal file
@@ -0,0 +1,105 @@
|
||||
# DictIA — Deployment Infrastructure
|
||||
|
||||
Infrastructure de deploiement reproductible pour DictIA.
|
||||
|
||||
## Choix de profil
|
||||
|
||||
```
|
||||
Quel est ton setup?
|
||||
|
|
||||
+-- VPS / serveur cloud?
|
||||
| --> cloud (ASR Proxy GCP GPU on demand)
|
||||
|
|
||||
+-- Machine locale avec GPU NVIDIA?
|
||||
| --> local-gpu (WhisperX sur GPU, le plus rapide)
|
||||
|
|
||||
+-- Machine locale sans GPU?
|
||||
--> local-cpu (WhisperX sur CPU, lent mais fonctionnel)
|
||||
```
|
||||
|
||||
## Quickstart
|
||||
|
||||
```bash
|
||||
git clone https://gitea.innova-ai.ca/Innova-AI/dictia-public.git
|
||||
cd dictia
|
||||
git checkout dictia-branding
|
||||
bash deployment/setup.sh
|
||||
```
|
||||
|
||||
Le script detecte le hardware et guide l'installation.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
deployment/
|
||||
├── setup.sh # Installateur principal
|
||||
├── docker/
|
||||
│ ├── docker-compose.cloud.yml
|
||||
│ ├── docker-compose.local-cpu.yml
|
||||
│ ├── docker-compose.local-gpu.yml
|
||||
│ └── .env.example
|
||||
├── asr-proxy/ # Proxy GCP GPU (cloud seulement)
|
||||
│ ├── proxy.py
|
||||
│ ├── dashboard.html
|
||||
│ ├── requirements.txt
|
||||
│ ├── setup.sh
|
||||
│ └── asr-proxy.service
|
||||
├── security/ # Securite Docker (cloud)
|
||||
│ ├── docker-daemon.json
|
||||
│ ├── iptables-rules.sh
|
||||
│ └── docker-iptables.service
|
||||
├── config/
|
||||
│ ├── nginx/dictia.conf
|
||||
│ ├── tailscale/setup-serve.sh
|
||||
│ └── systemd/dictia.service
|
||||
├── tools/
|
||||
│ ├── backup.sh
|
||||
│ ├── restore.sh
|
||||
│ ├── update.sh
|
||||
│ └── health-check.sh
|
||||
└── docs/
|
||||
├── QUICKSTART.md
|
||||
├── VPS-SETUP.md
|
||||
├── LOCAL-SETUP.md
|
||||
├── MAINTENANCE.md
|
||||
└── TROUBLESHOOTING.md
|
||||
```
|
||||
|
||||
### Profil Cloud
|
||||
|
||||
```
|
||||
Internet --> Tailscale --> VPS
|
||||
|
|
||||
DictIA :8899
|
||||
|
|
||||
ASR Proxy :9090
|
||||
|
|
||||
GCP GPU (auto start/stop)
|
||||
|
|
||||
WhisperX :9000
|
||||
```
|
||||
|
||||
### Profil Local GPU/CPU
|
||||
|
||||
```
|
||||
localhost:8899 --> DictIA container
|
||||
|
|
||||
WhisperX container :9000
|
||||
|
|
||||
GPU local (ou CPU)
|
||||
```
|
||||
|
||||
## Documentation
|
||||
|
||||
- [QUICKSTART.md](docs/QUICKSTART.md) — Demarrage rapide par profil
|
||||
- [VPS-SETUP.md](docs/VPS-SETUP.md) — Setup VPS complet from scratch
|
||||
- [LOCAL-SETUP.md](docs/LOCAL-SETUP.md) — Setup local GPU/CPU
|
||||
- [MAINTENANCE.md](docs/MAINTENANCE.md) — Backup, restore, update, monitoring
|
||||
- [TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) — Problemes courants + solutions
|
||||
|
||||
## Mise a jour upstream
|
||||
|
||||
Tous les fichiers dans `deployment/` sont specifiques a DictIA.
|
||||
Aucun conflit lors des merges upstream, sauf `deployment/setup.sh`
|
||||
(qui remplace le setup.sh original de Speakr).
|
||||
|
||||
5
deployment/asr-proxy/.gitignore
vendored
Normal file
5
deployment/asr-proxy/.gitignore
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
gcp-credentials.json
|
||||
usage-stats.json
|
||||
venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
22
deployment/asr-proxy/asr-proxy.service
Normal file
22
deployment/asr-proxy/asr-proxy.service
Normal file
@@ -0,0 +1,22 @@
|
||||
# TEMPLATE — Do not copy directly into /etc/systemd/system/.
# The ${ASR_PROXY_USER} and ${ASR_PROXY_DIR} variables are placeholders.
# The real service file is generated by setup.sh (via a bash heredoc) with the
# resolved values of $SERVICE_USER and $INSTALL_DIR.
# Usage: sudo bash setup.sh (installs and enables the service automatically)

[Unit]
Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
After=network.target

[Service]
Type=simple
User=${ASR_PROXY_USER}
Restart=always
RestartSec=10
WorkingDirectory=${ASR_PROXY_DIR}
ExecStart=${ASR_PROXY_DIR}/venv/bin/python proxy.py
Environment=GOOGLE_APPLICATION_CREDENTIALS=${ASR_PROXY_DIR}/gcp-credentials.json
Environment=STATS_FILE=${ASR_PROXY_DIR}/usage-stats.json

[Install]
WantedBy=multi-user.target
|
||||
1534
deployment/asr-proxy/dashboard.html
Normal file
1534
deployment/asr-proxy/dashboard.html
Normal file
File diff suppressed because it is too large
Load Diff
741
deployment/asr-proxy/proxy.py
Normal file
741
deployment/asr-proxy/proxy.py
Normal file
@@ -0,0 +1,741 @@
|
||||
"""DictIA ASR Proxy - Auto-start/stop GCP GPU for WhisperX + Ollama.
|
||||
|
||||
Uses Google Cloud Compute REST API directly (no gcloud CLI needed).
|
||||
Proxies both ASR (WhisperX) and LLM (Ollama) requests.
|
||||
Multi-zone fallback across Canada (Montreal + Toronto).
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
import httpx
|
||||
import jwt as pyjwt
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse, Response
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger("asr-proxy")

# Config — paths relative to this script's directory by default
SCRIPT_DIR = Path(__file__).parent
GCP_PROJECT = os.getenv("GCP_PROJECT", "speakr-gpu")
WHISPERX_PORT = int(os.getenv("WHISPERX_PORT", "9000"))    # WhisperX HTTP port on the GPU VM
OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434"))       # Ollama HTTP port on the GPU VM
IDLE_TIMEOUT = int(os.getenv("IDLE_TIMEOUT", "300"))       # seconds of inactivity before auto-stop
CREDS_FILE = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", str(SCRIPT_DIR / "gcp-credentials.json"))
STATS_FILE = os.getenv("STATS_FILE", str(SCRIPT_DIR / "usage-stats.json"))
MONTHLY_LIMIT_HOURS = float(os.getenv("MONTHLY_LIMIT_HOURS", "30"))  # hard monthly GPU budget
# Real GCP cost per GPU-hour (g2-standard-4 + L4): GPU ($0.837) + vCPU ($0.151) + RAM ($0.069)
GPU_COST_PER_HOUR = float(os.getenv("GPU_COST_PER_HOUR", "1.06"))
# Fixed monthly costs: SSD disks ($5.66) + snapshots ($4.19) ≈ $9.85/month
FIXED_MONTHLY_COST = float(os.getenv("FIXED_MONTHLY_COST", "9.85"))
SNAPSHOT_NAME = "whisperx-gpu-snapshot"  # boot-disk snapshot used when (re)creating instances
HEALTH_POLL_INTERVAL = 5                 # seconds between service health probes
BOOT_TIMEOUT = 300                       # max seconds to wait for a service after VM boot

# Zone fallback order — Canada only, Montreal first
ZONE_FALLBACKS = [
    {
        "zone": "northamerica-northeast1-b",
        "instance": "whisperx-gpu-mtl1",
        "machine_type": "g2-standard-4",
        "accelerator": "nvidia-l4",
        "accel_count": 1,
        "label": "Montreal-b (L4)",
    },
    {
        "zone": "northamerica-northeast1-c",
        "instance": "whisperx-gpu-mtl2",
        "machine_type": "n1-standard-4",
        "accelerator": "nvidia-tesla-t4",
        "accel_count": 1,
        "label": "Montreal-c (T4)",
    },
    {
        "zone": "northamerica-northeast2-a",
        "instance": "whisperx-gpu-tor1",
        "machine_type": "g2-standard-4",
        "accelerator": "nvidia-l4",
        "accel_count": 1,
        "label": "Toronto-a (L4)",
    },
    {
        "zone": "northamerica-northeast2-b",
        "instance": "whisperx-gpu",
        "machine_type": "g2-standard-4",
        "accelerator": "nvidia-l4",
        "accel_count": 1,
        "label": "Toronto-b (L4)",
    },
]

# Startup script injected as instance metadata: restarts the ASR and LLM
# services after every boot of the snapshot-based VM.
STARTUP_SCRIPT = """#!/bin/bash
systemctl start docker
sleep 5
docker start whisperx-asr 2>/dev/null || true
systemctl start ollama 2>/dev/null || true
"""

app = FastAPI(title="DictIA ASR Proxy")

# State
last_request_time = 0.0                    # unix time of the most recent proxied request
active_requests = 0                        # in-flight requests; blocks idle shutdown
gpu_ip: str | None = None                  # external IP of the running GPU instance
active_zone: dict | None = None            # ZONE_FALLBACKS entry currently in use
shutdown_task: asyncio.Task | None = None  # background idle-shutdown task

# Request history tracking (in-memory, last 20 requests)
request_history: list[dict] = []
MAX_HISTORY = 20

# Zone status tracking (keyed by zone label, for the /stats dashboard)
zone_status: dict[str, dict] = {}

# Startup lock and failure cooldown
_startup_lock: asyncio.Lock | None = None  # serializes GPU boot attempts
_last_failure_time: float = 0              # when the last all-zones failure happened
FAILURE_COOLDOWN = 180                     # seconds to back off after total failure

# OAuth2 token cache
_access_token: str | None = None
_token_expiry: float = 0
||||
|
||||
def load_stats() -> dict:
    """Load persisted usage counters, or a zeroed record for the current month."""
    try:
        with open(STATS_FILE) as handle:
            return json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError):
        # Missing or corrupt stats file: start fresh.
        return {"gpu_seconds": 0, "month": time.strftime("%Y-%m"), "requests": 0, "last_start": 0}
|
||||
|
||||
|
||||
def save_stats(stats: dict):
    """Persist usage counters to STATS_FILE as pretty-printed JSON."""
    with open(STATS_FILE, "w") as handle:
        json.dump(stats, handle, indent=2)
|
||||
|
||||
|
||||
def track_gpu_time():
    """Fold the elapsed time since last_start into the monthly GPU total.

    Resets all counters when the calendar month has rolled over, then clears
    last_start so the same run is not counted twice.
    """
    stats = load_stats()
    this_month = time.strftime("%Y-%m")
    if stats.get("month") != this_month:
        # New month: reset the accumulated counters.
        stats = {"gpu_seconds": 0, "month": this_month, "requests": 0, "last_start": 0}
    started_at = stats.get("last_start", 0)
    if started_at > 0:
        stats["gpu_seconds"] += time.time() - started_at
        stats["last_start"] = 0
    save_stats(stats)
|
||||
|
||||
|
||||
def check_budget() -> tuple[bool, float]:
    """Return (within monthly GPU budget?, GPU hours used this month)."""
    stats = load_stats()
    if stats.get("month") != time.strftime("%Y-%m"):
        # Stats belong to a previous month; nothing consumed yet.
        return True, 0.0
    hours_used = stats.get("gpu_seconds", 0) / 3600
    return hours_used < MONTHLY_LIMIT_HOURS, hours_used
|
||||
|
||||
|
||||
# --- GCP Auth ---
|
||||
|
||||
async def get_access_token() -> str:
    """Return a cached GCP OAuth2 access token, refreshing it when needed.

    Supports two credential kinds found in CREDS_FILE:
    - "service_account": sign a JWT with the account's private key and
      exchange it via the jwt-bearer grant;
    - anything else (assumed "authorized_user"): use the refresh-token grant.

    The token is cached module-wide; a 60 s safety margin is kept before
    the recorded expiry so callers never receive an about-to-expire token.
    """
    global _access_token, _token_expiry
    # Fast path: reuse the cached token while it is still comfortably valid.
    if _access_token and time.time() < _token_expiry - 60:
        return _access_token
    with open(CREDS_FILE) as f:
        creds = json.load(f)
    cred_type = creds.get("type", "authorized_user")
    async with httpx.AsyncClient() as client:
        if cred_type == "service_account":
            # Service-account flow: self-signed JWT exchanged for an access token.
            now = int(time.time())
            payload = {
                "iss": creds["client_email"],
                "scope": "https://www.googleapis.com/auth/compute",
                "aud": "https://oauth2.googleapis.com/token",
                "iat": now,
                "exp": now + 3600,
            }
            signed = pyjwt.encode(payload, creds["private_key"], algorithm="RS256")
            resp = await client.post(
                "https://oauth2.googleapis.com/token",
                data={
                    "grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
                    "assertion": signed,
                },
            )
        else:
            # User-credential flow: exchange the long-lived refresh token.
            resp = await client.post(
                "https://oauth2.googleapis.com/token",
                data={
                    "client_id": creds["client_id"],
                    "client_secret": creds["client_secret"],
                    "refresh_token": creds["refresh_token"],
                    "grant_type": "refresh_token",
                },
            )
        resp.raise_for_status()
        data = resp.json()
        _access_token = data["access_token"]
        _token_expiry = time.time() + data.get("expires_in", 3600)
        log.info(f"Refreshed GCP access token ({cred_type})")
        return _access_token
|
||||
|
||||
|
||||
# --- GCP Compute API ---
|
||||
|
||||
COMPUTE_BASE = "https://compute.googleapis.com/compute/v1"
|
||||
|
||||
|
||||
async def gcp_api(method: str, url: str, **kwargs) -> httpx.Response:
    """Issue an authenticated request against the GCP Compute REST API.

    Extra keyword arguments (e.g. ``json=``) are forwarded to httpx.
    """
    auth_header = {"Authorization": f"Bearer {await get_access_token()}"}
    async with httpx.AsyncClient(timeout=60) as client:
        return await client.request(method, url, headers=auth_header, **kwargs)
|
||||
|
||||
|
||||
async def get_instance_info(zone: str, instance: str) -> dict | None:
    """Fetch instance metadata; None when the instance is missing or on API error."""
    endpoint = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
    resp = await gcp_api("GET", endpoint)
    if resp.status_code == 404:
        # Instance does not exist (never created, or deleted).
        return None
    if resp.status_code >= 400:
        log.error(f"GCP API error {resp.status_code}: {resp.text}")
        return None
    return resp.json()
|
||||
|
||||
|
||||
def extract_ip(instance_data: dict) -> str:
    """Return the external NAT IP of a GCP instance payload, or "" if absent.

    Only the first network interface and its first access config are
    considered, matching how the instances are created here.
    """
    for interface in instance_data.get("networkInterfaces", [])[:1]:
        for access_config in interface.get("accessConfigs", [])[:1]:
            return access_config.get("natIP", "")
    return ""
|
||||
|
||||
|
||||
async def start_instance_in_zone(zone: str, instance: str) -> bool:
    """Request a start of a stopped instance; True if GCP accepted the request."""
    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/start"
    resp = await gcp_api("POST", url)
    if resp.status_code >= 400:
        log.warning(f"Failed to start {instance} in {zone}: {resp.status_code} {resp.text}")
        return False
    log.info(f"Start requested: {instance} in {zone}")
    return True
|
||||
|
||||
|
||||
async def stop_instance_in_zone(zone: str, instance: str):
    """Request a stop of an instance; failures are logged, not raised."""
    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/stop"
    resp = await gcp_api("POST", url)
    if resp.status_code >= 400:
        log.error(f"Failed to stop {instance} in {zone}: {resp.status_code}")
    else:
        log.info(f"Stop requested: {instance} in {zone}")
|
||||
|
||||
|
||||
async def create_instance_from_snapshot(config: dict) -> bool:
    """Create a GPU instance in the given zone from the shared boot snapshot.

    ``config`` is one entry of ZONE_FALLBACKS (zone, instance name, machine
    type, accelerator type and count). Returns True on success; on failure
    logs the cause (capacity vs quota vs other) and returns False so the
    caller can fall back to the next zone.
    """
    zone = config["zone"]
    instance = config["instance"]
    machine = config["machine_type"]
    accel = config["accelerator"]
    accel_count = config["accel_count"]

    log.info(f"Creating {instance} in {zone} from snapshot...")

    # Request body for instances.insert (Compute REST API v1).
    body = {
        "name": instance,
        "machineType": f"zones/{zone}/machineTypes/{machine}",
        "disks": [{
            "boot": True,
            # Disk is deleted with the instance so quota is freed automatically.
            "autoDelete": True,
            "initializeParams": {
                "diskSizeGb": "50",
                "diskType": f"zones/{zone}/diskTypes/pd-ssd",
                "sourceSnapshot": f"global/snapshots/{SNAPSHOT_NAME}",
            },
        }],
        "networkInterfaces": [{
            "network": "global/networks/default",
            # Ephemeral external IP so the proxy can reach the services directly.
            "accessConfigs": [{"type": "ONE_TO_ONE_NAT", "name": "External NAT"}],
        }],
        "guestAccelerators": [{
            "acceleratorType": f"zones/{zone}/acceleratorTypes/{accel}",
            "acceleratorCount": accel_count,
        }],
        # GPU instances cannot live-migrate; they must terminate on maintenance.
        "scheduling": {
            "onHostMaintenance": "TERMINATE",
            "automaticRestart": False,
        },
        "tags": {"items": ["whisperx-gpu"]},
        "metadata": {
            # Startup script restarts the ASR/LLM services on each boot.
            "items": [{"key": "startup-script", "value": STARTUP_SCRIPT}],
        },
    }

    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances"
    resp = await gcp_api("POST", url, json=body)

    if resp.status_code < 400:
        log.info(f"Created {instance} in {zone}")
        return True

    # Classify the failure so the fallback loop can log it meaningfully.
    error_text = resp.text
    if "ZONE_RESOURCE_POOL_EXHAUSTED" in error_text:
        log.warning(f"No capacity in {zone} -- skipping")
    elif "QUOTA" in error_text.upper():
        log.warning(f"Quota exceeded for {zone}: {error_text[:200]}")
    else:
        log.error(f"Failed to create in {zone}: {resp.status_code} {error_text[:200]}")
    return False
|
||||
|
||||
|
||||
# --- Core Logic ---
|
||||
|
||||
async def wait_for_running(zone: str, instance: str, timeout: int = 120, grace: int = 15) -> bool:
    """Poll an instance every 5 s until it reaches RUNNING, or give up early.

    Early-exit heuristics (all interpreted as "no capacity in this zone"):
    - instance disappears twice in a row (GCP deleted a failed creation);
    - status STOPPING;
    - still TERMINATED/STOPPED after ``grace`` seconds (a freshly started
      instance may legitimately report STOPPED for a short while).
    Returns True only when RUNNING is observed within ``timeout`` seconds.
    """
    gone_count = 0
    start_time = time.time()
    for _ in range(timeout // 5):
        info = await get_instance_info(zone, instance)
        if info and info.get("status") == "RUNNING":
            return True
        status = info.get("status", "UNKNOWN") if info else "GONE"
        elapsed = time.time() - start_time
        if status == "GONE":
            gone_count += 1
            # One 404 may be API lag; two in a row means the instance is gone.
            if gone_count >= 2:
                log.warning(f"{instance} in {zone}: instance disappeared (no capacity)")
                return False
        if status in ("STOPPING",):
            log.warning(f"{instance} in {zone}: status {status} (no capacity)")
            return False
        if status in ("TERMINATED", "STOPPED") and elapsed > grace:
            log.warning(f"{instance} in {zone}: status {status} after {elapsed:.0f}s (no capacity)")
            return False
        await asyncio.sleep(5)
    return False
|
||||
|
||||
|
||||
async def delete_instance(zone: str, instance: str):
    """Delete an instance (e.g. one stuck STOPPING) so its quota is released."""
    url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
    resp = await gcp_api("DELETE", url)
    if resp.status_code < 400:
        log.info(f"Deleted {instance} in {zone} to free quota")
    elif resp.status_code != 404:
        # 404 means already gone, which is the desired end state anyway.
        log.warning(f"Failed to delete {instance} in {zone}: {resp.status_code}")
|
||||
|
||||
|
||||
async def ensure_gpu_running() -> str:
    """Ensure some GPU instance is RUNNING and return its external IP.

    Strategy, under a global startup lock so only one caller boots at a time:
    1. Respect the failure cooldown set after a previous all-zones failure.
    2. Enforce the monthly GPU-hours budget.
    3. Reuse the already-active zone if its instance is still RUNNING.
    4. Otherwise walk ZONE_FALLBACKS in order, creating or starting an
       instance per zone, until one reaches RUNNING with an external IP.
    Updates zone_status (for /stats) and the persisted usage stats on
    success. Raises RuntimeError when the budget is exhausted or no zone
    could provide a GPU.
    """
    global gpu_ip, active_zone, _last_failure_time

    # Back off after a recent total failure instead of hammering GCP.
    if _last_failure_time > 0:
        remaining = FAILURE_COOLDOWN - (time.time() - _last_failure_time)
        if remaining > 0:
            log.info(f"GPU cooldown active ({int(remaining)}s remaining), waiting...")
            await asyncio.sleep(remaining)
        _last_failure_time = 0

    async with _startup_lock:
        ok, hours = check_budget()
        if not ok:
            raise RuntimeError(f"Monthly GPU limit reached ({hours:.1f}h / {MONTHLY_LIMIT_HOURS}h)")

        # Fast path: the previously active instance may still be running.
        if active_zone:
            info = await get_instance_info(active_zone["zone"], active_zone["instance"])
            if info and info.get("status") == "RUNNING":
                gpu_ip = extract_ip(info)
                if gpu_ip:
                    return gpu_ip

        errors = []

        for config in ZONE_FALLBACKS:
            zone = config["zone"]
            instance = config["instance"]
            label = config["label"]

            log.info(f"Trying {label}...")
            info = await get_instance_info(zone, instance)

            if info is None:
                # Instance does not exist in this zone: create it from snapshot.
                created = await create_instance_from_snapshot(config)
                if not created:
                    zone_status[label] = {
                        "status": "no_capacity",
                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                        "last_error": "no capacity",
                    }
                    errors.append(f"{label}: no capacity")
                    continue
                # Longer grace: a brand-new instance takes a while to report RUNNING.
                if not await wait_for_running(zone, instance, grace=30):
                    zone_status[label] = {
                        "status": "error",
                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                        "last_error": "created but failed to start",
                    }
                    errors.append(f"{label}: created but failed to start")
                    # Delete the dead instance so it does not hold GPU quota.
                    await delete_instance(zone, instance)
                    await asyncio.sleep(3)
                    continue
            else:
                status = info.get("status", "UNKNOWN")

                if status == "RUNNING":
                    # Already up; fall through to the IP check below.
                    pass
                elif status in ("TERMINATED", "STOPPED"):
                    zone_status[label] = {
                        "status": "starting",
                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                        "last_error": None,
                    }
                    started = await start_instance_in_zone(zone, instance)
                    if not started:
                        zone_status[label] = {
                            "status": "error",
                            "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                            "last_error": "start rejected",
                        }
                        errors.append(f"{label}: start rejected")
                        continue
                    if not await wait_for_running(zone, instance, grace=20):
                        zone_status[label] = {
                            "status": "error",
                            "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                            "last_error": "didn't reach RUNNING",
                        }
                        errors.append(f"{label}: didn't reach RUNNING")
                        continue
                elif status in ("STAGING", "PROVISIONING"):
                    # Someone (or a previous attempt) already started it; just wait.
                    zone_status[label] = {
                        "status": "starting",
                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                        "last_error": None,
                    }
                    if not await wait_for_running(zone, instance):
                        zone_status[label] = {
                            "status": "error",
                            "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                            "last_error": f"stuck in {status}",
                        }
                        errors.append(f"{label}: stuck in {status}")
                        continue
                elif status == "STOPPING":
                    # Cannot start an instance that is STOPPING; delete it so the
                    # next attempt can recreate it (and quota is freed now).
                    log.info(f"{label}: STOPPING, deleting to free quota")
                    await delete_instance(zone, instance)
                    await asyncio.sleep(3)
                    zone_status[label] = {
                        "status": "error",
                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                        "last_error": "was STOPPING, deleted",
                    }
                    errors.append(f"{label}: was STOPPING, deleted")
                    continue

            # Re-fetch: the instance should now be RUNNING with a NAT IP.
            info = await get_instance_info(zone, instance)
            if info and info.get("status") == "RUNNING":
                gpu_ip = extract_ip(info)
                if gpu_ip:
                    active_zone = config
                    _last_failure_time = 0
                    zone_status[label] = {
                        "status": "running",
                        "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                        "last_error": None,
                    }
                    # Record billing start and bump the request counter.
                    stats = load_stats()
                    stats["last_start"] = time.time()
                    stats["requests"] = stats.get("requests", 0) + 1
                    stats["active_zone"] = label
                    save_stats(stats)
                    log.info(f"GPU ready in {label}, IP: {gpu_ip}")
                    return gpu_ip

            zone_status[label] = {
                "status": "error",
                "last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
                "last_error": "running but no IP",
            }
            errors.append(f"{label}: running but no IP")

        # Every zone failed: arm the cooldown and surface the per-zone reasons.
        _last_failure_time = time.time()
        raise RuntimeError(
            f"No GPU available in any Canadian zone. Tried: {'; '.join(errors)}"
        )
|
||||
|
||||
|
||||
async def _poll_until_healthy(ip: str, port: int, path: str, service: str) -> str:
    """Poll ``http://ip:port{path}`` every HEALTH_POLL_INTERVAL seconds until it
    returns HTTP 200, then return ``ip``.

    Connection errors/timeouts are expected while the VM's services are still
    booting and are silently retried. Raises RuntimeError after BOOT_TIMEOUT
    seconds without a healthy response.
    """
    url = f"http://{ip}:{port}{path}"
    log.info(f"Waiting for {service} at {url}...")
    async with httpx.AsyncClient(timeout=10) as client:
        for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):
            try:
                resp = await client.get(url)
                if resp.status_code == 200:
                    log.info(f"{service} is healthy!")
                    return ip
            except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):
                # Service not up yet; keep polling.
                pass
            await asyncio.sleep(HEALTH_POLL_INTERVAL)
    raise RuntimeError(f"{service} did not become healthy in time")


async def ensure_gpu_ready() -> str:
    """Boot the GPU if needed and wait until WhisperX answers its health check.

    Returns the instance's external IP. Raises RuntimeError if no GPU can be
    obtained or WhisperX never becomes healthy.
    """
    # Refactor note: the previous copies of this polling loop for WhisperX and
    # Ollama were identical except for port/path/name; they now share
    # _poll_until_healthy (log/error messages unchanged).
    ip = await ensure_gpu_running()
    return await _poll_until_healthy(ip, WHISPERX_PORT, "/health", "WhisperX")


async def ensure_ollama_ready() -> str:
    """Boot the GPU if needed and wait until Ollama answers /api/tags.

    Returns the instance's external IP. Raises RuntimeError if no GPU can be
    obtained or Ollama never becomes healthy.
    """
    ip = await ensure_gpu_running()
    return await _poll_until_healthy(ip, OLLAMA_PORT, "/api/tags", "Ollama")
|
||||
|
||||
|
||||
async def idle_shutdown_loop():
    """Background task: stop the GPU after IDLE_TIMEOUT seconds of inactivity.

    Runs forever, checking once a minute. Never stops the instance while
    requests are in flight, and only bills (track_gpu_time) when an actual
    RUNNING instance was stopped.
    """
    while True:
        await asyncio.sleep(60)
        if last_request_time == 0 or active_zone is None:
            # Nothing has run yet, or no instance is active.
            continue
        if active_requests > 0:
            # A request is being served; never shut down under load.
            continue
        elapsed = time.time() - last_request_time
        if elapsed >= IDLE_TIMEOUT:
            try:
                zone = active_zone["zone"]
                instance = active_zone["instance"]
                label = active_zone["label"]
                info = await get_instance_info(zone, instance)
                if info and info.get("status") == "RUNNING":
                    log.info(f"Idle {int(elapsed)}s -- stopping {label}")
                    await stop_instance_in_zone(zone, instance)
                    track_gpu_time()
            except Exception as e:
                # Keep the loop alive; a transient GCP error must not kill it.
                log.error(f"Error stopping: {e}")
|
||||
|
||||
|
||||
# --- Endpoints ---
|
||||
|
||||
@app.on_event("startup")
async def on_startup():
    """Initialize shared state, prime GCP auth, and launch the idle reaper."""
    global shutdown_task, _startup_lock
    _startup_lock = asyncio.Lock()
    # Fetch a token immediately so bad credentials fail fast at startup.
    await get_access_token()
    shutdown_task = asyncio.create_task(idle_shutdown_loop())
    zones = ", ".join(cfg["label"] for cfg in ZONE_FALLBACKS)
    log.info(f"DictIA ASR Proxy started. Zones: [{zones}]. Idle: {IDLE_TIMEOUT}s, limit: {MONTHLY_LIMIT_HOURS}h")
|
||||
|
||||
|
||||
@app.post("/asr")
async def asr_proxy(request: Request):
    """Proxy an ASR request to WhisperX on the GPU instance, booting it first.

    Forwards the raw request body, records the request in request_history,
    and maps upstream failures to 504 (timeout) or 502 (any other error).
    """
    global last_request_time, active_requests

    body = await request.body()
    # Drop headers httpx must set itself for the upstream connection.
    headers = {
        k: v for k, v in request.headers.items()
        if k.lower() not in ("host", "transfer-encoding")
    }

    last_request_time = time.time()
    active_requests += 1
    start_time = time.time()
    result_status = 200
    try:
        ip = await ensure_gpu_ready()
        target = f"http://{ip}:{WHISPERX_PORT}/asr"
        log.info(f"Forwarding {len(body)} bytes to {target}")
        # 2h timeout: transcribing long recordings can take a very long time.
        async with httpx.AsyncClient(timeout=httpx.Timeout(7200.0)) as client:
            resp = await client.post(target, content=body, headers=headers)
        last_request_time = time.time()
        result_status = resp.status_code
        ct = resp.headers.get("content-type", "")
        if "application/json" in ct:
            return JSONResponse(content=resp.json(), status_code=resp.status_code)
        # Bug fix: non-JSON upstream bodies were previously wrapped in
        # JSONResponse(resp.text), which JSON-quoted the text and mislabeled
        # it as application/json. Pass the payload through unchanged with its
        # original content type instead (same approach as llm_proxy).
        return Response(
            content=resp.content,
            status_code=resp.status_code,
            media_type=ct or None,
        )
    except httpx.ReadTimeout:
        result_status = 504
        return JSONResponse({"error": "Transcription timeout (2h)"}, status_code=504)
    except Exception as e:
        result_status = 502
        log.error(f"Proxy error: {e}")
        return JSONResponse({"error": str(e)}, status_code=502)
    finally:
        active_requests -= 1
        last_request_time = time.time()
        # Record the outcome (newest first, bounded at MAX_HISTORY entries).
        request_history.insert(0, {
            "time": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "type": "ASR",
            "duration_sec": round(time.time() - start_time, 1),
            "status": result_status,
            "zone": active_zone["label"] if active_zone else "none",
        })
        if len(request_history) > MAX_HISTORY:
            request_history.pop()
|
||||
|
||||
|
||||
@app.get("/health")
async def health():
    """Report proxy liveness plus GPU instance state and monthly budget usage."""
    zone_label = active_zone["label"] if active_zone else "none"
    gpu_status = "unknown"
    if active_zone:
        try:
            info = await get_instance_info(active_zone["zone"], active_zone["instance"])
            gpu_status = info.get("status", "unknown") if info else "not_found"
        except Exception:
            # A health check must never fail just because GCP is unreachable.
            pass
    ok, hours = check_budget()
    stats = load_stats()
    return {
        "proxy": "healthy",
        "gpu_instance": gpu_status,
        "gpu_zone": zone_label,
        "active_requests": active_requests,
        "idle_timeout": IDLE_TIMEOUT,
        "usage": {
            "month": stats.get("month"),
            "gpu_hours": round(hours, 2),
            "gpu_limit_hours": MONTHLY_LIMIT_HOURS,
            "requests_count": stats.get("requests", 0),
            "budget_ok": ok,
        },
        "gpu_ip": gpu_ip,
        "machine_type": active_zone.get("machine_type", "unknown") if active_zone else "unknown",
        "gpu_model": active_zone.get("accelerator", "unknown") if active_zone else "unknown",
        # Seconds since the last proxied request (0 if nothing ran yet).
        "idle_seconds": round(time.time() - last_request_time) if last_request_time > 0 else 0,
        # Countdown to automatic shutdown; None when no instance is active.
        "auto_shutdown_in": max(0, IDLE_TIMEOUT - round(time.time() - last_request_time)) if last_request_time > 0 and active_zone else None,
        "token_expires_in": round(_token_expiry - time.time()) if _token_expiry > 0 else None,
    }
|
||||
|
||||
|
||||
@app.get("/stats")
async def get_stats():
    """Return monthly usage, cost estimates, recent requests, and zone health.

    Consumed by the dashboard served at "/".
    """
    stats = load_stats()
    hours = stats.get("gpu_seconds", 0) / 3600
    gpu_cost = hours * GPU_COST_PER_HOUR
    total_cost = gpu_cost + FIXED_MONTHLY_COST
    return {
        "month": stats.get("month"),
        "gpu_hours": round(hours, 2),
        "gpu_minutes": round(hours * 60, 1),
        "estimated_cost_usd": round(total_cost, 2),
        "gpu_cost_usd": round(gpu_cost, 2),
        "fixed_cost_usd": FIXED_MONTHLY_COST,
        "monthly_limit_hours": MONTHLY_LIMIT_HOURS,
        # Can go negative once the budget is exceeded.
        "remaining_hours": round(MONTHLY_LIMIT_HOURS - hours, 2),
        "requests_count": stats.get("requests", 0),
        "active_zone": stats.get("active_zone", "none"),
        "cost_per_hour": GPU_COST_PER_HOUR,
        "recent_requests": request_history[:10],
        # Static zone config merged with the latest per-zone attempt status.
        "zone_fallbacks": [
            {
                "label": config["label"],
                "zone": config["zone"],
                "machine": config["machine_type"],
                "gpu": config["accelerator"],
                **zone_status.get(config["label"], {"status": "unknown", "last_tried": None, "last_error": None}),
            }
            for config in ZONE_FALLBACKS
        ],
    }
|
||||
|
||||
|
||||
@app.post("/gpu/start")
async def gpu_start():
    """Manually boot the GPU and wait until WhisperX answers health checks."""
    try:
        ip = await ensure_gpu_ready()
        zone_label = active_zone["label"] if active_zone else "unknown"
        return {"status": "running", "ip": ip, "zone": zone_label}
    except Exception as e:
        # Budget exhausted, no capacity, or boot timeout.
        return JSONResponse({"error": str(e)}, status_code=503)
|
||||
|
||||
|
||||
@app.post("/gpu/stop")
async def gpu_stop():
    """Manually stop the active GPU instance and record its usage time."""
    if active_zone is None:
        return {"status": "no active instance"}
    try:
        await stop_instance_in_zone(active_zone["zone"], active_zone["instance"])
        track_gpu_time()
        return {"status": "stopped", "zone": active_zone["label"]}
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
|
||||
|
||||
|
||||
# Monitoring page served at "/"; expected to sit next to proxy.py.
DASHBOARD_HTML = Path(__file__).parent / "dashboard.html"


@app.get("/", response_class=HTMLResponse)
async def dashboard():
    """Serve the monitoring dashboard, or a 404 hint when the file is missing."""
    if not DASHBOARD_HTML.exists():
        return HTMLResponse("<h1>Dashboard not found</h1><p>Place dashboard.html next to proxy.py</p>", status_code=404)
    return HTMLResponse(DASHBOARD_HTML.read_text(encoding="utf-8"))
|
||||
|
||||
|
||||
@app.api_route("/v1/{path:path}", methods=["POST", "GET"])
async def llm_proxy(request: Request, path: str):
    """Proxy OpenAI-compatible /v1/* requests to Ollama on the GPU instance.

    Boots the GPU and waits for Ollama if needed, forwards the raw body and
    method, records the request in request_history, and maps upstream
    failures to 504 (timeout) or 502 (any other error).
    """
    global last_request_time, active_requests

    body = await request.body()
    # Drop headers httpx must set itself for the upstream connection.
    headers = {
        k: v for k, v in request.headers.items()
        if k.lower() not in ("host", "transfer-encoding")
    }

    last_request_time = time.time()
    active_requests += 1
    start_time = time.time()
    result_status = 200
    try:
        ip = await ensure_ollama_ready()
        target = f"http://{ip}:{OLLAMA_PORT}/v1/{path}"
        log.info(f"Forwarding LLM request to {target}")
        # 5 min timeout: LLM generation is slow but bounded.
        async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
            resp = await client.request(request.method, target, content=body, headers=headers)
        last_request_time = time.time()
        result_status = resp.status_code
        # Pass the upstream payload through unchanged with its content type.
        return Response(
            content=resp.content,
            status_code=resp.status_code,
            media_type=resp.headers.get("content-type"),
        )
    except httpx.ReadTimeout:
        result_status = 504
        return JSONResponse({"error": "LLM timeout (5min)"}, status_code=504)
    except Exception as e:
        result_status = 502
        log.error(f"LLM proxy error: {e}")
        return JSONResponse({"error": str(e)}, status_code=502)
    finally:
        active_requests -= 1
        last_request_time = time.time()
        # Record the outcome (newest first, bounded at MAX_HISTORY entries).
        request_history.insert(0, {
            "time": time.strftime("%Y-%m-%dT%H:%M:%S"),
            "type": "LLM",
            "duration_sec": round(time.time() - start_time, 1),
            "status": result_status,
            "zone": active_zone["label"] if active_zone else "none",
        })
        if len(request_history) > MAX_HISTORY:
            request_history.pop()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point used by `python proxy.py` (also how the systemd unit runs it).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=9090)
|
||||
5
deployment/asr-proxy/requirements.txt
Normal file
5
deployment/asr-proxy/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
fastapi==0.115.0
|
||||
uvicorn==0.30.0
|
||||
httpx==0.27.0
|
||||
PyJWT==2.9.0
|
||||
cryptography>=43.0.0
|
||||
87
deployment/asr-proxy/setup.sh
Normal file
87
deployment/asr-proxy/setup.sh
Normal file
@@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env bash
|
||||
# DictIA ASR Proxy — Setup script
|
||||
# Installs the GCP GPU proxy for cloud deployments.
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
INSTALL_DIR="${ASR_PROXY_DIR:-$SCRIPT_DIR}"
|
||||
SERVICE_USER="${ASR_PROXY_USER:-$(whoami)}"
|
||||
|
||||
echo "=== DictIA ASR Proxy Setup ==="
|
||||
echo "Install directory: $INSTALL_DIR"
|
||||
echo "Service user: $SERVICE_USER"
|
||||
echo
|
||||
|
||||
# 1. Create virtual environment
|
||||
if [ ! -d "$INSTALL_DIR/venv" ]; then
|
||||
echo "[1/4] Creating Python virtual environment..."
|
||||
python3 -m venv "$INSTALL_DIR/venv"
|
||||
else
|
||||
echo "[1/4] Virtual environment already exists."
|
||||
fi
|
||||
|
||||
# 2. Install dependencies
|
||||
echo "[2/4] Installing Python dependencies..."
|
||||
"$INSTALL_DIR/venv/bin/pip" install --quiet --upgrade pip
|
||||
"$INSTALL_DIR/venv/bin/pip" install --quiet -r "$INSTALL_DIR/requirements.txt"
|
||||
|
||||
# 3. GCP credentials
|
||||
if [ ! -f "$INSTALL_DIR/gcp-credentials.json" ]; then
|
||||
echo "[3/4] GCP credentials not found."
|
||||
echo " Place your GCP service account or OAuth credentials at:"
|
||||
echo " $INSTALL_DIR/gcp-credentials.json"
|
||||
echo
|
||||
echo " For service account: download JSON from GCP Console > IAM > Service Accounts"
|
||||
echo " For user credentials: run 'gcloud auth application-default login' and copy the file"
|
||||
echo
|
||||
read -rp " Path to credentials file (or press Enter to skip): " CREDS_PATH
|
||||
if [ -n "$CREDS_PATH" ] && [ -f "$CREDS_PATH" ]; then
|
||||
cp "$CREDS_PATH" "$INSTALL_DIR/gcp-credentials.json"
|
||||
chmod 600 "$INSTALL_DIR/gcp-credentials.json"
|
||||
echo " Credentials copied."
|
||||
else
|
||||
echo " Skipped. You must add credentials before starting the proxy."
|
||||
fi
|
||||
else
|
||||
echo "[3/4] GCP credentials found."
|
||||
fi
|
||||
|
||||
# 4. Install systemd service
|
||||
echo "[4/4] Installing systemd service..."
|
||||
SERVICE_FILE="/etc/systemd/system/asr-proxy.service"
|
||||
|
||||
cat > /tmp/asr-proxy.service <<UNIT
|
||||
[Unit]
|
||||
Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=$SERVICE_USER
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
WorkingDirectory=$INSTALL_DIR
|
||||
ExecStart=$INSTALL_DIR/venv/bin/python proxy.py
|
||||
Environment=GOOGLE_APPLICATION_CREDENTIALS=$INSTALL_DIR/gcp-credentials.json
|
||||
Environment=STATS_FILE=$INSTALL_DIR/usage-stats.json
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
UNIT
|
||||
|
||||
if [ "$(id -u)" -eq 0 ]; then
|
||||
cp /tmp/asr-proxy.service "$SERVICE_FILE"
|
||||
systemctl daemon-reload
|
||||
systemctl enable asr-proxy.service
|
||||
echo " Service installed and enabled."
|
||||
echo " Start with: systemctl start asr-proxy"
|
||||
else
|
||||
echo " Run as root to install systemd service, or copy manually:"
|
||||
echo " sudo cp /tmp/asr-proxy.service $SERVICE_FILE"
|
||||
echo " sudo systemctl daemon-reload && sudo systemctl enable asr-proxy"
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "=== Setup complete ==="
|
||||
echo "Dashboard: http://localhost:9090"
|
||||
echo "Health: http://localhost:9090/health"
|
||||
83
deployment/config/nginx/dictia.conf
Normal file
83
deployment/config/nginx/dictia.conf
Normal file
@@ -0,0 +1,83 @@
|
||||
# DictIA — Nginx reverse proxy configuration
|
||||
#
|
||||
# Alternative to Tailscale Serve for exposing DictIA over HTTPS.
|
||||
# Replace YOUR_DOMAIN with your actual domain name.
|
||||
#
|
||||
# Install: sudo cp dictia.conf /etc/nginx/sites-available/dictia
|
||||
# sudo ln -s /etc/nginx/sites-available/dictia /etc/nginx/sites-enabled/
|
||||
# sudo nginx -t && sudo systemctl reload nginx
|
||||
#
|
||||
# For HTTPS with Let's Encrypt:
|
||||
# sudo certbot --nginx -d YOUR_DOMAIN
|
||||
|
||||
upstream dictia_app {
|
||||
server 127.0.0.1:8899;
|
||||
}
|
||||
|
||||
upstream asr_proxy {
|
||||
server 127.0.0.1:9090;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
server_name YOUR_DOMAIN;
|
||||
|
||||
# Redirect HTTP to HTTPS (uncomment after certbot setup)
|
||||
# return 301 https://$host$request_uri;
|
||||
|
||||
client_max_body_size 500M;
|
||||
|
||||
# DictIA app
|
||||
location / {
|
||||
proxy_pass http://dictia_app;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# WebSocket support (for real-time features)
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Upgrade $http_upgrade;
|
||||
proxy_set_header Connection "upgrade";
|
||||
|
||||
# Long timeouts for transcription uploads
|
||||
proxy_read_timeout 3600s;
|
||||
proxy_send_timeout 3600s;
|
||||
proxy_connect_timeout 60s;
|
||||
}
|
||||
|
||||
# ASR Proxy dashboard (optional, restrict access)
|
||||
location /asr-proxy/ {
|
||||
proxy_pass http://asr_proxy/;
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
}
|
||||
}
|
||||
|
||||
# HTTPS server block (managed by certbot, uncomment after setup)
|
||||
# server {
|
||||
# listen 443 ssl;
|
||||
# server_name YOUR_DOMAIN;
|
||||
#
|
||||
# ssl_certificate /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem;
|
||||
# ssl_certificate_key /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem;
|
||||
# include /etc/letsencrypt/options-ssl-nginx.conf;
|
||||
# ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
|
||||
#
|
||||
# client_max_body_size 500M;
|
||||
#
|
||||
# location / {
|
||||
# proxy_pass http://dictia_app;
|
||||
# proxy_set_header Host $host;
|
||||
# proxy_set_header X-Real-IP $remote_addr;
|
||||
# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
# proxy_set_header X-Forwarded-Proto $scheme;
|
||||
# proxy_http_version 1.1;
|
||||
# proxy_set_header Upgrade $http_upgrade;
|
||||
# proxy_set_header Connection "upgrade";
|
||||
# proxy_read_timeout 3600s;
|
||||
# proxy_send_timeout 3600s;
|
||||
# }
|
||||
# }
|
||||
15
deployment/config/systemd/dictia.service
Normal file
15
deployment/config/systemd/dictia.service
Normal file
@@ -0,0 +1,15 @@
|
||||
[Unit]
|
||||
Description=DictIA - Docker Compose Application
|
||||
After=docker.service
|
||||
Requires=docker.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=yes
|
||||
WorkingDirectory=/opt/dictia
|
||||
ExecStart=/usr/bin/docker compose -f deployment/docker/docker-compose.cloud.yml up -d
|
||||
ExecStop=/usr/bin/docker compose -f deployment/docker/docker-compose.cloud.yml down
|
||||
TimeoutStartSec=120
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
52
deployment/config/tailscale/setup-serve.sh
Normal file
52
deployment/config/tailscale/setup-serve.sh
Normal file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env bash
|
||||
# DictIA — Tailscale Serve/Funnel setup
|
||||
#
|
||||
# Exposes DictIA and ASR Proxy dashboard via Tailscale HTTPS.
|
||||
# Based on the VPS production configuration.
|
||||
#
|
||||
# Usage:
|
||||
# bash setup-serve.sh [serve|funnel]
|
||||
# serve — accessible only within your tailnet (default)
|
||||
# funnel — accessible from the public internet
|
||||
set -euo pipefail
|
||||
|
||||
MODE="${1:-serve}"
|
||||
|
||||
echo "=== DictIA Tailscale Setup ==="
|
||||
echo "Mode: $MODE"
|
||||
echo
|
||||
|
||||
# Verify Tailscale is connected
|
||||
if ! tailscale status >/dev/null 2>&1; then
|
||||
echo "ERROR: Tailscale is not running or not connected."
|
||||
echo " Install: curl -fsSL https://tailscale.com/install.sh | sh"
|
||||
echo " Connect: sudo tailscale up"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
HOSTNAME=$(tailscale status --json | python3 -c "import sys,json; print(json.load(sys.stdin)['Self']['DNSName'].rstrip('.'))" 2>/dev/null || echo "unknown")
|
||||
echo "Tailscale hostname: $HOSTNAME"
|
||||
echo
|
||||
|
||||
# DictIA app on :443 → localhost:8899
|
||||
echo "[1/2] Setting up DictIA app (port 443 → 8899)..."
|
||||
if [ "$MODE" = "funnel" ]; then
|
||||
tailscale funnel --bg --https=443 http://localhost:8899
|
||||
else
|
||||
tailscale serve --bg --https=443 http://localhost:8899
|
||||
fi
|
||||
|
||||
# ASR Proxy dashboard on :9443 → localhost:9090
|
||||
echo "[2/2] Setting up ASR Proxy dashboard (port 9443 → 9090)..."
|
||||
if [ "$MODE" = "funnel" ]; then
|
||||
tailscale funnel --bg --https=9443 http://localhost:9090
|
||||
else
|
||||
tailscale serve --bg --https=9443 http://localhost:9090
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "=== Setup complete ==="
|
||||
echo "DictIA: https://$HOSTNAME/"
|
||||
echo "ASR Dashboard: https://$HOSTNAME:9443/"
|
||||
echo
|
||||
echo "Verify with: tailscale serve status"
|
||||
124
deployment/docker/.env.example
Normal file
124
deployment/docker/.env.example
Normal file
@@ -0,0 +1,124 @@
|
||||
# =============================================================================
|
||||
# DictIA — Unified Environment Configuration
|
||||
# =============================================================================
|
||||
#
|
||||
# Copy this file to the project root as .env and edit the values.
|
||||
# cp deployment/docker/.env.example .env
|
||||
#
|
||||
# This template combines upstream settings with DictIA deployment vars.
|
||||
# See: config/env.transcription.example for full upstream documentation.
|
||||
|
||||
# =============================================================================
|
||||
# FLASK SECRET KEY (REQUIRED — auto-generated by setup.sh)
|
||||
# =============================================================================
|
||||
SECRET_KEY=change-me-to-a-random-string
|
||||
|
||||
# =============================================================================
|
||||
# DEPLOYMENT PROFILE (used by deployment scripts)
|
||||
# =============================================================================
|
||||
# Options: cloud, local-cpu, local-gpu
|
||||
DICTIA_PROFILE=cloud
|
||||
|
||||
# =============================================================================
|
||||
# TEXT GENERATION MODEL (REQUIRED for summaries, titles, chat)
|
||||
# =============================================================================
|
||||
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
|
||||
TEXT_MODEL_API_KEY=your_openrouter_api_key
|
||||
TEXT_MODEL_NAME=openai/gpt-4o-mini
|
||||
|
||||
# =============================================================================
|
||||
# TRANSCRIPTION CONFIGURATION
|
||||
# =============================================================================
|
||||
# For cloud profile (ASR Proxy → GCP GPU):
|
||||
# ASR_BASE_URL is set automatically in docker-compose.cloud.yml
|
||||
# No need to set it here.
|
||||
#
|
||||
# For local profiles (WhisperX sidecar):
|
||||
# ASR_BASE_URL is set automatically in docker-compose.local-*.yml
|
||||
# No need to set it here.
|
||||
#
|
||||
# For OpenAI API instead of self-hosted ASR:
|
||||
# TRANSCRIPTION_API_KEY=sk-your_openai_api_key
|
||||
# TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize
|
||||
|
||||
# ASR model (for local WhisperX profiles)
|
||||
ASR_MODEL=large-v3
|
||||
|
||||
# HuggingFace token (required for diarization with pyannote)
|
||||
# Get yours at: https://huggingface.co/settings/tokens
|
||||
# Must accept: https://huggingface.co/pyannote/speaker-diarization-3.1
|
||||
HF_TOKEN=
|
||||
|
||||
# =============================================================================
|
||||
# ASR PROXY — CLOUD PROFILE ONLY
|
||||
# =============================================================================
|
||||
# GCP project for GPU instances
|
||||
# GCP_PROJECT=your-gcp-project
|
||||
|
||||
# Monthly GPU budget limit in hours (default: 50)
|
||||
# MONTHLY_LIMIT_HOURS=50
|
||||
|
||||
# Idle timeout before auto-stopping GPU (seconds, default: 300)
|
||||
# IDLE_TIMEOUT=300
|
||||
|
||||
# =============================================================================
|
||||
# APPLICATION SETTINGS
|
||||
# =============================================================================
|
||||
ADMIN_USERNAME=admin
|
||||
ADMIN_EMAIL=admin@example.com
|
||||
ADMIN_PASSWORD=changeme
|
||||
|
||||
ALLOW_REGISTRATION=false
|
||||
TIMEZONE="America/Toronto"
|
||||
LOG_LEVEL=ERROR
|
||||
LOCALE=fr_CA
|
||||
DEFAULT_LANGUAGE=fr
|
||||
SHOW_USERNAMES_IN_UI=true
|
||||
SESSION_COOKIE_HTTPONLY=true
|
||||
SESSION_COOKIE_SAMESITE=Lax
|
||||
SESSION_COOKIE_SECURE=true
|
||||
|
||||
# =============================================================================
|
||||
# OPTIONAL FEATURES
|
||||
# =============================================================================
|
||||
ENABLE_INQUIRE_MODE=false
|
||||
ENABLE_AUTO_PROCESSING=false
|
||||
ENABLE_AUTO_EXPORT=false
|
||||
ENABLE_AUTO_DELETION=false
|
||||
ENABLE_INTERNAL_SHARING=true
|
||||
ENABLE_PUBLIC_SHARING=true
|
||||
ENABLE_FOLDERS=true
|
||||
VIDEO_RETENTION=true
|
||||
USERS_CAN_DELETE=true
|
||||
|
||||
# =============================================================================
|
||||
# BACKGROUND PROCESSING
|
||||
# =============================================================================
|
||||
JOB_QUEUE_WORKERS=4
|
||||
SUMMARY_QUEUE_WORKERS=4
|
||||
JOB_MAX_RETRIES=3
|
||||
MAX_CONCURRENT_UPLOADS=3
|
||||
|
||||
# =============================================================================
|
||||
# TRANSCRIPTION SETTINGS
|
||||
# =============================================================================
|
||||
TRANSCRIPTION_CONNECTOR=asr_endpoint
|
||||
USE_NEW_TRANSCRIPTION_ARCHITECTURE=true
|
||||
ENABLE_CHUNKING=true
|
||||
CHUNK_LIMIT=2400s
|
||||
CHUNK_OVERLAP_SECONDS=5
|
||||
|
||||
# =============================================================================
|
||||
# LLM / SUMMARY SETTINGS
|
||||
# =============================================================================
|
||||
SUMMARY_LANGUAGE=fr
|
||||
SUMMARY_MAX_TOKENS=16000
|
||||
CHAT_MAX_TOKENS=12000
|
||||
ENABLE_STREAM_OPTIONS=false
|
||||
ENABLE_THINKING=false
|
||||
|
||||
# =============================================================================
|
||||
# DOCKER/DATABASE
|
||||
# =============================================================================
|
||||
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
|
||||
UPLOAD_FOLDER=/data/uploads
|
||||
40
deployment/docker/docker-compose.cloud.yml
Normal file
40
deployment/docker/docker-compose.cloud.yml
Normal file
@@ -0,0 +1,40 @@
|
||||
# DictIA — Cloud deployment (VPS + ASR Proxy GCP GPU)
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f deployment/docker/docker-compose.cloud.yml up -d
|
||||
#
|
||||
# ASR is handled by the external asr-proxy (port 9090) which auto-starts
|
||||
# a GCP GPU instance on demand. DictIA connects via host.docker.internal.
|
||||
|
||||
services:
|
||||
dictia:
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: Dockerfile
|
||||
image: innova-ai/dictia:latest
|
||||
container_name: dictia
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8899:8899"
|
||||
env_file:
|
||||
- ../../.env
|
||||
environment:
|
||||
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
|
||||
- ASR_BASE_URL=http://host.docker.internal:9090
|
||||
volumes:
|
||||
- ../../data/uploads:/data/uploads
|
||||
- ../../data/instance:/data/instance
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
healthcheck:
|
||||
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
networks:
|
||||
- dictia-network
|
||||
|
||||
networks:
|
||||
dictia-network:
|
||||
driver: bridge
|
||||
64
deployment/docker/docker-compose.local-cpu.yml
Normal file
64
deployment/docker/docker-compose.local-cpu.yml
Normal file
@@ -0,0 +1,64 @@
|
||||
# DictIA — Local CPU deployment (WhisperX on CPU + DictIA)
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f deployment/docker/docker-compose.local-cpu.yml up -d
|
||||
#
|
||||
# Warning: CPU transcription is significantly slower than GPU.
|
||||
# Expect ~10x real-time for large files (e.g., 1h audio = ~10h processing).
|
||||
|
||||
services:
|
||||
whisperx-asr:
|
||||
image: ghcr.io/jim60105/whisperx-asr:latest
|
||||
container_name: whisperx-asr
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "9000:9000"
|
||||
environment:
|
||||
- ASR_MODEL=${ASR_MODEL:-large-v3}
|
||||
- ASR_ENGINE=whisperx
|
||||
- DEVICE=cpu
|
||||
- COMPUTE_TYPE=float32
|
||||
- HF_TOKEN=${HF_TOKEN:-}
|
||||
volumes:
|
||||
- whisperx-cache:/root/.cache
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 18G
|
||||
networks:
|
||||
- dictia-network
|
||||
|
||||
dictia:
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: Dockerfile
|
||||
image: innova-ai/dictia:latest
|
||||
container_name: dictia
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8899:8899"
|
||||
env_file:
|
||||
- ../../.env
|
||||
environment:
|
||||
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
|
||||
- ASR_BASE_URL=http://whisperx-asr:9000
|
||||
volumes:
|
||||
- ../../data/uploads:/data/uploads
|
||||
- ../../data/instance:/data/instance
|
||||
depends_on:
|
||||
- whisperx-asr
|
||||
healthcheck:
|
||||
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
networks:
|
||||
- dictia-network
|
||||
|
||||
volumes:
|
||||
whisperx-cache:
|
||||
|
||||
networks:
|
||||
dictia-network:
|
||||
driver: bridge
|
||||
69
deployment/docker/docker-compose.local-gpu.yml
Normal file
69
deployment/docker/docker-compose.local-gpu.yml
Normal file
@@ -0,0 +1,69 @@
|
||||
# DictIA — Local GPU deployment (WhisperX on NVIDIA GPU + DictIA)
|
||||
#
|
||||
# Usage:
|
||||
# docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d
|
||||
#
|
||||
# Prerequisites:
|
||||
# - NVIDIA GPU with CUDA support
|
||||
# - nvidia-container-toolkit installed
|
||||
# - Docker configured with nvidia runtime
|
||||
|
||||
services:
|
||||
whisperx-asr:
|
||||
image: ghcr.io/jim60105/whisperx-asr:latest-cuda
|
||||
container_name: whisperx-asr
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "9000:9000"
|
||||
environment:
|
||||
- ASR_MODEL=${ASR_MODEL:-large-v3}
|
||||
- ASR_ENGINE=whisperx
|
||||
- DEVICE=cuda
|
||||
- COMPUTE_TYPE=float16
|
||||
- HF_TOKEN=${HF_TOKEN:-}
|
||||
volumes:
|
||||
- whisperx-cache:/root/.cache
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
networks:
|
||||
- dictia-network
|
||||
|
||||
dictia:
|
||||
build:
|
||||
context: ../..
|
||||
dockerfile: Dockerfile
|
||||
image: innova-ai/dictia:latest
|
||||
container_name: dictia
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8899:8899"
|
||||
env_file:
|
||||
- ../../.env
|
||||
environment:
|
||||
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
|
||||
- ASR_BASE_URL=http://whisperx-asr:9000
|
||||
volumes:
|
||||
- ../../data/uploads:/data/uploads
|
||||
- ../../data/instance:/data/instance
|
||||
depends_on:
|
||||
- whisperx-asr
|
||||
healthcheck:
|
||||
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 30s
|
||||
networks:
|
||||
- dictia-network
|
||||
|
||||
volumes:
|
||||
whisperx-cache:
|
||||
|
||||
networks:
|
||||
dictia-network:
|
||||
driver: bridge
|
||||
118
deployment/docs/LOCAL-SETUP.md
Normal file
118
deployment/docs/LOCAL-SETUP.md
Normal file
@@ -0,0 +1,118 @@
|
||||
# Setup Local — DictIA
|
||||
|
||||
Guide pour deployer DictIA localement avec GPU NVIDIA ou CPU.
|
||||
|
||||
## Profil local-gpu
|
||||
|
||||
### Prerequis
|
||||
|
||||
- NVIDIA GPU avec support CUDA
|
||||
- [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
|
||||
- Docker + Docker Compose V2
|
||||
- 8GB+ RAM (16GB recommande)
|
||||
- Token HuggingFace (pour la diarisation)
|
||||
|
||||
### Installation nvidia-container-toolkit
|
||||
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
|
||||
sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
|
||||
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
|
||||
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
|
||||
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y nvidia-container-toolkit
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
|
||||
# Verifier
|
||||
docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi
|
||||
```
|
||||
|
||||
### Setup DictIA
|
||||
|
||||
```bash
|
||||
cd dictia
|
||||
bash deployment/setup.sh --profile local-gpu
|
||||
```
|
||||
|
||||
Le setup va verifier:
|
||||
- nvidia-container-toolkit installe
|
||||
- GPU accessible depuis Docker
|
||||
- Assez de RAM disponible
|
||||
|
||||
### Configuration du modele
|
||||
|
||||
Par defaut, WhisperX utilise `large-v3`. Pour changer:
|
||||
|
||||
```bash
|
||||
# Editer .env
|
||||
ASR_MODEL=large-v3 # Meilleure qualite
|
||||
# ASR_MODEL=medium # Plus rapide, qualite correcte
|
||||
# ASR_MODEL=small # Tres rapide, qualite reduite
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Profil local-cpu
|
||||
|
||||
### Prerequis
|
||||
|
||||
- Docker + Docker Compose V2
|
||||
- 18GB+ RAM (WhisperX CPU est gourmand)
|
||||
- Patience (transcription ~10x temps reel)
|
||||
|
||||
### Setup
|
||||
|
||||
```bash
|
||||
cd dictia
|
||||
bash deployment/setup.sh --profile local-cpu
|
||||
```
|
||||
|
||||
### Limitations
|
||||
|
||||
- Transcription lente: 1h d'audio prend ~10h
|
||||
- Utilise float32 (pas de GPU acceleration)
|
||||
- Limite memoire a 18GB par defaut
|
||||
- Recommande pour: tests, petits fichiers, demos
|
||||
|
||||
Pour reduire l'utilisation memoire, utiliser un modele plus petit:
|
||||
|
||||
```bash
|
||||
# Editer .env
|
||||
ASR_MODEL=small # ou medium, base, tiny
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Verification
|
||||
|
||||
```bash
|
||||
# Health check
|
||||
bash deployment/tools/health-check.sh
|
||||
|
||||
# Test rapide: ouvrir le navigateur
|
||||
open http://localhost:8899
|
||||
|
||||
# Verifier WhisperX
|
||||
curl http://localhost:9000/health
|
||||
```
|
||||
|
||||
## Gestion des containers
|
||||
|
||||
```bash
|
||||
COMPOSE_FILE=deployment/docker/docker-compose.local-gpu.yml # ou local-cpu
|
||||
|
||||
# Logs
|
||||
docker compose -f $COMPOSE_FILE logs -f
|
||||
|
||||
# Redemarrer
|
||||
docker compose -f $COMPOSE_FILE restart
|
||||
|
||||
# Arreter
|
||||
docker compose -f $COMPOSE_FILE down
|
||||
|
||||
# Voir l'utilisation GPU
|
||||
nvidia-smi # (profil GPU seulement)
|
||||
```
|
||||
136
deployment/docs/MAINTENANCE.md
Normal file
136
deployment/docs/MAINTENANCE.md
Normal file
@@ -0,0 +1,136 @@
|
||||
# Maintenance — DictIA
|
||||
|
||||
## Backup
|
||||
|
||||
```bash
|
||||
# Backup complet (data, .env, volumes, stats ASR)
|
||||
bash deployment/tools/backup.sh
|
||||
|
||||
# Backup dans un repertoire specifique
|
||||
bash deployment/tools/backup.sh /mnt/backups
|
||||
```
|
||||
|
||||
Les backups sont sauvegardes dans `backups/` avec rotation automatique (garde les 5 derniers).
|
||||
|
||||
Contenu d'un backup:
|
||||
- `data/` — uploads et base de donnees SQLite
|
||||
- `dot-env` — fichier de configuration
|
||||
- `asr-usage-stats.json` — stats d'utilisation GPU
|
||||
- `whisperx-cache.tar.gz` — cache modeles (si volume Docker)
|
||||
- `manifest.json` — metadonnees du backup
|
||||
|
||||
### Schedule recommande
|
||||
|
||||
| Frequence | Action |
|
||||
|-----------|--------|
|
||||
| Quotidien | `bash deployment/tools/backup.sh` |
|
||||
| Hebdomadaire | Copier le backup sur un stockage externe |
|
||||
| Mensuel | Verifier la restauration sur un environnement de test |
|
||||
|
||||
Pour automatiser avec cron:
|
||||
|
||||
```bash
|
||||
# Backup quotidien a 3h du matin
|
||||
0 3 * * * /opt/dictia/deployment/tools/backup.sh >> /var/log/dictia-backup.log 2>&1
|
||||
```
|
||||
|
||||
## Restore
|
||||
|
||||
```bash
|
||||
# Lister les backups disponibles
|
||||
ls -la backups/
|
||||
|
||||
# Restaurer un backup
|
||||
bash deployment/tools/restore.sh backups/dictia-20260211-030000.tar.gz
|
||||
```
|
||||
|
||||
Le script:
|
||||
1. Valide l'archive (presence du manifest)
|
||||
2. Demande confirmation
|
||||
3. Arrete les containers
|
||||
4. Restaure les fichiers
|
||||
5. Redemarre les containers
|
||||
|
||||
## Mise a jour
|
||||
|
||||
```bash
|
||||
# Mise a jour complete (git pull + rebuild + restart)
|
||||
bash deployment/tools/update.sh
|
||||
|
||||
# Rebuild seulement (sans git pull)
|
||||
bash deployment/tools/update.sh --no-pull
|
||||
|
||||
# Git pull seulement (sans rebuild)
|
||||
bash deployment/tools/update.sh --no-build
|
||||
```
|
||||
|
||||
Le script:
|
||||
1. Detecte le profil actif automatiquement
|
||||
2. `git pull origin dictia-branding`
|
||||
3. `docker build -t innova-ai/dictia:latest .`
|
||||
4. Pull WhisperX upstream (profils locaux)
|
||||
5. `docker compose down && up -d`
|
||||
6. Attend le health check
|
||||
7. Nettoie les images dangling
|
||||
|
||||
## Monitoring
|
||||
|
||||
### Health check
|
||||
|
||||
```bash
|
||||
# Diagnostic complet (humain)
|
||||
bash deployment/tools/health-check.sh
|
||||
|
||||
# JSON (pour alertes/scripts)
|
||||
bash deployment/tools/health-check.sh --json
|
||||
|
||||
# Code de sortie seulement (0=ok, 1=probleme)
|
||||
bash deployment/tools/health-check.sh --quiet
|
||||
```
|
||||
|
||||
### Logs
|
||||
|
||||
```bash
|
||||
# DictIA
|
||||
docker logs dictia -f --tail 100
|
||||
|
||||
# WhisperX (profils locaux)
|
||||
docker logs whisperx-asr -f --tail 100
|
||||
|
||||
# ASR Proxy (profil cloud)
|
||||
journalctl -u asr-proxy -f
|
||||
```
|
||||
|
||||
### Dashboard GPU (profil cloud)
|
||||
|
||||
Le dashboard de monitoring GPU est accessible a:
|
||||
- `http://localhost:9090` (local)
|
||||
- `https://votre-hostname.tailnet.ts.net:9443` (Tailscale)
|
||||
|
||||
Affiche: statut GPU, cout mensuel, historique des requetes, zones de fallback.
|
||||
|
||||
### Metriques cles
|
||||
|
||||
```bash
|
||||
# Espace disque (les transcriptions grossissent)
|
||||
df -h /opt/dictia/data/
|
||||
|
||||
# Utilisation memoire (WhisperX est gourmand)
|
||||
docker stats --no-stream
|
||||
|
||||
# Stats GPU (profil cloud)
|
||||
curl -s http://localhost:9090/stats | python3 -m json.tool
|
||||
```
|
||||
|
||||
## Maintenance Docker
|
||||
|
||||
```bash
|
||||
# Nettoyer les images orphelines
|
||||
docker image prune -f
|
||||
|
||||
# Nettoyer tout (attention: supprime les volumes non utilises)
|
||||
# docker system prune -a --volumes
|
||||
|
||||
# Verifier l'espace Docker
|
||||
docker system df
|
||||
```
|
||||
90
deployment/docs/QUICKSTART.md
Normal file
90
deployment/docs/QUICKSTART.md
Normal file
@@ -0,0 +1,90 @@
|
||||
# Quickstart — DictIA
|
||||
|
||||
## Prerequis communs
|
||||
|
||||
- Docker + Docker Compose V2
|
||||
- Git
|
||||
- 2GB+ RAM disponible
|
||||
|
||||
```bash
|
||||
git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
|
||||
cd dictia
|
||||
git checkout dictia-branding
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Profil Cloud (VPS + GCP GPU)
|
||||
|
||||
Le GPU demarre automatiquement quand quelqu'un transcrit, et s'arrete apres 5 min d'inactivite.
|
||||
|
||||
```bash
|
||||
# 1. Setup interactif
|
||||
bash deployment/setup.sh --profile cloud
|
||||
|
||||
# 2. Setup ASR Proxy (GCP credentials requises)
|
||||
bash deployment/asr-proxy/setup.sh
|
||||
|
||||
# 3. Optionnel: Tailscale Serve pour HTTPS
|
||||
bash deployment/config/tailscale/setup-serve.sh
|
||||
```
|
||||
|
||||
**Requis**: credentials GCP (service account ou OAuth) dans `deployment/asr-proxy/gcp-credentials.json`.
|
||||
|
||||
---
|
||||
|
||||
## Profil Local GPU
|
||||
|
||||
Transcription locale sur GPU NVIDIA. Le plus rapide.
|
||||
|
||||
```bash
|
||||
# Prerequis: nvidia-container-toolkit
|
||||
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
|
||||
|
||||
# Setup
|
||||
bash deployment/setup.sh --profile local-gpu
|
||||
```
|
||||
|
||||
**Requis**: token HuggingFace pour la diarisation (pyannote).
|
||||
|
||||
---
|
||||
|
||||
## Profil Local CPU
|
||||
|
||||
Transcription sur CPU. Lent mais fonctionnel pour tester.
|
||||
|
||||
```bash
|
||||
bash deployment/setup.sh --profile local-cpu
|
||||
```
|
||||
|
||||
Prevoir ~10x le temps reel (1h audio = ~10h de traitement).
|
||||
|
||||
---
|
||||
|
||||
## Apres l'installation
|
||||
|
||||
```bash
|
||||
# Verifier que tout fonctionne
|
||||
bash deployment/tools/health-check.sh
|
||||
|
||||
# Ouvrir DictIA
|
||||
open http://localhost:8899
|
||||
```
|
||||
|
||||
Se connecter avec les identifiants admin configures pendant le setup.
|
||||
|
||||
## Commandes utiles
|
||||
|
||||
```bash
|
||||
# Logs en temps reel
|
||||
docker compose -f deployment/docker/docker-compose.<profil>.yml logs -f
|
||||
|
||||
# Redemarrer
|
||||
docker compose -f deployment/docker/docker-compose.<profil>.yml restart
|
||||
|
||||
# Mise a jour
|
||||
bash deployment/tools/update.sh
|
||||
|
||||
# Backup
|
||||
bash deployment/tools/backup.sh
|
||||
```
|
||||
177
deployment/docs/TROUBLESHOOTING.md
Normal file
177
deployment/docs/TROUBLESHOOTING.md
Normal file
@@ -0,0 +1,177 @@
|
||||
# Troubleshooting — DictIA
|
||||
|
||||
## WhisperX OOM (Out of Memory)
|
||||
|
||||
**Symptome**: Container `whisperx-asr` crash ou restart en boucle.
|
||||
|
||||
**Cause**: Modele trop gros pour la RAM/VRAM disponible.
|
||||
|
||||
**Solutions**:
|
||||
```bash
|
||||
# Utiliser un modele plus petit (variable DEFAULT_MODEL du service whisperx-asr)
DEFAULT_MODEL=medium # au lieu de large-v3
|
||||
|
||||
# Augmenter la limite memoire (local-cpu)
|
||||
# Editer docker-compose.local-cpu.yml
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 24G # au lieu de 18G
|
||||
```
|
||||
|
||||
## Diarisation 403 Forbidden
|
||||
|
||||
**Symptome**: Erreur 403 lors de la transcription avec diarisation.
|
||||
|
||||
**Cause**: Token HuggingFace manquant ou conditions non acceptees.
|
||||
|
||||
**Solution**:
|
||||
1. Creer un token: https://huggingface.co/settings/tokens
|
||||
2. Accepter les conditions: https://huggingface.co/pyannote/speaker-diarization-3.1
|
||||
3. Ajouter dans `.env`:
|
||||
```bash
|
||||
HF_TOKEN=hf_votre_token
|
||||
```
|
||||
4. Redemarrer: `docker compose -f deployment/docker/docker-compose.<profil>.yml restart`
|
||||
|
||||
## GPU non detecte (local-gpu)
|
||||
|
||||
**Symptome**: `nvidia-smi` fonctionne mais Docker ne voit pas le GPU.
|
||||
|
||||
**Solution**:
|
||||
```bash
|
||||
# Installer nvidia-container-toolkit
|
||||
sudo apt install -y nvidia-container-toolkit
|
||||
sudo nvidia-ctk runtime configure --runtime=docker
|
||||
sudo systemctl restart docker
|
||||
|
||||
# Verifier
|
||||
docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi
|
||||
```
|
||||
|
||||
## Upload echoue (fichiers volumineux)
|
||||
|
||||
**Symptome**: Upload de gros fichiers (>100MB) echoue.
|
||||
|
||||
**Causes possibles**:
|
||||
- Timeout Nginx/reverse proxy
|
||||
- Limite upload trop basse
|
||||
|
||||
**Solutions**:
|
||||
```bash
|
||||
# Si Nginx: verifier client_max_body_size dans dictia.conf
|
||||
client_max_body_size 500M;
|
||||
|
||||
# Si Tailscale Serve: pas de limite cote Tailscale
|
||||
|
||||
# Timeout gunicorn (dans le Dockerfile, deja a 600s)
|
||||
# Pour des fichiers tres longs, augmenter dans docker-compose:
|
||||
environment:
|
||||
- GUNICORN_TIMEOUT=1200
|
||||
```
|
||||
|
||||
## Container dictia "unhealthy"
|
||||
|
||||
**Symptome**: `docker ps` montre "unhealthy" pour le container dictia.
|
||||
|
||||
**Diagnostic**:
|
||||
```bash
|
||||
# Voir les logs
|
||||
docker logs dictia --tail 50
|
||||
|
||||
# Tester manuellement
|
||||
docker exec dictia python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"
|
||||
```
|
||||
|
||||
**Causes courantes**:
|
||||
- `.env` mal configure (SECRET_KEY manquant)
|
||||
- Base de donnees corrompue (restaurer backup)
|
||||
- Port 8899 deja utilise
|
||||
|
||||
## ASR Proxy: "No GPU available"
|
||||
|
||||
**Symptome**: Transcription echoue avec "No GPU available in any Canadian zone".
|
||||
|
||||
**Causes**:
|
||||
- GCP n'a pas de GPU disponible (capacite epuisee)
|
||||
- Credentials GCP expirees
|
||||
- Budget mensuel atteint
|
||||
|
||||
**Diagnostic**:
|
||||
```bash
|
||||
# Verifier le statut du proxy
|
||||
curl -s http://localhost:9090/health | python3 -m json.tool
|
||||
|
||||
# Verifier les stats (budget)
|
||||
curl -s http://localhost:9090/stats | python3 -m json.tool
|
||||
|
||||
# Voir les logs
|
||||
journalctl -u asr-proxy --since "1 hour ago"
|
||||
```
|
||||
|
||||
**Solutions**:
|
||||
- Attendre (GCP libere des GPUs regulierement)
|
||||
- Le proxy reessaie automatiquement apres un cooldown de 3 minutes
|
||||
- Verifier le dashboard: http://localhost:9090
|
||||
|
||||
## Build Docker lent/echoue
|
||||
|
||||
**Symptome**: `docker build` prend trop de temps ou echoue.
|
||||
|
||||
**Solutions**:
|
||||
```bash
|
||||
# Limiter les ressources si le VPS est petit
|
||||
docker build --memory=2g --cpus=2 -t innova-ai/dictia:latest .
|
||||
|
||||
# Nettoyer le cache Docker si le disque est plein
|
||||
docker builder prune -f
|
||||
docker image prune -f
|
||||
```
|
||||
|
||||
## Base de donnees corrompue
|
||||
|
||||
**Symptome**: Erreur SQLite au demarrage.
|
||||
|
||||
**Solution**:
|
||||
```bash
|
||||
# Restaurer le dernier backup
|
||||
bash deployment/tools/restore.sh backups/dictia-LATEST.tar.gz
|
||||
|
||||
# Ou recreer la base (perd les donnees)
|
||||
rm data/instance/transcriptions.db
|
||||
docker compose -f deployment/docker/docker-compose.<profil>.yml restart
|
||||
```
|
||||
|
||||
## Port 8899 deja utilise
|
||||
|
||||
```bash
|
||||
# Trouver qui utilise le port
|
||||
sudo lsof -i :8899
|
||||
# ou
|
||||
sudo ss -tlnp | grep 8899
|
||||
|
||||
# Arreter le processus ou changer le port dans docker-compose
|
||||
ports:
|
||||
- "8900:8899" # utiliser 8900 a la place
|
||||
```
|
||||
|
||||
## Mise a jour qui casse tout
|
||||
|
||||
```bash
|
||||
# Rollback: revenir au commit precedent
|
||||
cd dictia
|
||||
git log --oneline -5 # trouver le bon commit
|
||||
git checkout <commit-hash>
|
||||
|
||||
# Rebuild et redemarrer
|
||||
docker build -t innova-ai/dictia:latest .
|
||||
docker compose -f deployment/docker/docker-compose.<profil>.yml down
|
||||
docker compose -f deployment/docker/docker-compose.<profil>.yml up -d
|
||||
```
|
||||
|
||||
## Commande de diagnostic rapide
|
||||
|
||||
```bash
|
||||
# Tout verifier d'un coup
|
||||
bash deployment/tools/health-check.sh --json | python3 -m json.tool
|
||||
```
|
||||
148
deployment/docs/VPS-SETUP.md
Normal file
148
deployment/docs/VPS-SETUP.md
Normal file
@@ -0,0 +1,148 @@
|
||||
# Setup VPS from scratch — DictIA
|
||||
|
||||
Guide complet pour deployer DictIA sur un VPS Ubuntu.
|
||||
Teste sur OVH VPS avec Ubuntu 22.04/24.04.
|
||||
|
||||
## 1. Preparation du VPS
|
||||
|
||||
```bash
|
||||
# Mise a jour systeme
|
||||
sudo apt update && sudo apt upgrade -y
|
||||
|
||||
# Installer les essentiels
|
||||
sudo apt install -y curl git
|
||||
```
|
||||
|
||||
## 2. Docker
|
||||
|
||||
```bash
|
||||
# Installer Docker (methode officielle)
|
||||
curl -fsSL https://get.docker.com | sh
|
||||
|
||||
# Ajouter l'utilisateur au groupe docker
|
||||
sudo usermod -aG docker $USER
|
||||
|
||||
# Se reconnecter pour appliquer le groupe
|
||||
exit
|
||||
# (reconnecter via SSH)
|
||||
|
||||
# Verifier
|
||||
docker --version
|
||||
docker compose version
|
||||
```
|
||||
|
||||
## 3. Tailscale (recommande)
|
||||
|
||||
Tailscale fournit un VPN mesh pour acceder au VPS sans exposer de ports publics.
|
||||
|
||||
```bash
|
||||
# Installer Tailscale
|
||||
curl -fsSL https://tailscale.com/install.sh | sh
|
||||
|
||||
# Connecter au tailnet
|
||||
sudo tailscale up
|
||||
|
||||
# Verifier
|
||||
tailscale status
|
||||
```
|
||||
|
||||
## 4. DictIA
|
||||
|
||||
```bash
|
||||
# Cloner le repo
|
||||
cd ~
|
||||
git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
|
||||
cd dictia
|
||||
git checkout dictia-branding
|
||||
|
||||
# Lancer le setup
|
||||
bash deployment/setup.sh --profile cloud
|
||||
```
|
||||
|
||||
Le setup va:
|
||||
- Generer le `.env` avec vos identifiants
|
||||
- Creer les repertoires de donnees
|
||||
- Builder l'image Docker
|
||||
- Demarrer les containers
|
||||
|
||||
## 5. ASR Proxy (GCP GPU)
|
||||
|
||||
```bash
|
||||
# Installer le proxy
|
||||
bash deployment/asr-proxy/setup.sh
|
||||
|
||||
# Ajouter les credentials GCP
|
||||
# Copier votre fichier de credentials dans:
|
||||
cp ~/gcp-credentials.json deployment/asr-proxy/gcp-credentials.json
|
||||
|
||||
# Demarrer le service
|
||||
sudo systemctl start asr-proxy
|
||||
sudo systemctl status asr-proxy
|
||||
```
|
||||
|
||||
## 6. Securite
|
||||
|
||||
```bash
|
||||
# Docker daemon config (log rotation)
|
||||
sudo cp deployment/security/docker-daemon.json /etc/docker/daemon.json
|
||||
sudo systemctl restart docker
|
||||
|
||||
# Firewall iptables (bloque trafic non-Tailscale)
|
||||
sudo bash deployment/security/iptables-rules.sh
|
||||
|
||||
# Service systemd pour les regles au boot
|
||||
sudo cp deployment/security/docker-iptables.service /etc/systemd/system/
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable docker-iptables
|
||||
```
|
||||
|
||||
## 7. Tailscale Serve (HTTPS)
|
||||
|
||||
```bash
|
||||
# Expose DictIA et le dashboard ASR via Tailscale HTTPS
|
||||
bash deployment/config/tailscale/setup-serve.sh
|
||||
|
||||
# Verifier
|
||||
tailscale serve status
|
||||
```
|
||||
|
||||
DictIA sera accessible a `https://votre-hostname.tailnet.ts.net/`.
|
||||
|
||||
## 8. Service systemd (auto-start)
|
||||
|
||||
```bash
|
||||
# Adapter le chemin dans le fichier si necessaire
|
||||
sudo cp deployment/config/systemd/dictia.service /etc/systemd/system/
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable dictia
|
||||
```
|
||||
|
||||
## 9. Verification
|
||||
|
||||
```bash
|
||||
# Health check complet
|
||||
bash deployment/tools/health-check.sh
|
||||
|
||||
# Verifier les endpoints
|
||||
curl -s http://localhost:8899/health
|
||||
curl -s http://localhost:9090/health
|
||||
```
|
||||
|
||||
## 10. Premier backup
|
||||
|
||||
```bash
|
||||
bash deployment/tools/backup.sh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Checklist post-installation
|
||||
|
||||
- [ ] DictIA repond sur :8899
|
||||
- [ ] ASR Proxy repond sur :9090
|
||||
- [ ] Tailscale Serve configure
|
||||
- [ ] iptables: seul Tailscale peut acceder
|
||||
- [ ] Docker: log rotation configuree
|
||||
- [ ] Service systemd enable (auto-start au boot)
|
||||
- [ ] Premier backup effectue
|
||||
- [ ] Identifiants admin testes
|
||||
101
deployment/profiles/docker-compose.dictia16.yml
Normal file
101
deployment/profiles/docker-compose.dictia16.yml
Normal file
@@ -0,0 +1,101 @@
|
||||
# =============================================================================
|
||||
# DictIA 16 — Docker Compose
|
||||
# GPU : RTX 5070 Ti (16 Go VRAM)
|
||||
# =============================================================================
|
||||
#
|
||||
# Services :
|
||||
# - dictia : Application principale DictIA
|
||||
# - whisperx-asr : Service de transcription WhisperX Large-v3
|
||||
# - ollama : LLM local Mistral 7B (résumés, chat, Q&A)
|
||||
#
|
||||
# Démarrage :
# 1. cp deployment/profiles/env.dictia16.example .env
# 2. docker compose -f deployment/profiles/docker-compose.dictia16.yml up -d
# 3. Télécharger Mistral : docker exec ollama ollama pull mistral
|
||||
#
|
||||
# Note : Aucune clé API nécessaire — tout tourne en local (100% privé).
|
||||
# =============================================================================
|
||||
|
||||
services:
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Application DictIA
|
||||
# ---------------------------------------------------------------------------
|
||||
dictia:
|
||||
image: dictia:latest
|
||||
container_name: dictia
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8899:8899"
|
||||
env_file:
|
||||
- ../.env
|
||||
environment:
|
||||
- LOG_LEVEL=ERROR
|
||||
volumes:
|
||||
- ../uploads:/data/uploads
|
||||
- ../instance:/data/instance
|
||||
# Décommenter pour l'export automatique :
|
||||
# - ../exports:/data/exports
|
||||
# Décommenter pour le traitement automatique :
|
||||
# - ../auto-process:/data/auto-process
|
||||
depends_on:
|
||||
- whisperx-asr
|
||||
- ollama
|
||||
networks:
|
||||
- dictia-net
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WhisperX ASR — Transcription locale (WhisperX Large-v3)
|
||||
# RTX 5070 Ti : BATCH_SIZE=32, COMPUTE_TYPE=float16
|
||||
# ---------------------------------------------------------------------------
|
||||
whisperx-asr:
|
||||
image: murtazanasir/whisperx-asr-service:latest
|
||||
container_name: whisperx-asr
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- HF_TOKEN=${HF_TOKEN}
|
||||
- DEVICE=cuda
|
||||
- COMPUTE_TYPE=float16
|
||||
- BATCH_SIZE=32
|
||||
- DEFAULT_MODEL=large-v3
|
||||
volumes:
|
||||
- whisperx-models:/root/.cache
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
networks:
|
||||
- dictia-net
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ollama — LLM local Mistral 7B
|
||||
# Résumés, points d'action, Q&A — 100% local, aucune donnée externe
|
||||
# ---------------------------------------------------------------------------
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
container_name: ollama
|
||||
restart: unless-stopped
|
||||
volumes:
|
||||
- ollama-models:/root/.ollama
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
networks:
|
||||
- dictia-net
|
||||
|
||||
networks:
|
||||
dictia-net:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
whisperx-models:
|
||||
driver: local
|
||||
ollama-models:
|
||||
driver: local
|
||||
75
deployment/profiles/docker-compose.dictia8.yml
Normal file
75
deployment/profiles/docker-compose.dictia8.yml
Normal file
@@ -0,0 +1,75 @@
|
||||
# =============================================================================
|
||||
# DictIA 8 — Docker Compose
|
||||
# GPU : RTX 5060 (8 Go VRAM)
|
||||
# =============================================================================
|
||||
#
|
||||
# Services :
|
||||
# - dictia : Application principale DictIA
|
||||
# - whisperx-asr : Service de transcription WhisperX Large-v3
|
||||
#
|
||||
# Démarrage :
# 1. cp deployment/profiles/env.dictia8.example .env
# 2. Remplir TEXT_MODEL_API_KEY dans .env
# 3. docker compose -f deployment/profiles/docker-compose.dictia8.yml up -d
|
||||
# =============================================================================
|
||||
|
||||
services:
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Application DictIA
|
||||
# ---------------------------------------------------------------------------
|
||||
dictia:
|
||||
image: dictia:latest
|
||||
container_name: dictia
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "8899:8899"
|
||||
env_file:
|
||||
- ../.env
|
||||
environment:
|
||||
- LOG_LEVEL=ERROR
|
||||
volumes:
|
||||
- ../uploads:/data/uploads
|
||||
- ../instance:/data/instance
|
||||
# Décommenter pour l'export automatique :
|
||||
# - ../exports:/data/exports
|
||||
# Décommenter pour le traitement automatique :
|
||||
# - ../auto-process:/data/auto-process
|
||||
depends_on:
|
||||
- whisperx-asr
|
||||
networks:
|
||||
- dictia-net
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# WhisperX ASR — Transcription locale (WhisperX Large-v3)
|
||||
# RTX 5060 : BATCH_SIZE=16, COMPUTE_TYPE=float16
|
||||
# ---------------------------------------------------------------------------
|
||||
whisperx-asr:
|
||||
image: murtazanasir/whisperx-asr-service:latest
|
||||
container_name: whisperx-asr
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- HF_TOKEN=${HF_TOKEN}
|
||||
- DEVICE=cuda
|
||||
- COMPUTE_TYPE=float16
|
||||
- BATCH_SIZE=16
|
||||
- DEFAULT_MODEL=large-v3
|
||||
volumes:
|
||||
- whisperx-models:/root/.cache
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: 1
|
||||
capabilities: [gpu]
|
||||
networks:
|
||||
- dictia-net
|
||||
|
||||
networks:
|
||||
dictia-net:
|
||||
driver: bridge
|
||||
|
||||
volumes:
|
||||
whisperx-models:
|
||||
driver: local
|
||||
134
deployment/profiles/env.dictia16.example
Normal file
134
deployment/profiles/env.dictia16.example
Normal file
@@ -0,0 +1,134 @@
|
||||
# =============================================================================
|
||||
# DictIA 16 — Configuration (.env)
|
||||
# GPU : RTX 5070 Ti (16 Go VRAM)
|
||||
# =============================================================================
|
||||
#
|
||||
# Architecture :
|
||||
# - Transcription : WhisperX Large-v3 (local, ~5,5 Go VRAM)
|
||||
# - LLM (résumés) : Mistral 7B local via Ollama (~6,4 Go VRAM)
|
||||
# - Mode : Séquentiel (transcription puis résumé)
|
||||
# - Total VRAM : ~11,9 Go / 16 Go (marge ~4,1 Go)
|
||||
#
|
||||
# Démarrage rapide :
# 1. cp deployment/profiles/env.dictia16.example .env
# 2. Aucune clé API nécessaire — tout tourne en local
# 3. docker compose -f deployment/profiles/docker-compose.dictia16.yml up -d
|
||||
# =============================================================================
|
||||
|
||||
# =============================================================================
|
||||
# MODÈLE DE TEXTE — Résumés, titres, chat (LLM LOCAL)
|
||||
# =============================================================================
|
||||
# DictIA 16 utilise Mistral 7B en local via Ollama.
|
||||
# Aucune donnée ne quitte le serveur — 100% privé.
|
||||
|
||||
TEXT_MODEL_BASE_URL=http://ollama:11434/v1
|
||||
TEXT_MODEL_API_KEY=not-required
|
||||
TEXT_MODEL_NAME=mistral
|
||||
|
||||
# --- Modèle de chat séparé (optionnel) ---
|
||||
# Même modèle par défaut, mais peut être changé pour un modèle plus rapide.
|
||||
# CHAT_MODEL_API_KEY=not-required
|
||||
# CHAT_MODEL_BASE_URL=http://ollama:11434/v1
|
||||
# CHAT_MODEL_NAME=mistral
|
||||
|
||||
# =============================================================================
|
||||
# TRANSCRIPTION — WhisperX ASR local (REQUIS)
|
||||
# =============================================================================
|
||||
# WhisperX tourne en local dans un conteneur Docker séparé.
|
||||
# Le service ASR est défini dans docker-compose.dictia16.yml.
|
||||
|
||||
ASR_BASE_URL=http://whisperx-asr:9000
|
||||
|
||||
# Diarisation (identification automatique des locuteurs) — recommandé
|
||||
ASR_DIARIZE=true
|
||||
ASR_RETURN_SPEAKER_EMBEDDINGS=true
|
||||
|
||||
# Nombre de locuteurs attendus (optionnel — aide la précision)
|
||||
# ASR_MIN_SPEAKERS=1
|
||||
# ASR_MAX_SPEAKERS=6
|
||||
|
||||
# =============================================================================
|
||||
# PARAMÈTRES ADMINISTRATEUR
|
||||
# =============================================================================
|
||||
ADMIN_USERNAME=admin
|
||||
ADMIN_EMAIL=admin@votreentreprise.com
|
||||
ADMIN_PASSWORD=changeme
|
||||
|
||||
# =============================================================================
|
||||
# ACCÈS ET INSCRIPTION
|
||||
# =============================================================================
|
||||
# Désactiver l'inscription publique (accès sur invitation uniquement)
|
||||
ALLOW_REGISTRATION=false
|
||||
|
||||
# Restreindre l'inscription aux domaines autorisés
|
||||
# Exemple : REGISTRATION_ALLOWED_DOMAINS=votreentreprise.com
|
||||
REGISTRATION_ALLOWED_DOMAINS=
|
||||
|
||||
# =============================================================================
|
||||
# FUSEAU HORAIRE
|
||||
# =============================================================================
|
||||
# Exemples : America/Toronto, America/Montreal, America/New_York, UTC
|
||||
TIMEZONE="America/Toronto"
|
||||
|
||||
# =============================================================================
|
||||
# LIMITES DE TOKENS
|
||||
# =============================================================================
|
||||
SUMMARY_MAX_TOKENS=8000
|
||||
CHAT_MAX_TOKENS=5000
|
||||
|
||||
# =============================================================================
|
||||
# COMPRESSION AUDIO
|
||||
# =============================================================================
|
||||
AUDIO_COMPRESS_UPLOADS=true
|
||||
AUDIO_CODEC=mp3
|
||||
AUDIO_BITRATE=128k
|
||||
|
||||
# =============================================================================
|
||||
# FONCTIONNALITÉS OPTIONNELLES
|
||||
# =============================================================================
|
||||
|
||||
# Inquire Mode — recherche IA sur tous les enregistrements
|
||||
# Peut être activé sur DictIA 16 (plus de VRAM disponible)
|
||||
ENABLE_INQUIRE_MODE=false
|
||||
|
||||
# Traitement automatique de fichiers (dossier surveillé)
|
||||
ENABLE_AUTO_PROCESSING=false
|
||||
# AUTO_PROCESS_MODE=admin_only
|
||||
# AUTO_PROCESS_WATCH_DIR=/data/auto-process
|
||||
|
||||
# Export automatique
|
||||
ENABLE_AUTO_EXPORT=false
|
||||
# AUTO_EXPORT_DIR=/data/exports
|
||||
# AUTO_EXPORT_TRANSCRIPTION=true
|
||||
# AUTO_EXPORT_SUMMARY=true
|
||||
|
||||
# Suppression automatique / rétention
|
||||
ENABLE_AUTO_DELETION=false
|
||||
# GLOBAL_RETENTION_DAYS=90
|
||||
# DELETION_MODE=audio_only
|
||||
|
||||
# =============================================================================
|
||||
# PARTAGE
|
||||
# =============================================================================
|
||||
ENABLE_INTERNAL_SHARING=false
|
||||
ENABLE_PUBLIC_SHARING=true
|
||||
USERS_CAN_DELETE=true
|
||||
|
||||
# =============================================================================
|
||||
# FILES D'ATTENTE DE TRAITEMENT
|
||||
# =============================================================================
|
||||
JOB_QUEUE_WORKERS=2
|
||||
SUMMARY_QUEUE_WORKERS=2
|
||||
JOB_MAX_RETRIES=3
|
||||
|
||||
# =============================================================================
|
||||
# BASE DE DONNÉES ET STOCKAGE
|
||||
# =============================================================================
|
||||
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
|
||||
UPLOAD_FOLDER=/data/uploads
|
||||
|
||||
# =============================================================================
|
||||
# JOURNALISATION
|
||||
# =============================================================================
|
||||
# ERROR = production (minimal), INFO = débogage, DEBUG = développement
|
||||
LOG_LEVEL=ERROR
|
||||
126
deployment/profiles/env.dictia8.example
Normal file
126
deployment/profiles/env.dictia8.example
Normal file
@@ -0,0 +1,126 @@
|
||||
# =============================================================================
|
||||
# DictIA 8 — Configuration (.env)
|
||||
# GPU : RTX 5060 (8 Go VRAM)
|
||||
# =============================================================================
|
||||
#
|
||||
# Architecture :
|
||||
# - Transcription : WhisperX Large-v3 (local, ~5,5 Go VRAM)
|
||||
# - LLM (résumés) : API cloud via OpenRouter (VRAM insuffisante pour LLM local)
|
||||
#
|
||||
# Démarrage rapide :
# 1. cp deployment/profiles/env.dictia8.example .env
# 2. Remplir TEXT_MODEL_API_KEY (la transcription est locale, aucune clé requise)
# 3. docker compose -f deployment/profiles/docker-compose.dictia8.yml up -d
|
||||
# =============================================================================
|
||||
|
||||
# =============================================================================
|
||||
# MODÈLE DE TEXTE — Résumés, titres, chat (REQUIS)
|
||||
# =============================================================================
|
||||
# DictIA 8 utilise un LLM cloud via OpenRouter (VRAM insuffisante pour LLM local).
|
||||
# Inscrivez-vous sur https://openrouter.ai pour obtenir une clé API.
|
||||
|
||||
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
|
||||
TEXT_MODEL_API_KEY=votre_cle_openrouter
|
||||
TEXT_MODEL_NAME=openai/gpt-4o-mini
|
||||
|
||||
# =============================================================================
|
||||
# TRANSCRIPTION — WhisperX ASR local (REQUIS)
|
||||
# =============================================================================
|
||||
# WhisperX tourne en local dans un conteneur Docker séparé.
|
||||
# Le service ASR est défini dans docker-compose.dictia8.yml.
|
||||
|
||||
ASR_BASE_URL=http://whisperx-asr:9000
|
||||
|
||||
# Diarisation (identification automatique des locuteurs) — recommandé
|
||||
ASR_DIARIZE=true
|
||||
ASR_RETURN_SPEAKER_EMBEDDINGS=true
|
||||
|
||||
# Nombre de locuteurs attendus (optionnel — aide la précision)
|
||||
# ASR_MIN_SPEAKERS=1
|
||||
# ASR_MAX_SPEAKERS=6
|
||||
|
||||
# =============================================================================
|
||||
# PARAMÈTRES ADMINISTRATEUR
|
||||
# =============================================================================
|
||||
ADMIN_USERNAME=admin
|
||||
ADMIN_EMAIL=admin@votreentreprise.com
|
||||
ADMIN_PASSWORD=changeme
|
||||
|
||||
# =============================================================================
|
||||
# ACCÈS ET INSCRIPTION
|
||||
# =============================================================================
|
||||
# Désactiver l'inscription publique (accès sur invitation uniquement)
|
||||
ALLOW_REGISTRATION=false
|
||||
|
||||
# Restreindre l'inscription aux domaines autorisés
|
||||
# Exemple : REGISTRATION_ALLOWED_DOMAINS=votreentreprise.com
|
||||
REGISTRATION_ALLOWED_DOMAINS=
|
||||
|
||||
# =============================================================================
|
||||
# FUSEAU HORAIRE
|
||||
# =============================================================================
|
||||
# Exemples : America/Toronto, America/Montreal, America/New_York, UTC
|
||||
TIMEZONE="America/Toronto"
|
||||
|
||||
# =============================================================================
|
||||
# LIMITES DE TOKENS
|
||||
# =============================================================================
|
||||
SUMMARY_MAX_TOKENS=8000
|
||||
CHAT_MAX_TOKENS=5000
|
||||
|
||||
# =============================================================================
|
||||
# COMPRESSION AUDIO
|
||||
# =============================================================================
|
||||
AUDIO_COMPRESS_UPLOADS=true
|
||||
AUDIO_CODEC=mp3
|
||||
AUDIO_BITRATE=128k
|
||||
|
||||
# =============================================================================
|
||||
# FONCTIONNALITÉS OPTIONNELLES
|
||||
# =============================================================================
|
||||
|
||||
# Inquire Mode — recherche IA sur tous les enregistrements
|
||||
# Désactivé sur DictIA 8 (VRAM insuffisante pour embeddings locaux)
|
||||
ENABLE_INQUIRE_MODE=false
|
||||
|
||||
# Traitement automatique de fichiers (dossier surveillé)
|
||||
ENABLE_AUTO_PROCESSING=false
|
||||
# AUTO_PROCESS_MODE=admin_only
|
||||
# AUTO_PROCESS_WATCH_DIR=/data/auto-process
|
||||
|
||||
# Export automatique
|
||||
ENABLE_AUTO_EXPORT=false
|
||||
# AUTO_EXPORT_DIR=/data/exports
|
||||
# AUTO_EXPORT_TRANSCRIPTION=true
|
||||
# AUTO_EXPORT_SUMMARY=true
|
||||
|
||||
# Suppression automatique / rétention
|
||||
ENABLE_AUTO_DELETION=false
|
||||
# GLOBAL_RETENTION_DAYS=90
|
||||
# DELETION_MODE=audio_only
|
||||
|
||||
# =============================================================================
|
||||
# PARTAGE
|
||||
# =============================================================================
|
||||
ENABLE_INTERNAL_SHARING=false
|
||||
ENABLE_PUBLIC_SHARING=true
|
||||
USERS_CAN_DELETE=true
|
||||
|
||||
# =============================================================================
|
||||
# FILES D'ATTENTE DE TRAITEMENT
|
||||
# =============================================================================
|
||||
JOB_QUEUE_WORKERS=2
|
||||
SUMMARY_QUEUE_WORKERS=2
|
||||
JOB_MAX_RETRIES=3
|
||||
|
||||
# =============================================================================
|
||||
# BASE DE DONNÉES ET STOCKAGE
|
||||
# =============================================================================
|
||||
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
|
||||
UPLOAD_FOLDER=/data/uploads
|
||||
|
||||
# =============================================================================
|
||||
# JOURNALISATION
|
||||
# =============================================================================
|
||||
# ERROR = production (minimal), INFO = débogage, DEBUG = développement
|
||||
LOG_LEVEL=ERROR
|
||||
8
deployment/security/docker-daemon.json
Normal file
8
deployment/security/docker-daemon.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"log-driver": "json-file",
|
||||
"log-opts": {
|
||||
"max-size": "10m",
|
||||
"max-file": "3"
|
||||
},
|
||||
"storage-driver": "overlay2"
|
||||
}
|
||||
12
deployment/security/docker-iptables.service
Normal file
12
deployment/security/docker-iptables.service
Normal file
@@ -0,0 +1,12 @@
|
||||
[Unit]
|
||||
Description=DictIA Docker iptables rules
|
||||
After=docker.service tailscaled.service
|
||||
Requires=docker.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
RemainAfterExit=yes
|
||||
ExecStart=/bin/bash /opt/dictia/deployment/security/iptables-rules.sh
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
36
deployment/security/iptables-rules.sh
Normal file
36
deployment/security/iptables-rules.sh
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env bash
# DictIA — iptables rules for cloud VPS
#
# Allows Docker internal traffic to reach the ASR proxy on port 9090.
# Blocks direct external access to Docker container IPs.
# Tailscale + UFW handle the main firewall — this script adds Docker-specific rules.
#
# Usage: sudo bash iptables-rules.sh
set -euo pipefail

echo "=== DictIA iptables rules ==="

# Allow Docker containers (172.16.0.0/12) to reach ASR proxy on port 9090.
# -C probes for an existing rule first so repeated runs stay idempotent;
# the rule is inserted at position 1 so it precedes the default DROP policy.
iptables -C INPUT -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT 2>/dev/null \
    || iptables -I INPUT 1 -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT

# Block direct external access to Docker container IPs (raw table, before conntrack).
# Protects containers on non-default bridge networks (e.g., dictia-network).
for NETWORK_ID in $(docker network ls --filter driver=bridge --format '{{.ID}}' 2>/dev/null); do
    # BUGFIX: the Options map key contains dots, so it must be read with the
    # template "index" function. Dotted access ({{.Options.com.docker...}})
    # is parsed as a chain of lookups ("com" -> "docker" -> ...) and errors
    # out, which left BRIDGE empty and silently skipped every network.
    BRIDGE=$(docker network inspect "$NETWORK_ID" \
        --format '{{index .Options "com.docker.network.bridge.name"}}' 2>/dev/null || echo "")
    [ -z "$BRIDGE" ] && continue
    [ "$BRIDGE" = "docker0" ] && continue   # leave the default bridge untouched

    for CONTAINER_IP in $(docker network inspect "$NETWORK_ID" \
        --format '{{range .Containers}}{{.IPv4Address}} {{end}}' 2>/dev/null); do
        IP="${CONTAINER_IP%/*}"             # strip the /prefix-length suffix
        [ -z "$IP" ] && continue
        iptables -t raw -C PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP 2>/dev/null \
            || iptables -t raw -A PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP
        echo " Protected $IP on $BRIDGE"
    done
done

echo "Rules applied. Tailscale + Docker internal traffic allowed."
echo "Verify with: sudo iptables -L -n -t raw"
||||
300
deployment/setup.sh
Executable file
300
deployment/setup.sh
Executable file
@@ -0,0 +1,300 @@
|
||||
#!/usr/bin/env bash
# DictIA — Main setup script
#
# Interactive installer that detects hardware and configures the appropriate
# deployment profile (cloud, local-cpu, local-gpu).
#
# Usage:
#   bash deployment/setup.sh                     # Interactive mode
#   bash deployment/setup.sh --profile cloud     # Non-interactive
#   bash deployment/setup.sh --profile local-gpu
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
PROFILE=""

# Parse args: accepts both "--profile=NAME" and "--profile NAME".
for arg in "$@"; do
    case "$arg" in
        --profile=*) PROFILE="${arg#*=}" ;;
        --profile) shift_next=true ;;
        *)
            if [ "${shift_next:-false}" = true ]; then
                PROFILE="$arg"
                shift_next=false
            fi
            ;;
    esac
done

# --- Colors ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

info() { echo -e "${CYAN}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
err() { echo -e "${RED}[ERROR]${NC} $*"; }

# sed_escape VALUE — escape a value for the replacement side of a
# sed "s|…|…|" expression. Backslash, "&" and the "|" delimiter are special
# there; without escaping, a password or API key containing one of them
# silently corrupts .env.
sed_escape() {
    printf '%s' "$1" | sed -e 's/[\\&|]/\\&/g'
}

# set_env KEY VALUE — rewrite the "KEY=..." line of $ENV_FILE in place.
set_env() {
    sed -i "s|^$1=.*|$1=$(sed_escape "$2")|" "$ENV_FILE"
}

echo
echo -e "${CYAN}========================================${NC}"
echo -e "${CYAN} DictIA — Setup${NC}"
echo -e "${CYAN}========================================${NC}"
echo

# ==========================================================================
# 1. Hardware Detection
# ==========================================================================
info "Detecting hardware..."

# Docker — the daemon must be reachable, not just the CLI installed.
if command -v docker &>/dev/null && docker info &>/dev/null; then
    DOCKER_VERSION=$(docker --version | grep -oP '\d+\.\d+\.\d+' | head -1)
    ok "Docker $DOCKER_VERSION"
else
    err "Docker not found or not running."
    echo " Install Docker: https://docs.docker.com/engine/install/"
    exit 1
fi

# Docker Compose V2 (plugin syntax: "docker compose")
if docker compose version &>/dev/null; then
    COMPOSE_VERSION=$(docker compose version --short 2>/dev/null || echo "unknown")
    ok "Docker Compose $COMPOSE_VERSION"
else
    err "Docker Compose not found."
    echo " Docker Compose V2 is required (comes with Docker Desktop or docker-compose-plugin)"
    exit 1
fi

# GPU — detect an NVIDIA card and whether Docker can use it.
HAS_GPU=false
if command -v nvidia-smi &>/dev/null; then
    GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 || echo "")
    if [ -n "$GPU_NAME" ]; then
        HAS_GPU=true
        ok "NVIDIA GPU: $GPU_NAME"
        # Check nvidia-container-toolkit
        if docker info 2>/dev/null | grep -qi nvidia; then
            ok "nvidia-container-toolkit detected"
        else
            warn "nvidia-container-toolkit not detected. Required for local-gpu profile."
            echo " Install: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
        fi
    fi
else
    info "No NVIDIA GPU detected"
fi

# RAM
if command -v free &>/dev/null; then
    RAM_GB=$(free -g | awk '/Mem:/{print $2}')
    info "RAM: ${RAM_GB}GB"
fi

# Disk
DISK_AVAIL=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $4}')
info "Disk available: $DISK_AVAIL"

echo

# ==========================================================================
# 2. Profile Selection
# ==========================================================================
if [ -z "$PROFILE" ]; then
    echo -e "${CYAN}Select deployment profile:${NC}"
    echo
    echo " 1) cloud — VPS with ASR Proxy (GCP GPU on demand)"
    echo " Best for: remote servers, pay-per-use GPU"
    echo
    echo " 2) local-gpu — Local NVIDIA GPU for transcription"
    echo " Best for: dedicated GPU server, fastest"
    if [ "$HAS_GPU" = false ]; then
        echo -e " ${YELLOW}(No GPU detected on this machine)${NC}"
    fi
    echo
    echo " 3) local-cpu — CPU-only transcription (slow)"
    echo " Best for: testing, low-volume usage"
    echo
    read -rp "Choice [1-3]: " CHOICE
    case "$CHOICE" in
        1) PROFILE="cloud" ;;
        2) PROFILE="local-gpu" ;;
        3) PROFILE="local-cpu" ;;
        *) err "Invalid choice"; exit 1 ;;
    esac
fi

COMPOSE_FILE="$SCRIPT_DIR/docker/docker-compose.$PROFILE.yml"
if [ ! -f "$COMPOSE_FILE" ]; then
    err "Compose file not found: $COMPOSE_FILE"
    exit 1
fi

ok "Profile: $PROFILE"
echo

# ==========================================================================
# 3. Generate .env
# ==========================================================================
ENV_FILE="$PROJECT_DIR/.env"

if [ -f "$ENV_FILE" ]; then
    warn ".env already exists. Keeping existing configuration."
    echo " To reconfigure, delete .env and re-run setup."
else
    info "Generating .env..."

    # Generate a 64-hex-char secret key. The raw /dev/urandom fallback uses
    # "xxd -p -c 64" so all 64 hex chars land on one line — plain "xxd -p"
    # wraps at 60 chars, which would embed a newline inside the key and
    # corrupt the .env file.
    SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_hex(32))" 2>/dev/null \
        || openssl rand -hex 32 2>/dev/null \
        || head -c 32 /dev/urandom | xxd -p -c 64 | tr -d '\n')

    # Prompt for admin credentials
    read -rp "Admin username [admin]: " ADMIN_USER
    ADMIN_USER="${ADMIN_USER:-admin}"
    read -rp "Admin email [admin@example.com]: " ADMIN_EMAIL
    ADMIN_EMAIL="${ADMIN_EMAIL:-admin@example.com}"
    read -rsp "Admin password: " ADMIN_PASS
    echo
    ADMIN_PASS="${ADMIN_PASS:-changeme}"

    # Prompt for text model API key
    echo
    info "DictIA needs a text/LLM API key for summaries, titles, and chat."
    echo " Recommended: OpenRouter (https://openrouter.ai) — access to many models"
    read -rp "Text model API key (or press Enter to skip): " TEXT_API_KEY
    TEXT_API_KEY="${TEXT_API_KEY:-your_openrouter_api_key}"

    # HuggingFace token for diarization — local profiles only; the cloud
    # profile delegates ASR to the proxy and does not need it.
    if [ "$PROFILE" != "cloud" ]; then
        echo
        info "For speaker diarization, a HuggingFace token is needed."
        echo " Get one at: https://huggingface.co/settings/tokens"
        echo " Accept model: https://huggingface.co/pyannote/speaker-diarization-3.1"
        read -rp "HuggingFace token (or press Enter to skip): " HF_TOKEN
        HF_TOKEN="${HF_TOKEN:-}"
    else
        HF_TOKEN=""
    fi

    # Write .env from the template, then fill in the collected values.
    # set_env escapes the values, so passwords/keys with |, & or \ are safe.
    cp "$SCRIPT_DIR/docker/.env.example" "$ENV_FILE"
    set_env "SECRET_KEY" "$SECRET_KEY"
    set_env "DICTIA_PROFILE" "$PROFILE"
    set_env "ADMIN_USERNAME" "$ADMIN_USER"
    set_env "ADMIN_EMAIL" "$ADMIN_EMAIL"
    set_env "ADMIN_PASSWORD" "$ADMIN_PASS"
    set_env "TEXT_MODEL_API_KEY" "$TEXT_API_KEY"
    set_env "HF_TOKEN" "$HF_TOKEN"

    ok ".env generated"
fi
echo

# ==========================================================================
# 4. Create data directories
# ==========================================================================
info "Creating data directories..."
mkdir -p "$PROJECT_DIR/data/uploads" "$PROJECT_DIR/data/instance"
ok "data/uploads and data/instance created"
echo

# ==========================================================================
# 5. Profile-specific setup
# ==========================================================================
case "$PROFILE" in
    cloud)
        info "Cloud profile — setting up ASR Proxy..."
        if [ -f "$SCRIPT_DIR/asr-proxy/setup.sh" ]; then
            echo " Run the ASR proxy setup separately:"
            echo " bash $SCRIPT_DIR/asr-proxy/setup.sh"
        fi
        echo
        info "Setting up iptables rules..."
        # iptables needs root; otherwise just print the command to run later.
        if [ -f "$SCRIPT_DIR/security/iptables-rules.sh" ] && [ "$(id -u)" -eq 0 ]; then
            bash "$SCRIPT_DIR/security/iptables-rules.sh"
        else
            echo " Run as root: sudo bash $SCRIPT_DIR/security/iptables-rules.sh"
        fi
        echo
        info "Setting up Tailscale Serve..."
        if command -v tailscale &>/dev/null; then
            echo " Run: bash $SCRIPT_DIR/config/tailscale/setup-serve.sh"
        else
            warn "Tailscale not installed."
            echo " Install: curl -fsSL https://tailscale.com/install.sh | sh"
        fi
        ;;
    local-gpu)
        info "Local GPU profile — verifying NVIDIA runtime..."
        if docker info 2>/dev/null | grep -qi nvidia; then
            ok "NVIDIA Docker runtime available"
            # Quick GPU smoke test.
            # NOTE(review): the "nvidia/cuda:12.0-base" tag may no longer be
            # published on Docker Hub — confirm, or pin a known-good tag.
            if docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi &>/dev/null; then
                ok "GPU test passed"
            else
                warn "GPU test failed. Check nvidia-container-toolkit installation."
            fi
        else
            err "NVIDIA Docker runtime not found."
            echo " Install nvidia-container-toolkit and restart Docker."
            echo " https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
        fi
        ;;
    local-cpu)
        warn "CPU-only transcription is significantly slower than GPU."
        echo " Expect ~10x real-time (1h audio = ~10h processing)."
        echo " Consider local-gpu or cloud profile for better performance."
        ;;
esac

echo

# ==========================================================================
# 6. Build and start
# ==========================================================================
info "Building DictIA Docker image..."
cd "$PROJECT_DIR"
docker build -t innova-ai/dictia:latest .
ok "Image built"

echo
info "Starting DictIA ($PROFILE profile)..."
docker compose -f "$COMPOSE_FILE" up -d
ok "Containers started"

# ==========================================================================
# 7. Health check
# ==========================================================================
echo
info "Waiting for DictIA to become healthy..."
RETRIES=30
for i in $(seq 1 $RETRIES); do
    if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
        ok "DictIA is healthy!"
        break
    fi
    if [ "$i" -eq "$RETRIES" ]; then
        warn "Health check timeout. Check logs: docker compose -f $COMPOSE_FILE logs"
    fi
    sleep 5
done

echo
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN} DictIA is ready!${NC}"
echo -e "${GREEN}========================================${NC}"
echo
echo " App: http://localhost:8899"
echo " Profile: $PROFILE"
echo " Compose: $COMPOSE_FILE"
echo
echo " Tools:"
echo " Update: bash deployment/tools/update.sh"
echo " Backup: bash deployment/tools/backup.sh"
echo " Health check: bash deployment/tools/health-check.sh"
echo
89
deployment/tools/backup.sh
Normal file
89
deployment/tools/backup.sh
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env bash
# DictIA — Backup script
#
# Snapshots data/, the .env file, ASR proxy stats, and the whisperx-cache
# Docker volume into one timestamped tar.gz, then prunes old archives so
# only the most recent KEEP_COUNT remain.
#
# Usage: bash backup.sh [BACKUP_DIR]
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
BACKUP_BASE="${1:-$PROJECT_DIR/backups}"
KEEP_COUNT=5
STAMP=$(date +%Y%m%d-%H%M%S)
STAGING="$BACKUP_BASE/dictia-$STAMP"

echo "=== DictIA Backup ==="
echo "Project: $PROJECT_DIR"
echo "Backup: $STAGING"
echo

mkdir -p "$STAGING"

# 1. Application data
if [ ! -d "$PROJECT_DIR/data" ]; then
    echo "[1/4] No data/ directory found, skipping."
else
    echo "[1/4] Backing up data/..."
    cp -a "$PROJECT_DIR/data" "$STAGING/data"
fi

# 2. Environment file (stored as "dot-env" so it is visible in the archive)
if [ ! -f "$PROJECT_DIR/.env" ]; then
    echo "[2/4] No .env found, skipping."
else
    echo "[2/4] Backing up .env..."
    cp "$PROJECT_DIR/.env" "$STAGING/dot-env"
fi

# 3. ASR proxy usage statistics
PROXY_STATS="$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
if [ ! -f "$PROXY_STATS" ]; then
    echo "[3/4] No ASR proxy stats, skipping."
else
    echo "[3/4] Backing up ASR proxy stats..."
    cp "$PROXY_STATS" "$STAGING/asr-usage-stats.json"
fi

# 4. Docker-managed volumes (export via a throwaway alpine container)
echo "[4/4] Checking Docker volumes..."
if docker volume ls --format '{{.Name}}' 2>/dev/null | grep -q "whisperx-cache"; then
    echo " Exporting whisperx-cache volume..."
    docker run --rm -v whisperx-cache:/source -v "$STAGING":/backup \
        alpine tar czf /backup/whisperx-cache.tar.gz -C /source . 2>/dev/null || true
fi

# flag PATH — "true" when PATH exists, "false" otherwise (for the manifest).
flag() {
    if [ -e "$1" ]; then echo "true"; else echo "false"; fi
}

# Record what this backup actually contains.
cat > "$STAGING/manifest.json" <<MANIFEST
{
  "timestamp": "$STAMP",
  "project_dir": "$PROJECT_DIR",
  "hostname": "$(hostname)",
  "contents": {
    "data": $(flag "$STAGING/data"),
    "env": $(flag "$STAGING/dot-env"),
    "asr_stats": $(flag "$STAGING/asr-usage-stats.json"),
    "whisperx_cache": $(flag "$STAGING/whisperx-cache.tar.gz")
  }
}
MANIFEST

# Pack the staging directory into a single archive and drop the directory.
echo
echo "Compressing backup..."
ARCHIVE="$BACKUP_BASE/dictia-$STAMP.tar.gz"
tar czf "$ARCHIVE" -C "$BACKUP_BASE" "dictia-$STAMP"
rm -rf "$STAGING"
echo "Archive: $ARCHIVE ($(du -h "$ARCHIVE" | cut -f1))"

# Rotate: keep only the newest $KEEP_COUNT archives.
mapfile -t ARCHIVES < <(ls -1t "$BACKUP_BASE"/dictia-*.tar.gz 2>/dev/null)
if [ "${#ARCHIVES[@]}" -gt "$KEEP_COUNT" ]; then
    echo
    echo "Rotating backups (keeping last $KEEP_COUNT)..."
    printf '%s\n' "${ARCHIVES[@]:$KEEP_COUNT}" | xargs rm -f
fi

echo
echo "=== Backup complete ==="
157
deployment/tools/health-check.sh
Normal file
157
deployment/tools/health-check.sh
Normal file
@@ -0,0 +1,157 @@
|
||||
#!/usr/bin/env bash
# DictIA — Health check diagnostic
#
# Checks Docker, containers, endpoints, disk, RAM, and GPU, and reports the
# results in one of three modes.
#
# Usage:
#   bash health-check.sh          # Human-readable output
#   bash health-check.sh --json   # JSON output (machine-readable)
#   bash health-check.sh --quiet  # Exit code only (0=ok, 1=issue)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
OUTPUT="human"
ISSUES=0

for arg in "$@"; do
    case "$arg" in
        --json) OUTPUT="json" ;;
        --quiet) OUTPUT="quiet" ;;
    esac
done

# name -> "status|detail". Statuses: ok, info (neutral), warning, error.
# NOTE: the IFS='|' read below truncates a detail at its first "|" or
# newline; keep details single-line and pipe-free.
declare -A CHECKS

# check NAME STATUS [DETAIL] — record one result; warning/error count as issues.
check() {
    local name="$1"
    local status="$2"
    local detail="${3:-}"
    CHECKS["$name"]="$status|$detail"
    if [ "$status" = "error" ] || [ "$status" = "warning" ]; then
        ISSUES=$((ISSUES + 1))
    fi
}

# json_escape STR — escape backslashes and double quotes so STR is safe
# inside a JSON string value (details such as GPU names are free-form and
# would otherwise produce invalid JSON).
json_escape() {
    printf '%s' "$1" | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g'
}

# --- Docker ---
if command -v docker &>/dev/null && docker info &>/dev/null; then
    check "docker" "ok" "Docker daemon running"
else
    check "docker" "error" "Docker not available"
fi

# --- Containers ---
# The dictia container defines a Docker HEALTHCHECK, so we read Health.Status.
DICTIA_STATUS=$(docker inspect --format='{{.State.Health.Status}}' dictia 2>/dev/null || echo "not_found")
if [ "$DICTIA_STATUS" = "healthy" ]; then
    check "container_dictia" "ok" "healthy"
elif [ "$DICTIA_STATUS" = "not_found" ]; then
    check "container_dictia" "error" "container not found"
else
    check "container_dictia" "warning" "$DICTIA_STATUS"
fi

# whisperx only exists in the local profiles; its absence is informational.
WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null || echo "not_found")
if [ "$WHISPERX_STATUS" = "running" ]; then
    check "container_whisperx" "ok" "running"
elif [ "$WHISPERX_STATUS" = "not_found" ]; then
    check "container_whisperx" "info" "not present (cloud profile?)"
else
    check "container_whisperx" "warning" "$WHISPERX_STATUS"
fi

# --- Endpoints ---
if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
    check "endpoint_dictia" "ok" "http://localhost:8899 responding"
else
    check "endpoint_dictia" "error" "http://localhost:8899 not responding"
fi

if curl -sf -o /dev/null -m 5 http://localhost:9000/health 2>/dev/null; then
    check "endpoint_whisperx" "ok" "http://localhost:9000 responding"
else
    check "endpoint_whisperx" "info" "http://localhost:9000 not responding"
fi

if curl -sf -o /dev/null -m 5 http://localhost:9090/health 2>/dev/null; then
    check "endpoint_asr_proxy" "ok" "http://localhost:9090 responding"
else
    check "endpoint_asr_proxy" "info" "http://localhost:9090 not responding"
fi

# --- Disk ---
DISK_USED=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $5}' | tr -d '%')
if [ -n "$DISK_USED" ]; then
    if [ "$DISK_USED" -gt 90 ]; then
        check "disk" "error" "${DISK_USED}% used"
    elif [ "$DISK_USED" -gt 80 ]; then
        check "disk" "warning" "${DISK_USED}% used"
    else
        check "disk" "ok" "${DISK_USED}% used"
    fi
fi

# --- RAM ---
if command -v free &>/dev/null; then
    MEM_TOTAL=$(free -m | awk '/Mem:/{print $2}')
    MEM_AVAIL=$(free -m | awk '/Mem:/{print $7}')
    MEM_USED_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
    if [ "$MEM_USED_PCT" -gt 90 ]; then
        check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    else
        check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
    fi
fi

# --- GPU ---
if command -v nvidia-smi &>/dev/null; then
    GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader 2>/dev/null || echo "error")
    if [ "$GPU_INFO" != "error" ]; then
        check "gpu" "ok" "$GPU_INFO"
    else
        check "gpu" "warning" "nvidia-smi present but query failed"
    fi
fi

# --- Output ---
if [ "$OUTPUT" = "json" ]; then
    echo "{"
    echo "  \"timestamp\": \"$(date -Is)\","
    echo "  \"issues\": $ISSUES,"
    echo "  \"checks\": {"
    FIRST=true
    for name in "${!CHECKS[@]}"; do
        IFS='|' read -r status detail <<< "${CHECKS[$name]}"
        if [ "$FIRST" = true ]; then
            FIRST=false
        else
            echo ","
        fi
        # Details are free-form text; escape them so the JSON stays valid.
        printf '    "%s": {"status": "%s", "detail": "%s"}' "$name" "$status" "$(json_escape "$detail")"
    done
    echo
    echo "  }"
    echo "}"
elif [ "$OUTPUT" = "quiet" ]; then
    exit $( [ "$ISSUES" -eq 0 ] && echo 0 || echo 1 )
else
    echo "=== DictIA Health Check ==="
    echo
    # Fixed order for human output (associative-array order is unspecified).
    for name in docker container_dictia container_whisperx endpoint_dictia endpoint_whisperx endpoint_asr_proxy disk memory gpu; do
        if [ -n "${CHECKS[$name]+x}" ]; then
            IFS='|' read -r status detail <<< "${CHECKS[$name]}"
            case "$status" in
                ok) ICON="[OK]" ;;
                warning) ICON="[!!]" ;;
                error) ICON="[ERR]" ;;
                info) ICON="[--]" ;;
            esac
            printf " %-22s %s %s\n" "$name" "$ICON" "$detail"
        fi
    done
    echo
    if [ "$ISSUES" -eq 0 ]; then
        echo "All checks passed."
    else
        echo "$ISSUES issue(s) found."
    fi
fi
101
deployment/tools/restore.sh
Normal file
101
deployment/tools/restore.sh
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env bash
# DictIA — Restore script
#
# Restores a DictIA backup archive created by backup.sh: stops the stack,
# restores data/, .env, ASR proxy stats and the whisperx-cache volume, then
# restarts the stack.
#
# Usage: bash restore.sh <ARCHIVE_PATH> [PROJECT_DIR]
set -euo pipefail

ARCHIVE="${1:-}"
PROJECT_DIR="${2:-$(cd "$(dirname "$0")/../.." && pwd)}"

if [ -z "$ARCHIVE" ] || [ ! -f "$ARCHIVE" ]; then
    echo "Usage: bash restore.sh <backup-archive.tar.gz> [project-dir]"
    echo
    echo "Available backups:"
    ls -1t "$PROJECT_DIR/backups"/dictia-*.tar.gz 2>/dev/null | head -5 || echo " (none found)"
    exit 1
fi

echo "=== DictIA Restore ==="
echo "Archive: $ARCHIVE"
echo "Target: $PROJECT_DIR"
echo

# Validate archive: it must contain a dictia-* directory with a manifest.
echo "Validating archive..."
TMPDIR=$(mktemp -d)
tar xzf "$ARCHIVE" -C "$TMPDIR"
# "|| true" so a malformed archive reaches the friendly manifest check below
# instead of dying on a cryptic pipefail from ls under set -e.
BACKUP_DIR=$(ls -1d "$TMPDIR"/dictia-* 2>/dev/null | head -1 || true)

if [ -z "$BACKUP_DIR" ] || [ ! -f "$BACKUP_DIR/manifest.json" ]; then
    echo "ERROR: Invalid backup archive (no manifest.json)"
    rm -rf "$TMPDIR"
    exit 1
fi

echo "Manifest:"
cat "$BACKUP_DIR/manifest.json"
echo
echo

# Confirmation — restore is destructive.
read -rp "This will overwrite current data. Continue? [y/N] " CONFIRM
if [ "$CONFIRM" != "y" ] && [ "$CONFIRM" != "Y" ]; then
    echo "Aborted."
    rm -rf "$TMPDIR"
    exit 0
fi

# Stop services — find whichever profile's compose file exists.
echo
echo "Stopping DictIA services..."
COMPOSE_FILE=""
for f in cloud local-cpu local-gpu; do
    if [ -f "$PROJECT_DIR/deployment/docker/docker-compose.$f.yml" ]; then
        COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
    fi
done
if [ -n "$COMPOSE_FILE" ]; then
    docker compose -f "$COMPOSE_FILE" down 2>/dev/null || true
fi

# Restore data — move the live directory aside first so a failed copy
# (disk full, permissions) cannot leave us with no data at all; the old
# "rm -rf before cp" order lost everything if cp then failed under set -e.
if [ -d "$BACKUP_DIR/data" ]; then
    echo "Restoring data/..."
    SAFETY=""
    if [ -d "$PROJECT_DIR/data" ]; then
        SAFETY="$PROJECT_DIR/data.pre-restore.$$"
        mv "$PROJECT_DIR/data" "$SAFETY"
    fi
    cp -a "$BACKUP_DIR/data" "$PROJECT_DIR/data"
    # Copy succeeded — drop the safety snapshot.
    if [ -n "$SAFETY" ]; then
        rm -rf "$SAFETY"
    fi
fi

# Restore .env (stored as "dot-env" inside the archive)
if [ -f "$BACKUP_DIR/dot-env" ]; then
    echo "Restoring .env..."
    cp "$BACKUP_DIR/dot-env" "$PROJECT_DIR/.env"
fi

# Restore ASR stats
if [ -f "$BACKUP_DIR/asr-usage-stats.json" ]; then
    echo "Restoring ASR proxy stats..."
    cp "$BACKUP_DIR/asr-usage-stats.json" "$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
fi

# Restore Docker volumes
if [ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ]; then
    echo "Restoring whisperx-cache volume..."
    docker volume create whisperx-cache 2>/dev/null || true
    docker run --rm -v whisperx-cache:/target -v "$BACKUP_DIR":/backup \
        alpine sh -c "cd /target && tar xzf /backup/whisperx-cache.tar.gz" 2>/dev/null || true
fi

# Cleanup
rm -rf "$TMPDIR"

# Restart services
echo
echo "Restarting DictIA..."
if [ -n "$COMPOSE_FILE" ]; then
    docker compose -f "$COMPOSE_FILE" up -d
fi

echo
echo "=== Restore complete ==="
105
deployment/tools/update.sh
Normal file
105
deployment/tools/update.sh
Normal file
@@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env bash
# DictIA — Update script
#
# Pulls latest code, rebuilds the Docker image, and restarts services.
# Detects the active deployment profile automatically (running containers
# first, then the DICTIA_PROFILE entry in .env, then "cloud").
#
# Usage: bash update.sh [--no-pull] [--no-build]
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
NO_PULL=false
NO_BUILD=false

for arg in "$@"; do
    case "$arg" in
        --no-pull) NO_PULL=true ;;
        --no-build) NO_BUILD=true ;;
        *) echo "Unknown option: $arg"; exit 1 ;;
    esac
done

echo "=== DictIA Update ==="
echo "Project: $PROJECT_DIR"
echo

# 1. Detect active compose file: the first profile with running containers.
COMPOSE_FILE=""
PROFILE=""
for f in cloud local-cpu local-gpu; do
    CF="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
    if [ -f "$CF" ] && docker compose -f "$CF" ps --quiet 2>/dev/null | grep -q .; then
        COMPOSE_FILE="$CF"
        PROFILE="$f"
        break
    fi
done

if [ -z "$COMPOSE_FILE" ]; then
    # Fallback: nothing is running — read the profile from .env.
    if [ -f "$PROJECT_DIR/.env" ]; then
        PROFILE=$(grep -E '^DICTIA_PROFILE=' "$PROJECT_DIR/.env" 2>/dev/null | cut -d= -f2 || echo "cloud")
    fi
    PROFILE="${PROFILE:-cloud}"
    COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$PROFILE.yml"
fi

echo "Profile: $PROFILE"
echo "Compose: $COMPOSE_FILE"
echo

# 2. Git pull
if [ "$NO_PULL" = false ]; then
    echo "[1/5] Pulling latest code..."
    cd "$PROJECT_DIR"
    git pull origin dictia-branding
else
    echo "[1/5] Skipping git pull (--no-pull)"
fi

# 3. Rebuild DictIA image
if [ "$NO_BUILD" = false ]; then
    echo "[2/5] Building DictIA image..."
    cd "$PROJECT_DIR"
    docker build -t innova-ai/dictia:latest .
else
    echo "[2/5] Skipping build (--no-build)"
fi

# 3b. Pull upstream images (WhisperX) if local profile
if [ "$PROFILE" != "cloud" ] && [ "$NO_BUILD" = false ]; then
    echo "[3/5] Pulling upstream images (WhisperX)..."
    docker compose -f "$COMPOSE_FILE" pull whisperx-asr 2>/dev/null || true
else
    echo "[3/5] Skipping upstream pull (cloud profile or --no-build)"
fi

# 4. Restart containers
echo "[4/5] Restarting containers..."
docker compose -f "$COMPOSE_FILE" down
docker compose -f "$COMPOSE_FILE" up -d

# 5. Wait for health
echo "[5/5] Waiting for health check..."
RETRIES=30
for i in $(seq 1 $RETRIES); do
    # Match "(healthy)" with parentheses: a plain "healthy" substring also
    # matches docker's "(unhealthy)" status and reports a broken container
    # as a successful update.
    if docker compose -f "$COMPOSE_FILE" ps | grep -q "(healthy)"; then
        echo " DictIA is healthy!"
        break
    fi
    if [ "$i" -eq "$RETRIES" ]; then
        echo " WARNING: Health check timeout. Check: docker compose -f $COMPOSE_FILE logs"
    fi
    sleep 5
done

# Cleanup dangling images left behind by the rebuild
echo
echo "Cleaning up old images..."
docker image prune -f 2>/dev/null || true

echo
echo "=== Update complete ==="
echo "DictIA: http://localhost:8899"
docker compose -f "$COMPOSE_FILE" ps
Reference in New Issue
Block a user