Initial release: DictIA v0.8.14-alpha (fork of Speakr, AGPL-3.0)

InnovA AI
2026-03-16 21:47:37 +00:00
commit 42772a31ed
365 changed files with 103572 additions and 0 deletions

deployment/README.md Normal file

@@ -0,0 +1,105 @@
# DictIA — Deployment Infrastructure
Reproducible deployment infrastructure for DictIA.
## Choosing a profile
```
What is your setup?
|
+-- VPS / cloud server?
|     --> cloud (ASR Proxy, GCP GPU on demand)
|
+-- Local machine with an NVIDIA GPU?
|     --> local-gpu (WhisperX on GPU, fastest)
|
+-- Local machine without a GPU?
      --> local-cpu (WhisperX on CPU, slow but functional)
```
## Quickstart
```bash
git clone https://gitea.innova-ai.ca/Innova-AI/dictia-public.git dictia
cd dictia
git checkout dictia-branding
bash deployment/setup.sh
```
The script detects your hardware and guides you through the installation.
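If you already know your target, you can skip the interactive prompt and pass the profile directly, as the guides under `docs/` do:
```bash
bash deployment/setup.sh --profile cloud      # VPS + GCP GPU
bash deployment/setup.sh --profile local-gpu  # local NVIDIA GPU
bash deployment/setup.sh --profile local-cpu  # CPU only
```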
## Architecture
```
deployment/
├── setup.sh # Main installer
├── docker/
│ ├── docker-compose.cloud.yml
│ ├── docker-compose.local-cpu.yml
│ ├── docker-compose.local-gpu.yml
│ └── .env.example
├── asr-proxy/ # GCP GPU proxy (cloud only)
│ ├── proxy.py
│ ├── dashboard.html
│ ├── requirements.txt
│ ├── setup.sh
│ └── asr-proxy.service
├── security/ # Docker hardening (cloud)
│ ├── docker-daemon.json
│ ├── iptables-rules.sh
│ └── docker-iptables.service
├── config/
│ ├── nginx/dictia.conf
│ ├── tailscale/setup-serve.sh
│ └── systemd/dictia.service
├── tools/
│ ├── backup.sh
│ ├── restore.sh
│ ├── update.sh
│ └── health-check.sh
└── docs/
├── QUICKSTART.md
├── VPS-SETUP.md
├── LOCAL-SETUP.md
├── MAINTENANCE.md
└── TROUBLESHOOTING.md
```
### Cloud profile
```
Internet --> Tailscale --> VPS
|
DictIA :8899
|
ASR Proxy :9090
|
GCP GPU (auto start/stop)
|
WhisperX :9000
```
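Once the cloud profile is running, the proxy's health endpoint (served by `asr-proxy/proxy.py`) reports the GPU state, active zone, and monthly budget usage:
```bash
curl -s http://localhost:9090/health | python3 -m json.tool
```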
### Local GPU/CPU profile
```
localhost:8899 --> DictIA container
|
WhisperX container :9000
|
Local GPU (or CPU)
```
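For the local profiles, the WhisperX sidecar exposes its own health endpoint on port 9000:
```bash
curl http://localhost:9000/health
```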
## Documentation
- [QUICKSTART.md](docs/QUICKSTART.md) — Quick start for each profile
- [VPS-SETUP.md](docs/VPS-SETUP.md) — Complete VPS setup from scratch
- [LOCAL-SETUP.md](docs/LOCAL-SETUP.md) — Local GPU/CPU setup
- [MAINTENANCE.md](docs/MAINTENANCE.md) — Backup, restore, update, monitoring
- [TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) — Common problems and solutions
## Upstream updates
All files under `deployment/` are specific to DictIA.
Upstream merges produce no conflicts, except for `deployment/setup.sh`
(which replaces Speakr's original setup.sh).

deployment/asr-proxy/.gitignore vendored Normal file

@@ -0,0 +1,5 @@
gcp-credentials.json
usage-stats.json
venv/
__pycache__/
*.pyc

deployment/asr-proxy/asr-proxy.service Normal file

@@ -0,0 +1,22 @@
# TEMPLATE — Do not copy this directly into /etc/systemd/system/.
# The ${ASR_PROXY_USER} and ${ASR_PROXY_DIR} variables are placeholders.
# The real service file is generated by setup.sh (via a bash heredoc) with the
# resolved values of $SERVICE_USER and $INSTALL_DIR.
# Usage: sudo bash setup.sh (installs and enables the service automatically)
[Unit]
Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
After=network.target
[Service]
Type=simple
User=${ASR_PROXY_USER}
Restart=always
RestartSec=10
WorkingDirectory=${ASR_PROXY_DIR}
ExecStart=${ASR_PROXY_DIR}/venv/bin/python proxy.py
Environment=GOOGLE_APPLICATION_CREDENTIALS=${ASR_PROXY_DIR}/gcp-credentials.json
Environment=STATS_FILE=${ASR_PROXY_DIR}/usage-stats.json
[Install]
WantedBy=multi-user.target

deployment/asr-proxy/dashboard.html Normal file (diff suppressed because it is too large)

deployment/asr-proxy/proxy.py Normal file

@@ -0,0 +1,741 @@
"""DictIA ASR Proxy - Auto-start/stop GCP GPU for WhisperX + Ollama.
Uses Google Cloud Compute REST API directly (no gcloud CLI needed).
Proxies both ASR (WhisperX) and LLM (Ollama) requests.
Multi-zone fallback across Canada (Montreal + Toronto).
"""
import asyncio
import json
import logging
import os
import time
import httpx
import jwt as pyjwt
from pathlib import Path
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, JSONResponse, Response
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
log = logging.getLogger("asr-proxy")
# Config — paths relative to this script's directory by default
SCRIPT_DIR = Path(__file__).parent
GCP_PROJECT = os.getenv("GCP_PROJECT", "speakr-gpu")
WHISPERX_PORT = int(os.getenv("WHISPERX_PORT", "9000"))
OLLAMA_PORT = int(os.getenv("OLLAMA_PORT", "11434"))
IDLE_TIMEOUT = int(os.getenv("IDLE_TIMEOUT", "300"))
CREDS_FILE = os.getenv("GOOGLE_APPLICATION_CREDENTIALS", str(SCRIPT_DIR / "gcp-credentials.json"))
STATS_FILE = os.getenv("STATS_FILE", str(SCRIPT_DIR / "usage-stats.json"))
MONTHLY_LIMIT_HOURS = float(os.getenv("MONTHLY_LIMIT_HOURS", "30"))
# Real GCP cost per GPU-hour (g2-standard-4 + L4): GPU ($0.837) + vCPU ($0.151) + RAM ($0.069)
GPU_COST_PER_HOUR = float(os.getenv("GPU_COST_PER_HOUR", "1.06"))
# Fixed monthly costs: SSD disks ($5.66) + snapshots ($4.19) ≈ $9.85/month
FIXED_MONTHLY_COST = float(os.getenv("FIXED_MONTHLY_COST", "9.85"))
SNAPSHOT_NAME = "whisperx-gpu-snapshot"
HEALTH_POLL_INTERVAL = 5
BOOT_TIMEOUT = 300
# Zone fallback order — Canada only, Montreal first
ZONE_FALLBACKS = [
{
"zone": "northamerica-northeast1-b",
"instance": "whisperx-gpu-mtl1",
"machine_type": "g2-standard-4",
"accelerator": "nvidia-l4",
"accel_count": 1,
"label": "Montreal-b (L4)",
},
{
"zone": "northamerica-northeast1-c",
"instance": "whisperx-gpu-mtl2",
"machine_type": "n1-standard-4",
"accelerator": "nvidia-tesla-t4",
"accel_count": 1,
"label": "Montreal-c (T4)",
},
{
"zone": "northamerica-northeast2-a",
"instance": "whisperx-gpu-tor1",
"machine_type": "g2-standard-4",
"accelerator": "nvidia-l4",
"accel_count": 1,
"label": "Toronto-a (L4)",
},
{
"zone": "northamerica-northeast2-b",
"instance": "whisperx-gpu",
"machine_type": "g2-standard-4",
"accelerator": "nvidia-l4",
"accel_count": 1,
"label": "Toronto-b (L4)",
},
]
STARTUP_SCRIPT = """#!/bin/bash
systemctl start docker
sleep 5
docker start whisperx-asr 2>/dev/null || true
systemctl start ollama 2>/dev/null || true
"""
app = FastAPI(title="DictIA ASR Proxy")
# State
last_request_time = 0.0
active_requests = 0
gpu_ip: str | None = None
active_zone: dict | None = None
shutdown_task: asyncio.Task | None = None
# Request history tracking (in-memory, last 20 requests)
request_history: list[dict] = []
MAX_HISTORY = 20
# Zone status tracking
zone_status: dict[str, dict] = {}
# Startup lock and failure cooldown
_startup_lock: asyncio.Lock | None = None
_last_failure_time: float = 0
FAILURE_COOLDOWN = 180
# OAuth2 token cache
_access_token: str | None = None
_token_expiry: float = 0
# --- Usage Stats ---
def load_stats() -> dict:
try:
with open(STATS_FILE) as f:
return json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
return {"gpu_seconds": 0, "month": time.strftime("%Y-%m"), "requests": 0, "last_start": 0}
def save_stats(stats: dict):
with open(STATS_FILE, "w") as f:
json.dump(stats, f, indent=2)
def track_gpu_time():
stats = load_stats()
current_month = time.strftime("%Y-%m")
if stats.get("month") != current_month:
stats = {"gpu_seconds": 0, "month": current_month, "requests": 0, "last_start": 0}
if stats.get("last_start", 0) > 0:
elapsed = time.time() - stats["last_start"]
stats["gpu_seconds"] += elapsed
stats["last_start"] = 0
save_stats(stats)
def check_budget() -> tuple[bool, float]:
stats = load_stats()
current_month = time.strftime("%Y-%m")
if stats.get("month") != current_month:
return True, 0.0
hours_used = stats.get("gpu_seconds", 0) / 3600
return hours_used < MONTHLY_LIMIT_HOURS, hours_used
# --- GCP Auth ---
async def get_access_token() -> str:
global _access_token, _token_expiry
if _access_token and time.time() < _token_expiry - 60:
return _access_token
with open(CREDS_FILE) as f:
creds = json.load(f)
cred_type = creds.get("type", "authorized_user")
async with httpx.AsyncClient() as client:
if cred_type == "service_account":
now = int(time.time())
payload = {
"iss": creds["client_email"],
"scope": "https://www.googleapis.com/auth/compute",
"aud": "https://oauth2.googleapis.com/token",
"iat": now,
"exp": now + 3600,
}
signed = pyjwt.encode(payload, creds["private_key"], algorithm="RS256")
resp = await client.post(
"https://oauth2.googleapis.com/token",
data={
"grant_type": "urn:ietf:params:oauth:grant-type:jwt-bearer",
"assertion": signed,
},
)
else:
resp = await client.post(
"https://oauth2.googleapis.com/token",
data={
"client_id": creds["client_id"],
"client_secret": creds["client_secret"],
"refresh_token": creds["refresh_token"],
"grant_type": "refresh_token",
},
)
resp.raise_for_status()
data = resp.json()
_access_token = data["access_token"]
_token_expiry = time.time() + data.get("expires_in", 3600)
log.info(f"Refreshed GCP access token ({cred_type})")
return _access_token
# --- GCP Compute API ---
COMPUTE_BASE = "https://compute.googleapis.com/compute/v1"
async def gcp_api(method: str, url: str, **kwargs) -> httpx.Response:
token = await get_access_token()
async with httpx.AsyncClient(timeout=60) as client:
resp = await client.request(
method, url,
headers={"Authorization": f"Bearer {token}"},
**kwargs,
)
return resp
async def get_instance_info(zone: str, instance: str) -> dict | None:
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
resp = await gcp_api("GET", url)
if resp.status_code == 404:
return None
if resp.status_code >= 400:
log.error(f"GCP API error {resp.status_code}: {resp.text}")
return None
return resp.json()
def extract_ip(instance_data: dict) -> str:
interfaces = instance_data.get("networkInterfaces", [])
if interfaces:
access = interfaces[0].get("accessConfigs", [])
if access:
return access[0].get("natIP", "")
return ""
async def start_instance_in_zone(zone: str, instance: str) -> bool:
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/start"
resp = await gcp_api("POST", url)
if resp.status_code < 400:
log.info(f"Start requested: {instance} in {zone}")
return True
log.warning(f"Failed to start {instance} in {zone}: {resp.status_code} {resp.text}")
return False
async def stop_instance_in_zone(zone: str, instance: str):
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}/stop"
resp = await gcp_api("POST", url)
if resp.status_code < 400:
log.info(f"Stop requested: {instance} in {zone}")
else:
log.error(f"Failed to stop {instance} in {zone}: {resp.status_code}")
async def create_instance_from_snapshot(config: dict) -> bool:
zone = config["zone"]
instance = config["instance"]
machine = config["machine_type"]
accel = config["accelerator"]
accel_count = config["accel_count"]
log.info(f"Creating {instance} in {zone} from snapshot...")
body = {
"name": instance,
"machineType": f"zones/{zone}/machineTypes/{machine}",
"disks": [{
"boot": True,
"autoDelete": True,
"initializeParams": {
"diskSizeGb": "50",
"diskType": f"zones/{zone}/diskTypes/pd-ssd",
"sourceSnapshot": f"global/snapshots/{SNAPSHOT_NAME}",
},
}],
"networkInterfaces": [{
"network": "global/networks/default",
"accessConfigs": [{"type": "ONE_TO_ONE_NAT", "name": "External NAT"}],
}],
"guestAccelerators": [{
"acceleratorType": f"zones/{zone}/acceleratorTypes/{accel}",
"acceleratorCount": accel_count,
}],
"scheduling": {
"onHostMaintenance": "TERMINATE",
"automaticRestart": False,
},
"tags": {"items": ["whisperx-gpu"]},
"metadata": {
"items": [{"key": "startup-script", "value": STARTUP_SCRIPT}],
},
}
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances"
resp = await gcp_api("POST", url, json=body)
if resp.status_code < 400:
log.info(f"Created {instance} in {zone}")
return True
error_text = resp.text
if "ZONE_RESOURCE_POOL_EXHAUSTED" in error_text:
log.warning(f"No capacity in {zone} -- skipping")
elif "QUOTA" in error_text.upper():
log.warning(f"Quota exceeded for {zone}: {error_text[:200]}")
else:
log.error(f"Failed to create in {zone}: {resp.status_code} {error_text[:200]}")
return False
# --- Core Logic ---
async def wait_for_running(zone: str, instance: str, timeout: int = 120, grace: int = 15) -> bool:
gone_count = 0
start_time = time.time()
for _ in range(timeout // 5):
info = await get_instance_info(zone, instance)
if info and info.get("status") == "RUNNING":
return True
status = info.get("status", "UNKNOWN") if info else "GONE"
elapsed = time.time() - start_time
if status == "GONE":
gone_count += 1
if gone_count >= 2:
log.warning(f"{instance} in {zone}: instance disappeared (no capacity)")
return False
if status in ("STOPPING",):
log.warning(f"{instance} in {zone}: status {status} (no capacity)")
return False
if status in ("TERMINATED", "STOPPED") and elapsed > grace:
log.warning(f"{instance} in {zone}: status {status} after {elapsed:.0f}s (no capacity)")
return False
await asyncio.sleep(5)
return False
async def delete_instance(zone: str, instance: str):
url = f"{COMPUTE_BASE}/projects/{GCP_PROJECT}/zones/{zone}/instances/{instance}"
resp = await gcp_api("DELETE", url)
if resp.status_code < 400:
log.info(f"Deleted {instance} in {zone} to free quota")
elif resp.status_code == 404:
pass
else:
log.warning(f"Failed to delete {instance} in {zone}: {resp.status_code}")
async def ensure_gpu_running() -> str:
global gpu_ip, active_zone, _last_failure_time
if _last_failure_time > 0:
remaining = FAILURE_COOLDOWN - (time.time() - _last_failure_time)
if remaining > 0:
log.info(f"GPU cooldown active ({int(remaining)}s remaining), waiting...")
await asyncio.sleep(remaining)
_last_failure_time = 0
async with _startup_lock:
ok, hours = check_budget()
if not ok:
raise RuntimeError(f"Monthly GPU limit reached ({hours:.1f}h / {MONTHLY_LIMIT_HOURS}h)")
if active_zone:
info = await get_instance_info(active_zone["zone"], active_zone["instance"])
if info and info.get("status") == "RUNNING":
gpu_ip = extract_ip(info)
if gpu_ip:
return gpu_ip
errors = []
for config in ZONE_FALLBACKS:
zone = config["zone"]
instance = config["instance"]
label = config["label"]
log.info(f"Trying {label}...")
info = await get_instance_info(zone, instance)
if info is None:
created = await create_instance_from_snapshot(config)
if not created:
zone_status[label] = {
"status": "no_capacity",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "no capacity",
}
errors.append(f"{label}: no capacity")
continue
if not await wait_for_running(zone, instance, grace=30):
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "created but failed to start",
}
errors.append(f"{label}: created but failed to start")
await delete_instance(zone, instance)
await asyncio.sleep(3)
continue
else:
status = info.get("status", "UNKNOWN")
if status == "RUNNING":
pass
elif status in ("TERMINATED", "STOPPED"):
zone_status[label] = {
"status": "starting",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": None,
}
started = await start_instance_in_zone(zone, instance)
if not started:
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "start rejected",
}
errors.append(f"{label}: start rejected")
continue
if not await wait_for_running(zone, instance, grace=20):
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "didn't reach RUNNING",
}
errors.append(f"{label}: didn't reach RUNNING")
continue
elif status in ("STAGING", "PROVISIONING"):
zone_status[label] = {
"status": "starting",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": None,
}
if not await wait_for_running(zone, instance):
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": f"stuck in {status}",
}
errors.append(f"{label}: stuck in {status}")
continue
elif status == "STOPPING":
log.info(f"{label}: STOPPING, deleting to free quota")
await delete_instance(zone, instance)
await asyncio.sleep(3)
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "was STOPPING, deleted",
}
errors.append(f"{label}: was STOPPING, deleted")
continue
info = await get_instance_info(zone, instance)
if info and info.get("status") == "RUNNING":
gpu_ip = extract_ip(info)
if gpu_ip:
active_zone = config
_last_failure_time = 0
zone_status[label] = {
"status": "running",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": None,
}
stats = load_stats()
stats["last_start"] = time.time()
stats["requests"] = stats.get("requests", 0) + 1
stats["active_zone"] = label
save_stats(stats)
log.info(f"GPU ready in {label}, IP: {gpu_ip}")
return gpu_ip
zone_status[label] = {
"status": "error",
"last_tried": time.strftime("%Y-%m-%dT%H:%M:%S"),
"last_error": "running but no IP",
}
errors.append(f"{label}: running but no IP")
_last_failure_time = time.time()
raise RuntimeError(
f"No GPU available in any Canadian zone. Tried: {'; '.join(errors)}"
)
async def ensure_gpu_ready() -> str:
ip = await ensure_gpu_running()
url = f"http://{ip}:{WHISPERX_PORT}/health"
log.info(f"Waiting for WhisperX at {url}...")
async with httpx.AsyncClient(timeout=10) as client:
for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):
try:
resp = await client.get(url)
if resp.status_code == 200:
log.info("WhisperX is healthy!")
return ip
except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):
pass
await asyncio.sleep(HEALTH_POLL_INTERVAL)
raise RuntimeError("WhisperX did not become healthy in time")
async def ensure_ollama_ready() -> str:
ip = await ensure_gpu_running()
url = f"http://{ip}:{OLLAMA_PORT}/api/tags"
log.info(f"Waiting for Ollama at {url}...")
async with httpx.AsyncClient(timeout=10) as client:
for _ in range(BOOT_TIMEOUT // HEALTH_POLL_INTERVAL):
try:
resp = await client.get(url)
if resp.status_code == 200:
log.info("Ollama is healthy!")
return ip
except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout):
pass
await asyncio.sleep(HEALTH_POLL_INTERVAL)
raise RuntimeError("Ollama did not become healthy in time")
async def idle_shutdown_loop():
while True:
await asyncio.sleep(60)
if last_request_time == 0 or active_zone is None:
continue
if active_requests > 0:
continue
elapsed = time.time() - last_request_time
if elapsed >= IDLE_TIMEOUT:
try:
zone = active_zone["zone"]
instance = active_zone["instance"]
label = active_zone["label"]
info = await get_instance_info(zone, instance)
if info and info.get("status") == "RUNNING":
log.info(f"Idle {int(elapsed)}s -- stopping {label}")
await stop_instance_in_zone(zone, instance)
track_gpu_time()
except Exception as e:
log.error(f"Error stopping: {e}")
# --- Endpoints ---
@app.on_event("startup")
async def on_startup():
global shutdown_task, _startup_lock
_startup_lock = asyncio.Lock()
await get_access_token()
shutdown_task = asyncio.create_task(idle_shutdown_loop())
zones = ", ".join(c["label"] for c in ZONE_FALLBACKS)
log.info(f"DictIA ASR Proxy started. Zones: [{zones}]. Idle: {IDLE_TIMEOUT}s, limit: {MONTHLY_LIMIT_HOURS}h")
@app.post("/asr")
async def asr_proxy(request: Request):
global last_request_time, active_requests
body = await request.body()
headers = {
k: v for k, v in request.headers.items()
if k.lower() not in ("host", "transfer-encoding")
}
last_request_time = time.time()
active_requests += 1
start_time = time.time()
result_status = 200
try:
ip = await ensure_gpu_ready()
target = f"http://{ip}:{WHISPERX_PORT}/asr"
log.info(f"Forwarding {len(body)} bytes to {target}")
async with httpx.AsyncClient(timeout=httpx.Timeout(7200.0)) as client:
resp = await client.post(target, content=body, headers=headers)
last_request_time = time.time()
result_status = resp.status_code
        ct = resp.headers.get("content-type", "")
        if "application/json" in ct:
            return JSONResponse(content=resp.json(), status_code=resp.status_code)
        else:
            # Pass non-JSON bodies through unchanged instead of re-encoding them as a JSON string
            return Response(content=resp.content, status_code=resp.status_code, media_type=ct or None)
except httpx.ReadTimeout:
result_status = 504
return JSONResponse({"error": "Transcription timeout (2h)"}, status_code=504)
except Exception as e:
result_status = 502
log.error(f"Proxy error: {e}")
return JSONResponse({"error": str(e)}, status_code=502)
finally:
active_requests -= 1
last_request_time = time.time()
request_history.insert(0, {
"time": time.strftime("%Y-%m-%dT%H:%M:%S"),
"type": "ASR",
"duration_sec": round(time.time() - start_time, 1),
"status": result_status,
"zone": active_zone["label"] if active_zone else "none",
})
if len(request_history) > MAX_HISTORY:
request_history.pop()
@app.get("/health")
async def health():
zone_label = active_zone["label"] if active_zone else "none"
gpu_status = "unknown"
if active_zone:
try:
info = await get_instance_info(active_zone["zone"], active_zone["instance"])
gpu_status = info.get("status", "unknown") if info else "not_found"
except Exception:
pass
ok, hours = check_budget()
stats = load_stats()
return {
"proxy": "healthy",
"gpu_instance": gpu_status,
"gpu_zone": zone_label,
"active_requests": active_requests,
"idle_timeout": IDLE_TIMEOUT,
"usage": {
"month": stats.get("month"),
"gpu_hours": round(hours, 2),
"gpu_limit_hours": MONTHLY_LIMIT_HOURS,
"requests_count": stats.get("requests", 0),
"budget_ok": ok,
},
"gpu_ip": gpu_ip,
"machine_type": active_zone.get("machine_type", "unknown") if active_zone else "unknown",
"gpu_model": active_zone.get("accelerator", "unknown") if active_zone else "unknown",
"idle_seconds": round(time.time() - last_request_time) if last_request_time > 0 else 0,
"auto_shutdown_in": max(0, IDLE_TIMEOUT - round(time.time() - last_request_time)) if last_request_time > 0 and active_zone else None,
"token_expires_in": round(_token_expiry - time.time()) if _token_expiry > 0 else None,
}
@app.get("/stats")
async def get_stats():
stats = load_stats()
hours = stats.get("gpu_seconds", 0) / 3600
gpu_cost = hours * GPU_COST_PER_HOUR
total_cost = gpu_cost + FIXED_MONTHLY_COST
return {
"month": stats.get("month"),
"gpu_hours": round(hours, 2),
"gpu_minutes": round(hours * 60, 1),
"estimated_cost_usd": round(total_cost, 2),
"gpu_cost_usd": round(gpu_cost, 2),
"fixed_cost_usd": FIXED_MONTHLY_COST,
"monthly_limit_hours": MONTHLY_LIMIT_HOURS,
"remaining_hours": round(MONTHLY_LIMIT_HOURS - hours, 2),
"requests_count": stats.get("requests", 0),
"active_zone": stats.get("active_zone", "none"),
"cost_per_hour": GPU_COST_PER_HOUR,
"recent_requests": request_history[:10],
"zone_fallbacks": [
{
"label": config["label"],
"zone": config["zone"],
"machine": config["machine_type"],
"gpu": config["accelerator"],
**zone_status.get(config["label"], {"status": "unknown", "last_tried": None, "last_error": None}),
}
for config in ZONE_FALLBACKS
],
}
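# Manual override endpoints, e.g. curl -X POST http://localhost:9090/gpu/start
# to pre-warm the GPU, or /gpu/stop to cut costs immediately.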
@app.post("/gpu/start")
async def gpu_start():
try:
ip = await ensure_gpu_ready()
label = active_zone["label"] if active_zone else "unknown"
return {"status": "running", "ip": ip, "zone": label}
except Exception as e:
return JSONResponse({"error": str(e)}, status_code=503)
@app.post("/gpu/stop")
async def gpu_stop():
if not active_zone:
return {"status": "no active instance"}
try:
await stop_instance_in_zone(active_zone["zone"], active_zone["instance"])
track_gpu_time()
return {"status": "stopped", "zone": active_zone["label"]}
except Exception as e:
return JSONResponse({"error": str(e)}, status_code=500)
DASHBOARD_HTML = Path(__file__).parent / "dashboard.html"
@app.get("/", response_class=HTMLResponse)
async def dashboard():
if DASHBOARD_HTML.exists():
return HTMLResponse(DASHBOARD_HTML.read_text(encoding="utf-8"))
return HTMLResponse("<h1>Dashboard not found</h1><p>Place dashboard.html next to proxy.py</p>", status_code=404)
@app.api_route("/v1/{path:path}", methods=["POST", "GET"])
async def llm_proxy(request: Request, path: str):
global last_request_time, active_requests
body = await request.body()
headers = {
k: v for k, v in request.headers.items()
if k.lower() not in ("host", "transfer-encoding")
}
last_request_time = time.time()
active_requests += 1
start_time = time.time()
result_status = 200
try:
ip = await ensure_ollama_ready()
target = f"http://{ip}:{OLLAMA_PORT}/v1/{path}"
log.info(f"Forwarding LLM request to {target}")
async with httpx.AsyncClient(timeout=httpx.Timeout(300.0)) as client:
resp = await client.request(request.method, target, content=body, headers=headers)
last_request_time = time.time()
result_status = resp.status_code
return Response(
content=resp.content,
status_code=resp.status_code,
media_type=resp.headers.get("content-type"),
)
except httpx.ReadTimeout:
result_status = 504
return JSONResponse({"error": "LLM timeout (5min)"}, status_code=504)
except Exception as e:
result_status = 502
log.error(f"LLM proxy error: {e}")
return JSONResponse({"error": str(e)}, status_code=502)
finally:
active_requests -= 1
last_request_time = time.time()
request_history.insert(0, {
"time": time.strftime("%Y-%m-%dT%H:%M:%S"),
"type": "LLM",
"duration_sec": round(time.time() - start_time, 1),
"status": result_status,
"zone": active_zone["label"] if active_zone else "none",
})
if len(request_history) > MAX_HISTORY:
request_history.pop()
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=9090)

deployment/asr-proxy/requirements.txt Normal file

@@ -0,0 +1,5 @@
fastapi==0.115.0
uvicorn==0.30.0
httpx==0.27.0
PyJWT==2.9.0
cryptography>=43.0.0

deployment/asr-proxy/setup.sh Normal file

@@ -0,0 +1,87 @@
#!/usr/bin/env bash
# DictIA ASR Proxy — Setup script
# Installs the GCP GPU proxy for cloud deployments.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
INSTALL_DIR="${ASR_PROXY_DIR:-$SCRIPT_DIR}"
SERVICE_USER="${ASR_PROXY_USER:-$(whoami)}"
echo "=== DictIA ASR Proxy Setup ==="
echo "Install directory: $INSTALL_DIR"
echo "Service user: $SERVICE_USER"
echo
# 1. Create virtual environment
if [ ! -d "$INSTALL_DIR/venv" ]; then
echo "[1/4] Creating Python virtual environment..."
python3 -m venv "$INSTALL_DIR/venv"
else
echo "[1/4] Virtual environment already exists."
fi
# 2. Install dependencies
echo "[2/4] Installing Python dependencies..."
"$INSTALL_DIR/venv/bin/pip" install --quiet --upgrade pip
"$INSTALL_DIR/venv/bin/pip" install --quiet -r "$INSTALL_DIR/requirements.txt"
# 3. GCP credentials
if [ ! -f "$INSTALL_DIR/gcp-credentials.json" ]; then
echo "[3/4] GCP credentials not found."
echo " Place your GCP service account or OAuth credentials at:"
echo " $INSTALL_DIR/gcp-credentials.json"
echo
echo " For service account: download JSON from GCP Console > IAM > Service Accounts"
echo " For user credentials: run 'gcloud auth application-default login' and copy the file"
echo
read -rp " Path to credentials file (or press Enter to skip): " CREDS_PATH
if [ -n "$CREDS_PATH" ] && [ -f "$CREDS_PATH" ]; then
cp "$CREDS_PATH" "$INSTALL_DIR/gcp-credentials.json"
chmod 600 "$INSTALL_DIR/gcp-credentials.json"
echo " Credentials copied."
else
echo " Skipped. You must add credentials before starting the proxy."
fi
else
echo "[3/4] GCP credentials found."
fi
# 4. Install systemd service
echo "[4/4] Installing systemd service..."
SERVICE_FILE="/etc/systemd/system/asr-proxy.service"
cat > /tmp/asr-proxy.service <<UNIT
[Unit]
Description=DictIA ASR Proxy - GPU Auto-Start/Stop for WhisperX
After=network.target
[Service]
Type=simple
User=$SERVICE_USER
Restart=always
RestartSec=10
WorkingDirectory=$INSTALL_DIR
ExecStart=$INSTALL_DIR/venv/bin/python proxy.py
Environment=GOOGLE_APPLICATION_CREDENTIALS=$INSTALL_DIR/gcp-credentials.json
Environment=STATS_FILE=$INSTALL_DIR/usage-stats.json
[Install]
WantedBy=multi-user.target
UNIT
if [ "$(id -u)" -eq 0 ]; then
cp /tmp/asr-proxy.service "$SERVICE_FILE"
systemctl daemon-reload
systemctl enable asr-proxy.service
echo " Service installed and enabled."
echo " Start with: systemctl start asr-proxy"
else
echo " Run as root to install systemd service, or copy manually:"
echo " sudo cp /tmp/asr-proxy.service $SERVICE_FILE"
echo " sudo systemctl daemon-reload && sudo systemctl enable asr-proxy"
fi
echo
echo "=== Setup complete ==="
echo "Dashboard: http://localhost:9090"
echo "Health: http://localhost:9090/health"

deployment/config/nginx/dictia.conf Normal file

@@ -0,0 +1,83 @@
# DictIA — Nginx reverse proxy configuration
#
# Alternative to Tailscale Serve for exposing DictIA over HTTPS.
# Replace YOUR_DOMAIN with your actual domain name.
#
# Install: sudo cp dictia.conf /etc/nginx/sites-available/dictia
# sudo ln -s /etc/nginx/sites-available/dictia /etc/nginx/sites-enabled/
# sudo nginx -t && sudo systemctl reload nginx
#
# For HTTPS with Let's Encrypt:
# sudo certbot --nginx -d YOUR_DOMAIN
upstream dictia_app {
server 127.0.0.1:8899;
}
upstream asr_proxy {
server 127.0.0.1:9090;
}
server {
listen 80;
server_name YOUR_DOMAIN;
# Redirect HTTP to HTTPS (uncomment after certbot setup)
# return 301 https://$host$request_uri;
client_max_body_size 500M;
# DictIA app
location / {
proxy_pass http://dictia_app;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
# WebSocket support (for real-time features)
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
# Long timeouts for transcription uploads
proxy_read_timeout 3600s;
proxy_send_timeout 3600s;
proxy_connect_timeout 60s;
}
# ASR Proxy dashboard (optional, restrict access)
location /asr-proxy/ {
proxy_pass http://asr_proxy/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}
# HTTPS server block (managed by certbot, uncomment after setup)
# server {
# listen 443 ssl;
# server_name YOUR_DOMAIN;
#
# ssl_certificate /etc/letsencrypt/live/YOUR_DOMAIN/fullchain.pem;
# ssl_certificate_key /etc/letsencrypt/live/YOUR_DOMAIN/privkey.pem;
# include /etc/letsencrypt/options-ssl-nginx.conf;
# ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem;
#
# client_max_body_size 500M;
#
# location / {
# proxy_pass http://dictia_app;
# proxy_set_header Host $host;
# proxy_set_header X-Real-IP $remote_addr;
# proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
# proxy_set_header X-Forwarded-Proto $scheme;
# proxy_http_version 1.1;
# proxy_set_header Upgrade $http_upgrade;
# proxy_set_header Connection "upgrade";
# proxy_read_timeout 3600s;
# proxy_send_timeout 3600s;
# }
# }

deployment/config/systemd/dictia.service Normal file

@@ -0,0 +1,15 @@
[Unit]
Description=DictIA - Docker Compose Application
After=docker.service
Requires=docker.service
[Service]
Type=oneshot
RemainAfterExit=yes
WorkingDirectory=/opt/dictia
ExecStart=/usr/bin/docker compose -f deployment/docker/docker-compose.cloud.yml up -d
ExecStop=/usr/bin/docker compose -f deployment/docker/docker-compose.cloud.yml down
TimeoutStartSec=120
[Install]
WantedBy=multi-user.target

deployment/config/tailscale/setup-serve.sh Normal file

@@ -0,0 +1,52 @@
#!/usr/bin/env bash
# DictIA — Tailscale Serve/Funnel setup
#
# Exposes DictIA and ASR Proxy dashboard via Tailscale HTTPS.
# Based on the VPS production configuration.
#
# Usage:
# bash setup-serve.sh [serve|funnel]
# serve — accessible only within your tailnet (default)
# funnel — accessible from the public internet
set -euo pipefail
MODE="${1:-serve}"
echo "=== DictIA Tailscale Setup ==="
echo "Mode: $MODE"
echo
# Verify Tailscale is connected
if ! tailscale status >/dev/null 2>&1; then
echo "ERROR: Tailscale is not running or not connected."
echo " Install: curl -fsSL https://tailscale.com/install.sh | sh"
echo " Connect: sudo tailscale up"
exit 1
fi
HOSTNAME=$(tailscale status --json | python3 -c "import sys,json; print(json.load(sys.stdin)['Self']['DNSName'].rstrip('.'))" 2>/dev/null || echo "unknown")
echo "Tailscale hostname: $HOSTNAME"
echo
# DictIA app on :443 → localhost:8899
echo "[1/2] Setting up DictIA app (port 443 → 8899)..."
if [ "$MODE" = "funnel" ]; then
tailscale funnel --bg --https=443 http://localhost:8899
else
tailscale serve --bg --https=443 http://localhost:8899
fi
# ASR Proxy dashboard → localhost:9090
# (Tailscale Funnel only supports ports 443, 8443 and 10000, so funnel mode uses 8443)
echo "[2/2] Setting up ASR Proxy dashboard..."
if [ "$MODE" = "funnel" ]; then
    tailscale funnel --bg --https=8443 http://localhost:9090
else
    tailscale serve --bg --https=9443 http://localhost:9090
fi
echo
echo "=== Setup complete ==="
echo "DictIA: https://$HOSTNAME/"
echo "ASR Dashboard: https://$HOSTNAME:9443/"
echo
echo "Verify with: tailscale serve status"

deployment/docker/.env.example Normal file

@@ -0,0 +1,124 @@
# =============================================================================
# DictIA — Unified Environment Configuration
# =============================================================================
#
# Copy this file to the project root as .env and edit the values.
# cp deployment/docker/.env.example .env
#
# This template combines upstream settings with DictIA deployment vars.
# See: config/env.transcription.example for full upstream documentation.
# =============================================================================
# FLASK SECRET KEY (REQUIRED — auto-generated by setup.sh)
# =============================================================================
SECRET_KEY=change-me-to-a-random-string
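# Example generator (any long random string works):
#   python3 -c "import secrets; print(secrets.token_hex(32))"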
# =============================================================================
# DEPLOYMENT PROFILE (used by deployment scripts)
# =============================================================================
# Options: cloud, local-cpu, local-gpu
DICTIA_PROFILE=cloud
# =============================================================================
# TEXT GENERATION MODEL (REQUIRED for summaries, titles, chat)
# =============================================================================
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini
# =============================================================================
# TRANSCRIPTION CONFIGURATION
# =============================================================================
# For cloud profile (ASR Proxy → GCP GPU):
# ASR_BASE_URL is set automatically in docker-compose.cloud.yml
# No need to set it here.
#
# For local profiles (WhisperX sidecar):
# ASR_BASE_URL is set automatically in docker-compose.local-*.yml
# No need to set it here.
#
# For OpenAI API instead of self-hosted ASR:
# TRANSCRIPTION_API_KEY=sk-your_openai_api_key
# TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize
# ASR model (for local WhisperX profiles)
ASR_MODEL=large-v3
# HuggingFace token (required for diarization with pyannote)
# Get yours at: https://huggingface.co/settings/tokens
# Must accept: https://huggingface.co/pyannote/speaker-diarization-3.1
HF_TOKEN=
# =============================================================================
# ASR PROXY — CLOUD PROFILE ONLY
# =============================================================================
# GCP project for GPU instances
# GCP_PROJECT=your-gcp-project
# Monthly GPU budget limit in hours (default: 30, see proxy.py)
# MONTHLY_LIMIT_HOURS=50
# Idle timeout before auto-stopping GPU (seconds, default: 300)
# IDLE_TIMEOUT=300
# =============================================================================
# APPLICATION SETTINGS
# =============================================================================
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=changeme
ALLOW_REGISTRATION=false
TIMEZONE="America/Toronto"
LOG_LEVEL=ERROR
LOCALE=fr_CA
DEFAULT_LANGUAGE=fr
SHOW_USERNAMES_IN_UI=true
SESSION_COOKIE_HTTPONLY=true
SESSION_COOKIE_SAMESITE=Lax
SESSION_COOKIE_SECURE=true
# =============================================================================
# OPTIONAL FEATURES
# =============================================================================
ENABLE_INQUIRE_MODE=false
ENABLE_AUTO_PROCESSING=false
ENABLE_AUTO_EXPORT=false
ENABLE_AUTO_DELETION=false
ENABLE_INTERNAL_SHARING=true
ENABLE_PUBLIC_SHARING=true
ENABLE_FOLDERS=true
VIDEO_RETENTION=true
USERS_CAN_DELETE=true
# =============================================================================
# BACKGROUND PROCESSING
# =============================================================================
JOB_QUEUE_WORKERS=4
SUMMARY_QUEUE_WORKERS=4
JOB_MAX_RETRIES=3
MAX_CONCURRENT_UPLOADS=3
# =============================================================================
# TRANSCRIPTION SETTINGS
# =============================================================================
TRANSCRIPTION_CONNECTOR=asr_endpoint
USE_NEW_TRANSCRIPTION_ARCHITECTURE=true
ENABLE_CHUNKING=true
CHUNK_LIMIT=2400s
CHUNK_OVERLAP_SECONDS=5
# =============================================================================
# LLM / SUMMARY SETTINGS
# =============================================================================
SUMMARY_LANGUAGE=fr
SUMMARY_MAX_TOKENS=16000
CHAT_MAX_TOKENS=12000
ENABLE_STREAM_OPTIONS=false
ENABLE_THINKING=false
# =============================================================================
# DOCKER/DATABASE
# =============================================================================
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
UPLOAD_FOLDER=/data/uploads

deployment/docker/docker-compose.cloud.yml Normal file

@@ -0,0 +1,40 @@
# DictIA — Cloud deployment (VPS + ASR Proxy GCP GPU)
#
# Usage:
# docker compose -f deployment/docker/docker-compose.cloud.yml up -d
#
# ASR is handled by the external asr-proxy (port 9090) which auto-starts
# a GCP GPU instance on demand. DictIA connects via host.docker.internal.
services:
dictia:
build:
context: ../..
dockerfile: Dockerfile
image: innova-ai/dictia:latest
container_name: dictia
restart: unless-stopped
ports:
- "8899:8899"
env_file:
- ../../.env
environment:
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
- ASR_BASE_URL=http://host.docker.internal:9090
volumes:
- ../../data/uploads:/data/uploads
- ../../data/instance:/data/instance
extra_hosts:
- "host.docker.internal:host-gateway"
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
networks:
- dictia-network
networks:
dictia-network:
driver: bridge

deployment/docker/docker-compose.local-cpu.yml Normal file

@@ -0,0 +1,64 @@
# DictIA — Local CPU deployment (WhisperX on CPU + DictIA)
#
# Usage:
# docker compose -f deployment/docker/docker-compose.local-cpu.yml up -d
#
# Warning: CPU transcription is significantly slower than GPU.
# Expect ~10x real-time for large files (e.g., 1h audio = ~10h processing).
services:
whisperx-asr:
image: ghcr.io/jim60105/whisperx-asr:latest
container_name: whisperx-asr
restart: unless-stopped
ports:
- "9000:9000"
environment:
- ASR_MODEL=${ASR_MODEL:-large-v3}
- ASR_ENGINE=whisperx
- DEVICE=cpu
- COMPUTE_TYPE=float32
- HF_TOKEN=${HF_TOKEN:-}
volumes:
- whisperx-cache:/root/.cache
deploy:
resources:
limits:
memory: 18G
networks:
- dictia-network
dictia:
build:
context: ../..
dockerfile: Dockerfile
image: innova-ai/dictia:latest
container_name: dictia
restart: unless-stopped
ports:
- "8899:8899"
env_file:
- ../../.env
environment:
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
- ASR_BASE_URL=http://whisperx-asr:9000
volumes:
- ../../data/uploads:/data/uploads
- ../../data/instance:/data/instance
depends_on:
- whisperx-asr
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
networks:
- dictia-network
volumes:
whisperx-cache:
networks:
dictia-network:
driver: bridge

deployment/docker/docker-compose.local-gpu.yml Normal file

@@ -0,0 +1,69 @@
# DictIA — Local GPU deployment (WhisperX on NVIDIA GPU + DictIA)
#
# Usage:
# docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d
#
# Prerequisites:
# - NVIDIA GPU with CUDA support
# - nvidia-container-toolkit installed
# - Docker configured with nvidia runtime
services:
whisperx-asr:
image: ghcr.io/jim60105/whisperx-asr:latest-cuda
container_name: whisperx-asr
restart: unless-stopped
ports:
- "9000:9000"
environment:
- ASR_MODEL=${ASR_MODEL:-large-v3}
- ASR_ENGINE=whisperx
- DEVICE=cuda
- COMPUTE_TYPE=float16
- HF_TOKEN=${HF_TOKEN:-}
volumes:
- whisperx-cache:/root/.cache
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
networks:
- dictia-network
dictia:
build:
context: ../..
dockerfile: Dockerfile
image: innova-ai/dictia:latest
container_name: dictia
restart: unless-stopped
ports:
- "8899:8899"
env_file:
- ../../.env
environment:
- LOG_LEVEL=${LOG_LEVEL:-ERROR}
- ASR_BASE_URL=http://whisperx-asr:9000
volumes:
- ../../data/uploads:/data/uploads
- ../../data/instance:/data/instance
depends_on:
- whisperx-asr
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
networks:
- dictia-network
volumes:
whisperx-cache:
networks:
dictia-network:
driver: bridge

deployment/docs/LOCAL-SETUP.md Normal file

@@ -0,0 +1,118 @@
# Local Setup — DictIA
Guide to deploying DictIA locally with an NVIDIA GPU or on CPU.
## local-gpu profile
### Prerequisites
- NVIDIA GPU with CUDA support
- [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)
- Docker + Docker Compose V2
- 8GB+ RAM (16GB recommended)
- HuggingFace token (for diarization)
### Installing nvidia-container-toolkit
```bash
# Ubuntu/Debian
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | \
sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | \
sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | \
sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
# Verify
docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi
```
### DictIA setup
```bash
cd dictia
bash deployment/setup.sh --profile local-gpu
```
The setup script checks that:
- nvidia-container-toolkit is installed
- the GPU is reachable from Docker
- enough RAM is available
### Model configuration
By default, WhisperX uses `large-v3`. To change it (then recreate the container, as shown below):
```bash
# Edit .env
ASR_MODEL=large-v3 # Best quality
# ASR_MODEL=medium # Faster, decent quality
# ASR_MODEL=small # Very fast, reduced quality
```
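A plain `restart` keeps the container's old environment; recreate the WhisperX service so the new model takes effect:
```bash
docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d --force-recreate whisperx-asr
```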
---
## local-cpu profile
### Prerequisites
- Docker + Docker Compose V2
- 18GB+ RAM (WhisperX on CPU is memory-hungry)
- Patience (transcription runs at ~10x real time)
### Setup
```bash
cd dictia
bash deployment/setup.sh --profile local-cpu
```
### Limitations
- Slow transcription: 1h of audio takes ~10h
- Uses float32 (no GPU acceleration)
- Memory capped at 18GB by default
- Recommended for: tests, small files, demos
To reduce memory usage, pick a smaller model:
```bash
# Edit .env
ASR_MODEL=small # or medium, base, tiny
```
---
## Verification
```bash
# Health check
bash deployment/tools/health-check.sh
# Quick test: open in the browser
open http://localhost:8899
# Check WhisperX
curl http://localhost:9000/health
```
## Managing the containers
```bash
COMPOSE_FILE=deployment/docker/docker-compose.local-gpu.yml # or local-cpu
# Logs
docker compose -f $COMPOSE_FILE logs -f
# Restart
docker compose -f $COMPOSE_FILE restart
# Stop
docker compose -f $COMPOSE_FILE down
# Watch GPU usage (GPU profile only)
nvidia-smi
```

deployment/docs/MAINTENANCE.md Normal file

@@ -0,0 +1,136 @@
# Maintenance — DictIA
## Backup
```bash
# Full backup (data, .env, volumes, ASR stats)
bash deployment/tools/backup.sh
# Backup to a specific directory
bash deployment/tools/backup.sh /mnt/backups
```
Backups are written to `backups/` with automatic rotation (the 5 most recent are kept).
Contents of a backup:
- `data/` — uploads and the SQLite database
- `dot-env` — configuration file
- `asr-usage-stats.json` — GPU usage stats
- `whisperx-cache.tar.gz` — model cache (if a Docker volume is used)
- `manifest.json` — backup metadata
### Recommended schedule
| Frequency | Action |
|-----------|--------|
| Daily | `bash deployment/tools/backup.sh` |
| Weekly | Copy the backup to external storage |
| Monthly | Verify a restore in a test environment |
To automate with cron:
```bash
# Daily backup at 3 a.m.
0 3 * * * /opt/dictia/deployment/tools/backup.sh >> /var/log/dictia-backup.log 2>&1
```
## Restore
```bash
# List available backups
ls -la backups/
# Restore a backup
bash deployment/tools/restore.sh backups/dictia-20260211-030000.tar.gz
```
The script:
1. Validates the archive (manifest present; see the quick check below)
2. Asks for confirmation
3. Stops the containers
4. Restores the files
5. Restarts the containers
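Before running a restore, a plain-tar sanity check confirms the archive is readable and that the manifest is present:
```bash
tar -tzf backups/dictia-20260211-030000.tar.gz | head
```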
## Updating
```bash
# Full update (git pull + rebuild + restart)
bash deployment/tools/update.sh
# Rebuild only (no git pull)
bash deployment/tools/update.sh --no-pull
# Git pull only (no rebuild)
bash deployment/tools/update.sh --no-build
```
The script:
1. Detects the active profile automatically
2. `git pull origin dictia-branding`
3. `docker build -t innova-ai/dictia:latest .`
4. Pulls the upstream WhisperX image (local profiles)
5. `docker compose down && up -d`
6. Waits for the health check
7. Prunes dangling images
## Monitoring
### Health check
```bash
# Full diagnostic (human-readable)
bash deployment/tools/health-check.sh
# JSON (for alerting/scripts)
bash deployment/tools/health-check.sh --json
# Exit code only (0=ok, 1=problem)
bash deployment/tools/health-check.sh --quiet
```
### Logs
```bash
# DictIA
docker logs dictia -f --tail 100
# WhisperX (local profiles)
docker logs whisperx-asr -f --tail 100
# ASR Proxy (cloud profile)
journalctl -u asr-proxy -f
```
### GPU dashboard (cloud profile)
The GPU monitoring dashboard is available at:
- `http://localhost:9090` (local)
- `https://your-hostname.tailnet.ts.net:9443` (Tailscale)
It shows: GPU status, monthly cost, request history, fallback zones.
### Key metrics
```bash
# Disk space (transcriptions grow over time)
df -h /opt/dictia/data/
# Memory usage (WhisperX is memory-hungry)
docker stats --no-stream
# GPU stats (cloud profile)
curl -s http://localhost:9090/stats | python3 -m json.tool
```
## Docker maintenance
```bash
# Prune orphaned images
docker image prune -f
# Clean everything (careful: removes unused volumes)
# docker system prune -a --volumes
# Check Docker disk usage
docker system df
```

deployment/docs/QUICKSTART.md Normal file

@@ -0,0 +1,90 @@
# Quickstart — DictIA
## Common prerequisites
- Docker + Docker Compose V2
- Git
- 2GB+ free RAM
```bash
git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
cd dictia
git checkout dictia-branding
```
---
## Cloud profile (VPS + GCP GPU)
The GPU starts automatically when someone transcribes, and stops after 5 minutes of inactivity.
```bash
# 1. Interactive setup
bash deployment/setup.sh --profile cloud
# 2. ASR Proxy setup (GCP credentials required)
bash deployment/asr-proxy/setup.sh
# 3. Optional: Tailscale Serve for HTTPS
bash deployment/config/tailscale/setup-serve.sh
```
**Required**: GCP credentials (service account or OAuth) in `deployment/asr-proxy/gcp-credentials.json`.
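To watch the auto start/stop cycle, poll the proxy's health endpoint; the `auto_shutdown_in` field counts down to the idle stop:
```bash
watch -n 10 "curl -s http://localhost:9090/health | python3 -m json.tool"
```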
---
## Local GPU profile
Local transcription on an NVIDIA GPU. The fastest option.
```bash
# Prerequisite: nvidia-container-toolkit
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
# Setup
bash deployment/setup.sh --profile local-gpu
```
**Required**: a HuggingFace token for diarization (pyannote).
---
## Local CPU profile
Transcription on CPU. Slow, but fine for testing.
```bash
bash deployment/setup.sh --profile local-cpu
```
Plan for ~10x real time (1h of audio = ~10h of processing).
---
## After installation
```bash
# Check that everything works
bash deployment/tools/health-check.sh
# Open DictIA
open http://localhost:8899
```
Log in with the admin credentials configured during setup.
## Useful commands
```bash
# Live logs
docker compose -f deployment/docker/docker-compose.<profile>.yml logs -f
# Restart
docker compose -f deployment/docker/docker-compose.<profile>.yml restart
# Update
bash deployment/tools/update.sh
# Backup
bash deployment/tools/backup.sh
```

deployment/docs/TROUBLESHOOTING.md Normal file

@@ -0,0 +1,177 @@
# Troubleshooting — DictIA
## WhisperX OOM (Out of Memory)
**Symptom**: The `whisperx-asr` container crashes or restarts in a loop.
**Cause**: The model is too large for the available RAM/VRAM.
**Solutions**:
```bash
# Use a smaller model in .env
ASR_MODEL=medium # instead of large-v3
# Raise the memory limit (local-cpu)
# Edit docker-compose.local-cpu.yml
deploy:
  resources:
    limits:
      memory: 24G # instead of 18G
```
## Diarization 403 Forbidden
**Symptom**: 403 error during transcription with diarization.
**Cause**: Missing HuggingFace token, or the model's terms were not accepted.
**Solution**:
1. Create a token: https://huggingface.co/settings/tokens
2. Accept the terms: https://huggingface.co/pyannote/speaker-diarization-3.1
3. Add it to `.env`:
```bash
HF_TOKEN=hf_your_token
```
4. Restart: `docker compose -f deployment/docker/docker-compose.<profile>.yml restart`
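To confirm the token itself is valid, a quick check against HuggingFace's whoami endpoint (a sketch; adjust if the API changes):
```bash
curl -s -H "Authorization: Bearer $HF_TOKEN" https://huggingface.co/api/whoami-v2
```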
## GPU not detected (local-gpu)
**Symptom**: `nvidia-smi` works, but Docker does not see the GPU.
**Solution**:
```bash
# Install nvidia-container-toolkit
sudo apt install -y nvidia-container-toolkit
sudo nvidia-ctk runtime configure --runtime=docker
sudo systemctl restart docker
# Verify
docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi
```
## Upload fails (large files)
**Symptom**: Uploads of large files (>100MB) fail.
**Possible causes**:
- Nginx/reverse proxy timeout
- Upload limit set too low
**Solutions**:
```bash
# With Nginx: check client_max_body_size in dictia.conf
client_max_body_size 500M;
# With Tailscale Serve: no limit on the Tailscale side
# Gunicorn timeout (already 600s in the Dockerfile)
# For very long files, raise it in docker-compose:
environment:
  - GUNICORN_TIMEOUT=1200
```
## dictia container "unhealthy"
**Symptom**: `docker ps` shows "unhealthy" for the dictia container.
**Diagnosis**:
```bash
# Check the logs
docker logs dictia --tail 50
# Test manually
docker exec dictia python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"
```
**Common causes**:
- Misconfigured `.env` (missing SECRET_KEY)
- Corrupted database (restore a backup)
- Port 8899 already in use
## ASR Proxy: "No GPU available"
**Symptom**: Transcription fails with "No GPU available in any Canadian zone".
**Causes**:
- GCP has no GPU available (capacity exhausted)
- Expired GCP credentials
- Monthly budget reached
**Diagnosis**:
```bash
# Check the proxy status
curl -s http://localhost:9090/health | python3 -m json.tool
# Check the stats (budget)
curl -s http://localhost:9090/stats | python3 -m json.tool
# Check the logs
journalctl -u asr-proxy --since "1 hour ago"
```
**Solutions**:
- Wait (GCP frees up GPUs regularly)
- The proxy retries automatically after a 3-minute cooldown
- Check the dashboard (http://localhost:9090), or force a retry as shown below
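The proxy also exposes manual override endpoints (defined in `proxy.py`) to force a start attempt or an immediate stop:
```bash
curl -X POST http://localhost:9090/gpu/start
curl -X POST http://localhost:9090/gpu/stop
```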
## Docker build slow or failing
**Symptom**: `docker build` takes too long or fails.
**Solutions**:
```bash
# Limit resources if the VPS is small
docker build --memory=2g --cpus=2 -t innova-ai/dictia:latest .
# Clean the Docker cache if the disk is full
docker builder prune -f
docker image prune -f
```
## Corrupted database
**Symptom**: SQLite error at startup.
**Solution**:
```bash
# Restore the most recent backup
bash deployment/tools/restore.sh backups/dictia-LATEST.tar.gz
# Or recreate the database (loses all data)
rm data/instance/transcriptions.db
docker compose -f deployment/docker/docker-compose.<profile>.yml restart
```
## Port 8899 already in use
```bash
# Find what is using the port
sudo lsof -i :8899
# or
sudo ss -tlnp | grep 8899
# Stop that process, or change the port in docker-compose
ports:
  - "8900:8899" # use 8900 instead
```
## An update broke everything
```bash
# Rollback: return to the previous commit
cd dictia
git log --oneline -5 # find the right commit
git checkout <commit-hash>
# Rebuild and restart
docker build -t innova-ai/dictia:latest .
docker compose -f deployment/docker/docker-compose.<profile>.yml down
docker compose -f deployment/docker/docker-compose.<profile>.yml up -d
```
## Quick diagnostic command
```bash
# Check everything at once
bash deployment/tools/health-check.sh --json | python3 -m json.tool
```

deployment/docs/VPS-SETUP.md Normal file

@@ -0,0 +1,148 @@
# VPS setup from scratch — DictIA
Complete guide to deploying DictIA on an Ubuntu VPS.
Tested on an OVH VPS with Ubuntu 22.04/24.04.
## 1. VPS preparation
```bash
# System update
sudo apt update && sudo apt upgrade -y
# Install the essentials
sudo apt install -y curl git
```
## 2. Docker
```bash
# Install Docker (official method)
curl -fsSL https://get.docker.com | sh
# Add the user to the docker group
sudo usermod -aG docker $USER
# Log out and back in to apply the group change
exit
# (reconnect over SSH)
# Verify
docker --version
docker compose version
```
## 3. Tailscale (recommended)
Tailscale provides a mesh VPN for reaching the VPS without exposing any public ports.
```bash
# Install Tailscale
curl -fsSL https://tailscale.com/install.sh | sh
# Join your tailnet
sudo tailscale up
# Verify
tailscale status
```
## 4. DictIA
```bash
# Clone the repo
cd ~
git clone https://gitea.innova-ai.ca/Innova-AI/dictia.git
cd dictia
git checkout dictia-branding
# Run the setup
bash deployment/setup.sh --profile cloud
```
The setup will:
- Generate the `.env` with your credentials
- Create the data directories
- Build the Docker image
- Start the containers
## 5. ASR Proxy (GCP GPU)
```bash
# Install the proxy
bash deployment/asr-proxy/setup.sh
# Add the GCP credentials
# Copy your credentials file to:
cp ~/gcp-credentials.json deployment/asr-proxy/gcp-credentials.json
# Start the service
sudo systemctl start asr-proxy
sudo systemctl status asr-proxy
```
## 6. Security
```bash
# Docker daemon config (log rotation)
sudo cp deployment/security/docker-daemon.json /etc/docker/daemon.json
sudo systemctl restart docker
# iptables firewall (blocks non-Tailscale traffic)
sudo bash deployment/security/iptables-rules.sh
# systemd service to re-apply the rules at boot
sudo cp deployment/security/docker-iptables.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable docker-iptables
```
## 7. Tailscale Serve (HTTPS)
```bash
# Expose DictIA and the ASR dashboard over Tailscale HTTPS
bash deployment/config/tailscale/setup-serve.sh
# Verify
tailscale serve status
```
DictIA will be reachable at `https://your-hostname.tailnet.ts.net/`.
## 8. systemd service (auto-start)
```bash
# Adjust the path in the file if needed
sudo cp deployment/config/systemd/dictia.service /etc/systemd/system/
sudo systemctl daemon-reload
sudo systemctl enable dictia
```
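Enabling only registers the unit for the next boot; start it once now and confirm it is active:
```bash
sudo systemctl start dictia
systemctl status dictia
```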
## 9. Verification
```bash
# Full health check
bash deployment/tools/health-check.sh
# Check the endpoints
curl -s http://localhost:8899/health
curl -s http://localhost:9090/health
```
## 10. First backup
```bash
bash deployment/tools/backup.sh
```
---
## Post-install checklist
- [ ] DictIA responds on :8899
- [ ] ASR Proxy responds on :9090
- [ ] Tailscale Serve configured
- [ ] iptables: only Tailscale can connect
- [ ] Docker: log rotation configured
- [ ] systemd service enabled (auto-start at boot)
- [ ] First backup taken
- [ ] Admin credentials tested

config/docker-compose.dictia16.yml Normal file

@@ -0,0 +1,101 @@
# =============================================================================
# DictIA 16 — Docker Compose
# GPU: RTX 5070 Ti (16 GB VRAM)
# =============================================================================
#
# Services:
#   - dictia       : Main DictIA application
#   - whisperx-asr : WhisperX Large-v3 transcription service
#   - ollama       : Local Mistral 7B LLM (summaries, chat, Q&A)
#
# Startup:
#   1. cp config/env.dictia16.example .env
#   2. docker compose -f config/docker-compose.dictia16.yml up -d
#   3. Download Mistral: docker exec ollama ollama pull mistral
#
# Note: No API key needed — everything runs locally (100% private).
# =============================================================================
services:
# ---------------------------------------------------------------------------
# DictIA application
# ---------------------------------------------------------------------------
dictia:
image: dictia:latest
container_name: dictia
restart: unless-stopped
ports:
- "8899:8899"
env_file:
- ../.env
environment:
- LOG_LEVEL=ERROR
volumes:
- ../uploads:/data/uploads
- ../instance:/data/instance
# Uncomment for automatic export:
# - ../exports:/data/exports
# Uncomment for automatic processing:
# - ../auto-process:/data/auto-process
depends_on:
- whisperx-asr
- ollama
networks:
- dictia-net
# ---------------------------------------------------------------------------
# WhisperX ASR — Local transcription (WhisperX Large-v3)
# RTX 5070 Ti: BATCH_SIZE=32, COMPUTE_TYPE=float16
# ---------------------------------------------------------------------------
whisperx-asr:
image: murtazanasir/whisperx-asr-service:latest
container_name: whisperx-asr
restart: unless-stopped
environment:
- HF_TOKEN=${HF_TOKEN}
- DEVICE=cuda
- COMPUTE_TYPE=float16
- BATCH_SIZE=32
- DEFAULT_MODEL=large-v3
volumes:
- whisperx-models:/root/.cache
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
networks:
- dictia-net
# ---------------------------------------------------------------------------
# Ollama — LLM local Mistral 7B
# Résumés, points d'action, Q&A — 100% local, aucune donnée externe
# ---------------------------------------------------------------------------
ollama:
image: ollama/ollama:latest
container_name: ollama
restart: unless-stopped
volumes:
- ollama-models:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
networks:
- dictia-net
networks:
dictia-net:
driver: bridge
volumes:
whisperx-models:
driver: local
ollama-models:
driver: local

@@ -0,0 +1,75 @@
# =============================================================================
# DictIA 8 — Docker Compose
# GPU: RTX 5060 (8 GB VRAM)
# =============================================================================
#
# Services:
#   - dictia       : Main DictIA application
#   - whisperx-asr : WhisperX Large-v3 transcription service
#
# Startup:
#   1. cp config/env.dictia8.example .env
#   2. Fill in TEXT_MODEL_API_KEY in .env
#   3. docker compose -f config/docker-compose.dictia8.yml up -d
# =============================================================================
services:
  # ---------------------------------------------------------------------------
  # DictIA application
  # ---------------------------------------------------------------------------
  dictia:
    image: dictia:latest
    container_name: dictia
    restart: unless-stopped
    ports:
      - "8899:8899"
    env_file:
      - ../.env
    environment:
      - LOG_LEVEL=ERROR
    volumes:
      - ../uploads:/data/uploads
      - ../instance:/data/instance
      # Uncomment for automatic export:
      # - ../exports:/data/exports
      # Uncomment for automatic file processing:
      # - ../auto-process:/data/auto-process
    depends_on:
      - whisperx-asr
    networks:
      - dictia-net

  # ---------------------------------------------------------------------------
  # WhisperX ASR — local transcription (WhisperX Large-v3)
  # RTX 5060: BATCH_SIZE=16, COMPUTE_TYPE=float16
  # ---------------------------------------------------------------------------
  whisperx-asr:
    image: murtazanasir/whisperx-asr-service:latest
    container_name: whisperx-asr
    restart: unless-stopped
    environment:
      - HF_TOKEN=${HF_TOKEN}
      - DEVICE=cuda
      - COMPUTE_TYPE=float16
      - BATCH_SIZE=16
      - DEFAULT_MODEL=large-v3
    volumes:
      - whisperx-models:/root/.cache
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    networks:
      - dictia-net

networks:
  dictia-net:
    driver: bridge

volumes:
  whisperx-models:
    driver: local

@@ -0,0 +1,134 @@
# =============================================================================
# DictIA 16 — Configuration (.env)
# GPU: RTX 5070 Ti (16 GB VRAM)
# =============================================================================
#
# Architecture:
#   - Transcription  : WhisperX Large-v3 (local, ~5.5 GB VRAM)
#   - LLM (summaries): Local Mistral 7B via Ollama (~6.4 GB VRAM)
#   - Mode           : Sequential (transcription, then summary)
#   - Total VRAM     : ~11.9 GB / 16 GB (~4.1 GB headroom)
#
# Quickstart:
#   1. cp config/env.dictia16.example .env
#   2. No API key needed — everything runs locally
#   3. docker compose -f config/docker-compose.dictia16.yml up -d
# =============================================================================
# =============================================================================
# TEXT MODEL — Summaries, titles, chat (LOCAL LLM)
# =============================================================================
# DictIA 16 uses Mistral 7B locally via Ollama.
# No data leaves the server — 100% private.
TEXT_MODEL_BASE_URL=http://ollama:11434/v1
TEXT_MODEL_API_KEY=not-required
TEXT_MODEL_NAME=mistral
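# Smoke test once the stack is up (run on the host; the model must be pulled first):
#   docker exec ollama ollama run mistral "Say hello"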
# --- Separate chat model (optional) ---
# Same model by default; can be swapped for a faster one.
# CHAT_MODEL_API_KEY=not-required
# CHAT_MODEL_BASE_URL=http://ollama:11434/v1
# CHAT_MODEL_NAME=mistral

# =============================================================================
# TRANSCRIPTION — Local WhisperX ASR (REQUIRED)
# =============================================================================
# WhisperX runs locally in a separate Docker container.
# The ASR service is defined in docker-compose.dictia16.yml.
ASR_BASE_URL=http://whisperx-asr:9000

# Diarization (automatic speaker identification) — recommended
ASR_DIARIZE=true
ASR_RETURN_SPEAKER_EMBEDDINGS=true

# Expected number of speakers (optional; helps accuracy)
# ASR_MIN_SPEAKERS=1
# ASR_MAX_SPEAKERS=6
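# Startup check (host side): docker logs --tail 20 whisperx-asr
# The first run downloads the large-v3 model into the cache volume, which can take a while.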
# =============================================================================
# ADMIN SETTINGS
# =============================================================================
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@yourcompany.com
ADMIN_PASSWORD=changeme

# =============================================================================
# ACCESS AND REGISTRATION
# =============================================================================
# Disable public registration (invitation-only access)
ALLOW_REGISTRATION=false

# Restrict registration to allowed domains
# Example: REGISTRATION_ALLOWED_DOMAINS=yourcompany.com
REGISTRATION_ALLOWED_DOMAINS=

# =============================================================================
# TIMEZONE
# =============================================================================
# Examples: America/Toronto, America/Montreal, America/New_York, UTC
TIMEZONE="America/Toronto"

# =============================================================================
# TOKEN LIMITS
# =============================================================================
SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000

# =============================================================================
# AUDIO COMPRESSION
# =============================================================================
AUDIO_COMPRESS_UPLOADS=true
AUDIO_CODEC=mp3
AUDIO_BITRATE=128k

# =============================================================================
# OPTIONAL FEATURES
# =============================================================================
# Inquire Mode — AI search across all recordings
# Can be enabled on DictIA 16 (more VRAM available)
ENABLE_INQUIRE_MODE=false

# Automatic file processing (watched folder)
ENABLE_AUTO_PROCESSING=false
# AUTO_PROCESS_MODE=admin_only
# AUTO_PROCESS_WATCH_DIR=/data/auto-process

# Automatic export
ENABLE_AUTO_EXPORT=false
# AUTO_EXPORT_DIR=/data/exports
# AUTO_EXPORT_TRANSCRIPTION=true
# AUTO_EXPORT_SUMMARY=true

# Automatic deletion / retention
ENABLE_AUTO_DELETION=false
# GLOBAL_RETENTION_DAYS=90
# DELETION_MODE=audio_only

# =============================================================================
# SHARING
# =============================================================================
ENABLE_INTERNAL_SHARING=false
ENABLE_PUBLIC_SHARING=true
USERS_CAN_DELETE=true

# =============================================================================
# PROCESSING QUEUES
# =============================================================================
JOB_QUEUE_WORKERS=2
SUMMARY_QUEUE_WORKERS=2
JOB_MAX_RETRIES=3

# =============================================================================
# DATABASE AND STORAGE
# =============================================================================
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
UPLOAD_FOLDER=/data/uploads

# =============================================================================
# LOGGING
# =============================================================================
# ERROR = production (minimal), INFO = debugging, DEBUG = development
LOG_LEVEL=ERROR

@@ -0,0 +1,126 @@
# =============================================================================
# DictIA 8 — Configuration (.env)
# GPU: RTX 5060 (8 GB VRAM)
# =============================================================================
#
# Architecture:
#   - Transcription  : WhisperX Large-v3 (local, ~5.5 GB VRAM)
#   - LLM (summaries): Cloud API via OpenRouter (not enough VRAM for a local LLM)
#
# Quickstart:
#   1. cp config/env.dictia8.example .env
#   2. Fill in TEXT_MODEL_API_KEY (and set HF_TOKEN if you use diarization)
#   3. docker compose -f config/docker-compose.dictia8.yml up -d
# =============================================================================
# =============================================================================
# TEXT MODEL — Summaries, titles, chat (REQUIRED)
# =============================================================================
# DictIA 8 uses a cloud LLM via OpenRouter (not enough VRAM for a local LLM).
# Sign up at https://openrouter.ai to get an API key.
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini
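# Sanity-check the key (OpenRouter exposes an OpenAI-compatible API; illustrative request):
#   curl -s https://openrouter.ai/api/v1/chat/completions \
#     -H "Authorization: Bearer $TEXT_MODEL_API_KEY" -H "Content-Type: application/json" \
#     -d '{"model":"openai/gpt-4o-mini","messages":[{"role":"user","content":"ping"}]}'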
# =============================================================================
# TRANSCRIPTION — Local WhisperX ASR (REQUIRED)
# =============================================================================
# WhisperX runs locally in a separate Docker container.
# The ASR service is defined in docker-compose.dictia8.yml.
ASR_BASE_URL=http://whisperx-asr:9000

# Diarization (automatic speaker identification) — recommended
ASR_DIARIZE=true
ASR_RETURN_SPEAKER_EMBEDDINGS=true

# Expected number of speakers (optional; helps accuracy)
# ASR_MIN_SPEAKERS=1
# ASR_MAX_SPEAKERS=6

# =============================================================================
# ADMIN SETTINGS
# =============================================================================
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@yourcompany.com
ADMIN_PASSWORD=changeme

# =============================================================================
# ACCESS AND REGISTRATION
# =============================================================================
# Disable public registration (invitation-only access)
ALLOW_REGISTRATION=false

# Restrict registration to allowed domains
# Example: REGISTRATION_ALLOWED_DOMAINS=yourcompany.com
REGISTRATION_ALLOWED_DOMAINS=

# =============================================================================
# TIMEZONE
# =============================================================================
# Examples: America/Toronto, America/Montreal, America/New_York, UTC
TIMEZONE="America/Toronto"

# =============================================================================
# TOKEN LIMITS
# =============================================================================
SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000

# =============================================================================
# AUDIO COMPRESSION
# =============================================================================
AUDIO_COMPRESS_UPLOADS=true
AUDIO_CODEC=mp3
AUDIO_BITRATE=128k

# =============================================================================
# OPTIONAL FEATURES
# =============================================================================
# Inquire Mode — AI search across all recordings
# Disabled on DictIA 8 (not enough VRAM for local embeddings)
ENABLE_INQUIRE_MODE=false

# Automatic file processing (watched folder)
ENABLE_AUTO_PROCESSING=false
# AUTO_PROCESS_MODE=admin_only
# AUTO_PROCESS_WATCH_DIR=/data/auto-process

# Automatic export
ENABLE_AUTO_EXPORT=false
# AUTO_EXPORT_DIR=/data/exports
# AUTO_EXPORT_TRANSCRIPTION=true
# AUTO_EXPORT_SUMMARY=true

# Automatic deletion / retention
ENABLE_AUTO_DELETION=false
# GLOBAL_RETENTION_DAYS=90
# DELETION_MODE=audio_only

# =============================================================================
# SHARING
# =============================================================================
ENABLE_INTERNAL_SHARING=false
ENABLE_PUBLIC_SHARING=true
USERS_CAN_DELETE=true

# =============================================================================
# PROCESSING QUEUES
# =============================================================================
JOB_QUEUE_WORKERS=2
SUMMARY_QUEUE_WORKERS=2
JOB_MAX_RETRIES=3

# =============================================================================
# DATABASE AND STORAGE
# =============================================================================
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
UPLOAD_FOLDER=/data/uploads

# =============================================================================
# LOGGING
# =============================================================================
# ERROR = production (minimal), INFO = debugging, DEBUG = development
LOG_LEVEL=ERROR

@@ -0,0 +1,8 @@
{
"log-driver": "json-file",
"log-opts": {
"max-size": "10m",
"max-file": "3"
},
"storage-driver": "overlay2"
}

@@ -0,0 +1,12 @@
[Unit]
Description=DictIA Docker iptables rules
After=docker.service tailscaled.service
Requires=docker.service
[Service]
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/bash /opt/dictia/deployment/security/iptables-rules.sh
[Install]
WantedBy=multi-user.target

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# DictIA — iptables rules for cloud VPS
#
# Allows Docker internal traffic to reach the ASR proxy on port 9090.
# Blocks direct external access to Docker container IPs.
# Tailscale + UFW handle the main firewall — this script adds Docker-specific rules.
#
# Usage: sudo bash iptables-rules.sh
set -euo pipefail
echo "=== DictIA iptables rules ==="
# Allow Docker containers (172.16.0.0/12) to reach ASR proxy on port 9090
# This rule goes BEFORE the default DROP policy so containers can talk to the proxy
iptables -C INPUT -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT 2>/dev/null \
|| iptables -I INPUT 1 -s 172.16.0.0/12 -p tcp --dport 9090 -j ACCEPT
# Block direct external access to Docker container IPs (raw table, before conntrack)
# Protects containers on non-default bridge networks (e.g., dictia-network)
for NETWORK_ID in $(docker network ls --filter driver=bridge --format '{{.ID}}' 2>/dev/null); do
  # Keys containing dots need the "index" template function in Go templates
  BRIDGE=$(docker network inspect "$NETWORK_ID" \
    --format '{{index .Options "com.docker.network.bridge.name"}}' 2>/dev/null || echo "")
  # User-defined bridges usually have no explicit name option; Docker names them br-<id12>
  [ -z "$BRIDGE" ] && BRIDGE="br-${NETWORK_ID:0:12}"
  [ "$BRIDGE" = "docker0" ] && continue
  for CONTAINER_IP in $(docker network inspect "$NETWORK_ID" \
      --format '{{range .Containers}}{{.IPv4Address}} {{end}}' 2>/dev/null); do
    IP="${CONTAINER_IP%/*}"
    [ -z "$IP" ] && continue
    iptables -t raw -C PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP 2>/dev/null \
      || iptables -t raw -A PREROUTING -d "$IP" ! -i "$BRIDGE" -j DROP
    echo "  Protected $IP on $BRIDGE"
  done
done
echo "Rules applied. Tailscale + Docker internal traffic allowed."
echo "Verify with: sudo iptables -L -n -t raw"

deployment/setup.sh Executable file
@@ -0,0 +1,300 @@
#!/usr/bin/env bash
# DictIA — Main setup script
#
# Interactive installer that detects hardware and configures the appropriate
# deployment profile (cloud, local-cpu, local-gpu).
#
# Usage:
# bash deployment/setup.sh # Interactive mode
# bash deployment/setup.sh --profile cloud # Non-interactive
# bash deployment/setup.sh --profile local-gpu
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
PROFILE=""
for arg in "$@"; do
case "$arg" in
--profile=*) PROFILE="${arg#*=}" ;;
--profile) shift_next=true ;;
*)
if [ "${shift_next:-false}" = true ]; then
PROFILE="$arg"
shift_next=false
fi
;;
esac
done
# --- Colors ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
info() { echo -e "${CYAN}[INFO]${NC} $*"; }
ok() { echo -e "${GREEN}[OK]${NC} $*"; }
warn() { echo -e "${YELLOW}[WARN]${NC} $*"; }
err() { echo -e "${RED}[ERROR]${NC} $*"; }
echo
echo -e "${CYAN}========================================${NC}"
echo -e "${CYAN} DictIA — Setup${NC}"
echo -e "${CYAN}========================================${NC}"
echo
# ==========================================================================
# 1. Hardware Detection
# ==========================================================================
info "Detecting hardware..."
# Docker
if command -v docker &>/dev/null && docker info &>/dev/null; then
DOCKER_VERSION=$(docker --version | grep -oP '\d+\.\d+\.\d+' | head -1)
ok "Docker $DOCKER_VERSION"
else
err "Docker not found or not running."
echo " Install Docker: https://docs.docker.com/engine/install/"
exit 1
fi
# Docker Compose
if docker compose version &>/dev/null; then
COMPOSE_VERSION=$(docker compose version --short 2>/dev/null || echo "unknown")
ok "Docker Compose $COMPOSE_VERSION"
else
err "Docker Compose not found."
echo " Docker Compose V2 is required (comes with Docker Desktop or docker-compose-plugin)"
exit 1
fi
# GPU
HAS_GPU=false
if command -v nvidia-smi &>/dev/null; then
GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader 2>/dev/null | head -1 || echo "")
if [ -n "$GPU_NAME" ]; then
HAS_GPU=true
ok "NVIDIA GPU: $GPU_NAME"
# Check nvidia-container-toolkit
if docker info 2>/dev/null | grep -qi nvidia; then
ok "nvidia-container-toolkit detected"
else
warn "nvidia-container-toolkit not detected. Required for local-gpu profile."
echo " Install: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
fi
fi
else
info "No NVIDIA GPU detected"
fi
# RAM
if command -v free &>/dev/null; then
RAM_GB=$(free -g | awk '/Mem:/{print $2}')
info "RAM: ${RAM_GB}GB"
fi
# Disk
DISK_AVAIL=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $4}')
info "Disk available: $DISK_AVAIL"
echo
# ==========================================================================
# 2. Profile Selection
# ==========================================================================
if [ -z "$PROFILE" ]; then
echo -e "${CYAN}Select deployment profile:${NC}"
echo
echo " 1) cloud — VPS with ASR Proxy (GCP GPU on demand)"
echo " Best for: remote servers, pay-per-use GPU"
echo
echo " 2) local-gpu — Local NVIDIA GPU for transcription"
echo " Best for: dedicated GPU server, fastest"
if [ "$HAS_GPU" = false ]; then
echo -e " ${YELLOW}(No GPU detected on this machine)${NC}"
fi
echo
echo " 3) local-cpu — CPU-only transcription (slow)"
echo " Best for: testing, low-volume usage"
echo
read -rp "Choice [1-3]: " CHOICE
case "$CHOICE" in
1) PROFILE="cloud" ;;
2) PROFILE="local-gpu" ;;
3) PROFILE="local-cpu" ;;
*) err "Invalid choice"; exit 1 ;;
esac
fi
COMPOSE_FILE="$SCRIPT_DIR/docker/docker-compose.$PROFILE.yml"
if [ ! -f "$COMPOSE_FILE" ]; then
err "Compose file not found: $COMPOSE_FILE"
exit 1
fi
ok "Profile: $PROFILE"
echo
# ==========================================================================
# 3. Generate .env
# ==========================================================================
ENV_FILE="$PROJECT_DIR/.env"
if [ -f "$ENV_FILE" ]; then
warn ".env already exists. Keeping existing configuration."
echo " To reconfigure, delete .env and re-run setup."
else
info "Generating .env..."
# Generate secret key (64 hex chars on a single line; the old xxd fallback
# could embed a newline because xxd -p wraps output at 60 columns)
SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_hex(32))" 2>/dev/null \
|| openssl rand -hex 32 2>/dev/null \
|| head -c 32 /dev/urandom | xxd -p -c 64)
# Prompt for admin credentials
read -rp "Admin username [admin]: " ADMIN_USER
ADMIN_USER="${ADMIN_USER:-admin}"
read -rp "Admin email [admin@example.com]: " ADMIN_EMAIL
ADMIN_EMAIL="${ADMIN_EMAIL:-admin@example.com}"
read -rsp "Admin password: " ADMIN_PASS
echo
ADMIN_PASS="${ADMIN_PASS:-changeme}"
# Prompt for text model API key
echo
info "DictIA needs a text/LLM API key for summaries, titles, and chat."
echo " Recommended: OpenRouter (https://openrouter.ai) — access to many models"
read -rp "Text model API key (or press Enter to skip): " TEXT_API_KEY
TEXT_API_KEY="${TEXT_API_KEY:-your_openrouter_api_key}"
# HuggingFace token for diarization
if [ "$PROFILE" != "cloud" ]; then
echo
info "For speaker diarization, a HuggingFace token is needed."
echo " Get one at: https://huggingface.co/settings/tokens"
echo " Accept model: https://huggingface.co/pyannote/speaker-diarization-3.1"
read -rp "HuggingFace token (or press Enter to skip): " HF_TOKEN
HF_TOKEN="${HF_TOKEN:-}"
else
HF_TOKEN=""
fi
# Write .env
cp "$SCRIPT_DIR/docker/.env.example" "$ENV_FILE"
sed -i "s|SECRET_KEY=.*|SECRET_KEY=$SECRET_KEY|" "$ENV_FILE"
sed -i "s|DICTIA_PROFILE=.*|DICTIA_PROFILE=$PROFILE|" "$ENV_FILE"
sed -i "s|ADMIN_USERNAME=.*|ADMIN_USERNAME=$ADMIN_USER|" "$ENV_FILE"
sed -i "s|ADMIN_EMAIL=.*|ADMIN_EMAIL=$ADMIN_EMAIL|" "$ENV_FILE"
sed -i "s|ADMIN_PASSWORD=.*|ADMIN_PASSWORD=$ADMIN_PASS|" "$ENV_FILE"
sed -i "s|TEXT_MODEL_API_KEY=.*|TEXT_MODEL_API_KEY=$TEXT_API_KEY|" "$ENV_FILE"
sed -i "s|HF_TOKEN=.*|HF_TOKEN=$HF_TOKEN|" "$ENV_FILE"
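# Note: the sed calls above rely on GNU sed's -i; on macOS/BSD use `sed -i ''`.
# Values containing '|' or '&' would also need escaping before substitution.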
ok ".env generated"
fi
echo
# ==========================================================================
# 4. Create data directories
# ==========================================================================
info "Creating data directories..."
mkdir -p "$PROJECT_DIR/data/uploads" "$PROJECT_DIR/data/instance"
ok "data/uploads and data/instance created"
echo
# ==========================================================================
# 5. Profile-specific setup
# ==========================================================================
case "$PROFILE" in
cloud)
info "Cloud profile — setting up ASR Proxy..."
if [ -f "$SCRIPT_DIR/asr-proxy/setup.sh" ]; then
echo " Run the ASR proxy setup separately:"
echo " bash $SCRIPT_DIR/asr-proxy/setup.sh"
fi
echo
info "Setting up iptables rules..."
if [ -f "$SCRIPT_DIR/security/iptables-rules.sh" ] && [ "$(id -u)" -eq 0 ]; then
bash "$SCRIPT_DIR/security/iptables-rules.sh"
else
echo " Run as root: sudo bash $SCRIPT_DIR/security/iptables-rules.sh"
fi
echo
info "Setting up Tailscale Serve..."
if command -v tailscale &>/dev/null; then
echo " Run: bash $SCRIPT_DIR/config/tailscale/setup-serve.sh"
else
warn "Tailscale not installed."
echo " Install: curl -fsSL https://tailscale.com/install.sh | sh"
fi
;;
local-gpu)
info "Local GPU profile — verifying NVIDIA runtime..."
if docker info 2>/dev/null | grep -qi nvidia; then
ok "NVIDIA Docker runtime available"
# Quick GPU test
if docker run --rm --gpus all nvidia/cuda:12.0.0-base-ubuntu22.04 nvidia-smi &>/dev/null; then
ok "GPU test passed"
else
warn "GPU test failed. Check nvidia-container-toolkit installation."
fi
else
err "NVIDIA Docker runtime not found."
echo " Install nvidia-container-toolkit and restart Docker."
echo " https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html"
fi
;;
local-cpu)
warn "CPU-only transcription is significantly slower than GPU."
echo " Expect ~10x real-time (1h audio = ~10h processing)."
echo " Consider local-gpu or cloud profile for better performance."
;;
esac
echo
# ==========================================================================
# 6. Build and start
# ==========================================================================
info "Building DictIA Docker image..."
cd "$PROJECT_DIR"
docker build -t innova-ai/dictia:latest .
ok "Image built"
echo
info "Starting DictIA ($PROFILE profile)..."
docker compose -f "$COMPOSE_FILE" up -d
ok "Containers started"
# ==========================================================================
# 7. Health check
# ==========================================================================
echo
info "Waiting for DictIA to become healthy..."
RETRIES=30
for i in $(seq 1 $RETRIES); do
if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
ok "DictIA is healthy!"
break
fi
if [ "$i" -eq "$RETRIES" ]; then
warn "Health check timeout. Check logs: docker compose -f $COMPOSE_FILE logs"
fi
sleep 5
done
echo
echo -e "${GREEN}========================================${NC}"
echo -e "${GREEN} DictIA is ready!${NC}"
echo -e "${GREEN}========================================${NC}"
echo
echo " App: http://localhost:8899"
echo " Profile: $PROFILE"
echo " Compose: $COMPOSE_FILE"
echo
echo " Tools:"
echo " Update: bash deployment/tools/update.sh"
echo " Backup: bash deployment/tools/backup.sh"
echo " Health check: bash deployment/tools/health-check.sh"
echo

@@ -0,0 +1,89 @@
#!/usr/bin/env bash
# DictIA — Backup script
#
# Creates a timestamped backup of data, env, and Docker volumes.
# Keeps the last N backups (default: 5).
#
# Usage: bash backup.sh [BACKUP_DIR]
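# Examples:
#   bash backup.sh                        # writes to <project>/backups
#   bash backup.sh /mnt/backups/dictia    # custom destination (example path)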
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
BACKUP_BASE="${1:-$PROJECT_DIR/backups}"
KEEP_COUNT=5
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
BACKUP_DIR="$BACKUP_BASE/dictia-$TIMESTAMP"
echo "=== DictIA Backup ==="
echo "Project: $PROJECT_DIR"
echo "Backup: $BACKUP_DIR"
echo
mkdir -p "$BACKUP_DIR"
# 1. Data directory
if [ -d "$PROJECT_DIR/data" ]; then
echo "[1/4] Backing up data/..."
cp -a "$PROJECT_DIR/data" "$BACKUP_DIR/data"
else
echo "[1/4] No data/ directory found, skipping."
fi
# 2. Environment file
if [ -f "$PROJECT_DIR/.env" ]; then
echo "[2/4] Backing up .env..."
cp "$PROJECT_DIR/.env" "$BACKUP_DIR/dot-env"
else
echo "[2/4] No .env found, skipping."
fi
# 3. ASR Proxy stats
ASR_STATS="$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
if [ -f "$ASR_STATS" ]; then
echo "[3/4] Backing up ASR proxy stats..."
cp "$ASR_STATS" "$BACKUP_DIR/asr-usage-stats.json"
else
echo "[3/4] No ASR proxy stats, skipping."
fi
# 4. Docker volumes (if using managed volumes)
echo "[4/4] Checking Docker volumes..."
if docker volume ls --format '{{.Name}}' 2>/dev/null | grep -q "whisperx-cache"; then
echo " Exporting whisperx-cache volume..."
docker run --rm -v whisperx-cache:/source -v "$BACKUP_DIR":/backup \
alpine tar czf /backup/whisperx-cache.tar.gz -C /source . 2>/dev/null || true
fi
# Write manifest
cat > "$BACKUP_DIR/manifest.json" <<MANIFEST
{
"timestamp": "$TIMESTAMP",
"project_dir": "$PROJECT_DIR",
"hostname": "$(hostname)",
"contents": {
"data": $([ -d "$BACKUP_DIR/data" ] && echo "true" || echo "false"),
"env": $([ -f "$BACKUP_DIR/dot-env" ] && echo "true" || echo "false"),
"asr_stats": $([ -f "$BACKUP_DIR/asr-usage-stats.json" ] && echo "true" || echo "false"),
"whisperx_cache": $([ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ] && echo "true" || echo "false")
}
}
MANIFEST
# Compress
echo
echo "Compressing backup..."
ARCHIVE="$BACKUP_BASE/dictia-$TIMESTAMP.tar.gz"
tar czf "$ARCHIVE" -C "$BACKUP_BASE" "dictia-$TIMESTAMP"
rm -rf "$BACKUP_DIR"
echo "Archive: $ARCHIVE ($(du -h "$ARCHIVE" | cut -f1))"
# Rotate old backups
BACKUP_COUNT=$(ls -1 "$BACKUP_BASE"/dictia-*.tar.gz 2>/dev/null | wc -l)
if [ "$BACKUP_COUNT" -gt "$KEEP_COUNT" ]; then
echo
echo "Rotating backups (keeping last $KEEP_COUNT)..."
ls -1t "$BACKUP_BASE"/dictia-*.tar.gz | tail -n +"$((KEEP_COUNT + 1))" | xargs rm -f
fi
echo
echo "=== Backup complete ==="

@@ -0,0 +1,157 @@
#!/usr/bin/env bash
# DictIA — Health check diagnostic
#
# Checks Docker, containers, endpoints, disk, RAM, and GPU.
#
# Usage:
# bash health-check.sh # Human-readable output
# bash health-check.sh --json # JSON output
# bash health-check.sh --quiet # Exit code only (0=ok, 1=issue)
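# Example cron entry (every 5 minutes; adjust the install path):
#   */5 * * * * bash /opt/dictia/deployment/tools/health-check.sh --quiet || logger -t dictia "health check failed"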
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
OUTPUT="human"
ISSUES=0
for arg in "$@"; do
case "$arg" in
--json) OUTPUT="json" ;;
--quiet) OUTPUT="quiet" ;;
esac
done
declare -A CHECKS
check() {
local name="$1"
local status="$2"
local detail="${3:-}"
CHECKS["$name"]="$status|$detail"
if [ "$status" = "error" ] || [ "$status" = "warning" ]; then
ISSUES=$((ISSUES + 1))
fi
}
# --- Docker ---
if command -v docker &>/dev/null && docker info &>/dev/null; then
check "docker" "ok" "Docker daemon running"
else
check "docker" "error" "Docker not available"
fi
# --- Containers ---
DICTIA_STATUS=$(docker inspect --format='{{.State.Health.Status}}' dictia 2>/dev/null || echo "not_found")
if [ "$DICTIA_STATUS" = "healthy" ]; then
check "container_dictia" "ok" "healthy"
elif [ "$DICTIA_STATUS" = "not_found" ]; then
check "container_dictia" "error" "container not found"
else
check "container_dictia" "warning" "$DICTIA_STATUS"
fi
WHISPERX_STATUS=$(docker inspect --format='{{.State.Status}}' whisperx-asr 2>/dev/null || echo "not_found")
if [ "$WHISPERX_STATUS" = "running" ]; then
check "container_whisperx" "ok" "running"
elif [ "$WHISPERX_STATUS" = "not_found" ]; then
check "container_whisperx" "info" "not present (cloud profile?)"
else
check "container_whisperx" "warning" "$WHISPERX_STATUS"
fi
# --- Endpoints ---
if curl -sf -o /dev/null -m 5 http://localhost:8899/health 2>/dev/null; then
check "endpoint_dictia" "ok" "http://localhost:8899 responding"
else
check "endpoint_dictia" "error" "http://localhost:8899 not responding"
fi
if curl -sf -o /dev/null -m 5 http://localhost:9000/health 2>/dev/null; then
check "endpoint_whisperx" "ok" "http://localhost:9000 responding"
else
check "endpoint_whisperx" "info" "http://localhost:9000 not responding"
fi
if curl -sf -o /dev/null -m 5 http://localhost:9090/health 2>/dev/null; then
check "endpoint_asr_proxy" "ok" "http://localhost:9090 responding"
else
check "endpoint_asr_proxy" "info" "http://localhost:9090 not responding"
fi
# --- Disk ---
DISK_USED=$(df -h "$PROJECT_DIR" 2>/dev/null | awk 'NR==2{print $5}' | tr -d '%')
if [ -n "$DISK_USED" ]; then
if [ "$DISK_USED" -gt 90 ]; then
check "disk" "error" "${DISK_USED}% used"
elif [ "$DISK_USED" -gt 80 ]; then
check "disk" "warning" "${DISK_USED}% used"
else
check "disk" "ok" "${DISK_USED}% used"
fi
fi
# --- RAM ---
if command -v free &>/dev/null; then
MEM_TOTAL=$(free -m | awk '/Mem:/{print $2}')
MEM_AVAIL=$(free -m | awk '/Mem:/{print $7}')
MEM_USED_PCT=$(( (MEM_TOTAL - MEM_AVAIL) * 100 / MEM_TOTAL ))
if [ "$MEM_USED_PCT" -gt 90 ]; then
check "memory" "warning" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
else
check "memory" "ok" "${MEM_USED_PCT}% used (${MEM_AVAIL}MB available)"
fi
fi
# --- GPU ---
if command -v nvidia-smi &>/dev/null; then
GPU_INFO=$(nvidia-smi --query-gpu=name,memory.used,memory.total --format=csv,noheader 2>/dev/null || echo "error")
if [ "$GPU_INFO" != "error" ]; then
check "gpu" "ok" "$GPU_INFO"
else
check "gpu" "warning" "nvidia-smi present but query failed"
fi
fi
# --- Output ---
if [ "$OUTPUT" = "json" ]; then
echo "{"
echo " \"timestamp\": \"$(date -Is)\","
echo " \"issues\": $ISSUES,"
echo " \"checks\": {"
FIRST=true
for name in "${!CHECKS[@]}"; do
IFS='|' read -r status detail <<< "${CHECKS[$name]}"
if [ "$FIRST" = true ]; then
FIRST=false
else
echo ","
fi
printf ' "%s": {"status": "%s", "detail": "%s"}' "$name" "$status" "$detail"
done
echo
echo " }"
echo "}"
elif [ "$OUTPUT" = "quiet" ]; then
exit $( [ "$ISSUES" -eq 0 ] && echo 0 || echo 1 )
else
echo "=== DictIA Health Check ==="
echo
for name in docker container_dictia container_whisperx endpoint_dictia endpoint_whisperx endpoint_asr_proxy disk memory gpu; do
if [ -n "${CHECKS[$name]+x}" ]; then
IFS='|' read -r status detail <<< "${CHECKS[$name]}"
case "$status" in
ok) ICON="[OK]" ;;
warning) ICON="[!!]" ;;
error) ICON="[ERR]" ;;
info) ICON="[--]" ;;
esac
printf " %-22s %s %s\n" "$name" "$ICON" "$detail"
fi
done
echo
if [ "$ISSUES" -eq 0 ]; then
echo "All checks passed."
else
echo "$ISSUES issue(s) found."
fi
fi

deployment/tools/restore.sh Normal file
@@ -0,0 +1,101 @@
#!/usr/bin/env bash
# DictIA — Restore script
#
# Restores a DictIA backup archive created by backup.sh.
#
# Usage: bash restore.sh <ARCHIVE_PATH> [PROJECT_DIR]
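# Example (archive names follow backup.sh's dictia-YYYYMMDD-HHMMSS pattern):
#   bash restore.sh backups/dictia-20260316-030000.tar.gz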
set -euo pipefail
ARCHIVE="${1:-}"
PROJECT_DIR="${2:-$(cd "$(dirname "$0")/../.." && pwd)}"
if [ -z "$ARCHIVE" ] || [ ! -f "$ARCHIVE" ]; then
echo "Usage: bash restore.sh <backup-archive.tar.gz> [project-dir]"
echo
echo "Available backups:"
ls -1t "$PROJECT_DIR/backups"/dictia-*.tar.gz 2>/dev/null | head -5 || echo " (none found)"
exit 1
fi
echo "=== DictIA Restore ==="
echo "Archive: $ARCHIVE"
echo "Target: $PROJECT_DIR"
echo
# Validate archive
echo "Validating archive..."
TMPDIR=$(mktemp -d)
tar xzf "$ARCHIVE" -C "$TMPDIR"
BACKUP_DIR=$(ls -1d "$TMPDIR"/dictia-* | head -1)
if [ ! -f "$BACKUP_DIR/manifest.json" ]; then
echo "ERROR: Invalid backup archive (no manifest.json)"
rm -rf "$TMPDIR"
exit 1
fi
echo "Manifest:"
cat "$BACKUP_DIR/manifest.json"
echo
echo
# Confirmation
read -rp "This will overwrite current data. Continue? [y/N] " CONFIRM
if [ "$CONFIRM" != "y" ] && [ "$CONFIRM" != "Y" ]; then
echo "Aborted."
rm -rf "$TMPDIR"
exit 0
fi
# Stop services
echo
echo "Stopping DictIA services..."
COMPOSE_FILE=""
for f in cloud local-cpu local-gpu; do
if [ -f "$PROJECT_DIR/deployment/docker/docker-compose.$f.yml" ]; then
COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
fi
done
if [ -n "$COMPOSE_FILE" ]; then
docker compose -f "$COMPOSE_FILE" down 2>/dev/null || true
fi
# Restore data
if [ -d "$BACKUP_DIR/data" ]; then
echo "Restoring data/..."
rm -rf "$PROJECT_DIR/data"
cp -a "$BACKUP_DIR/data" "$PROJECT_DIR/data"
fi
# Restore .env
if [ -f "$BACKUP_DIR/dot-env" ]; then
echo "Restoring .env..."
cp "$BACKUP_DIR/dot-env" "$PROJECT_DIR/.env"
fi
# Restore ASR stats
if [ -f "$BACKUP_DIR/asr-usage-stats.json" ]; then
echo "Restoring ASR proxy stats..."
cp "$BACKUP_DIR/asr-usage-stats.json" "$PROJECT_DIR/deployment/asr-proxy/usage-stats.json"
fi
# Restore Docker volumes
if [ -f "$BACKUP_DIR/whisperx-cache.tar.gz" ]; then
echo "Restoring whisperx-cache volume..."
docker volume create whisperx-cache 2>/dev/null || true
docker run --rm -v whisperx-cache:/target -v "$BACKUP_DIR":/backup \
alpine sh -c "cd /target && tar xzf /backup/whisperx-cache.tar.gz" 2>/dev/null || true
fi
# Cleanup
rm -rf "$TMPDIR"
# Restart services
echo
echo "Restarting DictIA..."
if [ -n "$COMPOSE_FILE" ]; then
docker compose -f "$COMPOSE_FILE" up -d
fi
echo
echo "=== Restore complete ==="

deployment/tools/update.sh Normal file
@@ -0,0 +1,105 @@
#!/usr/bin/env bash
# DictIA — Update script
#
# Pulls latest code, rebuilds Docker image, and restarts services.
# Detects the active deployment profile automatically.
#
# Usage: bash update.sh [--no-pull] [--no-build]
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)"
NO_PULL=false
NO_BUILD=false
for arg in "$@"; do
case "$arg" in
--no-pull) NO_PULL=true ;;
--no-build) NO_BUILD=true ;;
*) echo "Unknown option: $arg"; exit 1 ;;
esac
done
echo "=== DictIA Update ==="
echo "Project: $PROJECT_DIR"
echo
# 1. Detect active compose file
COMPOSE_FILE=""
PROFILE=""
for f in cloud local-cpu local-gpu; do
CF="$PROJECT_DIR/deployment/docker/docker-compose.$f.yml"
if [ -f "$CF" ] && docker compose -f "$CF" ps --quiet 2>/dev/null | grep -q .; then
COMPOSE_FILE="$CF"
PROFILE="$f"
break
fi
done
if [ -z "$COMPOSE_FILE" ]; then
# Fallback: check .env for profile
if [ -f "$PROJECT_DIR/.env" ]; then
PROFILE=$(grep -E '^DICTIA_PROFILE=' "$PROJECT_DIR/.env" 2>/dev/null | cut -d= -f2 || echo "cloud")
fi
PROFILE="${PROFILE:-cloud}"
COMPOSE_FILE="$PROJECT_DIR/deployment/docker/docker-compose.$PROFILE.yml"
fi
echo "Profile: $PROFILE"
echo "Compose: $COMPOSE_FILE"
echo
# 2. Git pull
if [ "$NO_PULL" = false ]; then
echo "[1/5] Pulling latest code..."
cd "$PROJECT_DIR"
git pull origin dictia-branding
else
echo "[1/5] Skipping git pull (--no-pull)"
fi
# 3. Rebuild DictIA image
if [ "$NO_BUILD" = false ]; then
echo "[2/5] Building DictIA image..."
cd "$PROJECT_DIR"
docker build -t innova-ai/dictia:latest .
else
echo "[2/5] Skipping build (--no-build)"
fi
# 3b. Pull upstream images (WhisperX) if local profile
if [ "$PROFILE" != "cloud" ] && [ "$NO_BUILD" = false ]; then
echo "[3/5] Pulling upstream images (WhisperX)..."
docker compose -f "$COMPOSE_FILE" pull whisperx-asr 2>/dev/null || true
else
echo "[3/5] Skipping upstream pull (cloud profile or --no-build)"
fi
# 4. Restart containers
echo "[4/5] Restarting containers..."
docker compose -f "$COMPOSE_FILE" down
docker compose -f "$COMPOSE_FILE" up -d
# 5. Wait for health
echo "[5/5] Waiting for health check..."
RETRIES=30
for i in $(seq 1 $RETRIES); do
if docker compose -f "$COMPOSE_FILE" ps | grep -q "healthy"; then
echo " DictIA is healthy!"
break
fi
if [ "$i" -eq "$RETRIES" ]; then
echo " WARNING: Health check timeout. Check: docker compose -f $COMPOSE_FILE logs"
fi
sleep 5
done
# Cleanup dangling images
echo
echo "Cleaning up old images..."
docker image prune -f 2>/dev/null || true
echo
echo "=== Update complete ==="
echo "DictIA: http://localhost:8899"
docker compose -f "$COMPOSE_FILE" ps