# Files
# dictia-public/deployment/docker/docker-compose.local-gpu.yml
#
# 70 lines
# 1.6 KiB
# YAML
#
# DictIA — Local GPU deployment (WhisperX on NVIDIA GPU + DictIA)
#
# Usage:
# docker compose -f deployment/docker/docker-compose.local-gpu.yml up -d
#
# Prerequisites:
# - NVIDIA GPU with CUDA support
# - nvidia-container-toolkit installed
# - Docker configured with nvidia runtime
services:
  # WhisperX ASR backend — runs the speech-recognition model on the local
  # NVIDIA GPU and exposes its HTTP API on port 9000.
  whisperx-asr:
    image: ghcr.io/jim60105/whisperx-asr:latest-cuda
    container_name: whisperx-asr
    restart: unless-stopped
    ports:
      - "9000:9000"
    environment:
      # Model/engine selection; ASR_MODEL and HF_TOKEN can be overridden
      # from the host environment (HF_TOKEN is needed for gated models).
      - ASR_MODEL=${ASR_MODEL:-large-v3}
      - ASR_ENGINE=whisperx
      - DEVICE=cuda
      - COMPUTE_TYPE=float16
      - HF_TOKEN=${HF_TOKEN:-}
    volumes:
      # Persist downloaded model weights between container restarts.
      - whisperx-cache:/root/.cache
    deploy:
      resources:
        reservations:
          # Reserve one NVIDIA GPU (requires nvidia-container-toolkit).
          devices:
            - driver: nvidia
              count: 1
              capabilities: ["gpu"]
    networks:
      - dictia-network

  # DictIA application — built from the repository root, talks to the
  # whisperx-asr service over the internal network.
  dictia:
    build:
      context: ../..
      dockerfile: Dockerfile
    image: innova-ai/dictia:latest
    container_name: dictia
    restart: unless-stopped
    ports:
      - "8899:8899"
    env_file:
      - ../../.env
    environment:
      - LOG_LEVEL=${LOG_LEVEL:-ERROR}
      # Service-name DNS resolution inside the compose network.
      - ASR_BASE_URL=http://whisperx-asr:9000
    volumes:
      - ../../data/uploads:/data/uploads
      - ../../data/instance:/data/instance
    depends_on:
      - whisperx-asr
    healthcheck:
      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8899/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Allow time for app startup before health failures count.
      start_period: 30s
    networks:
      - dictia-network

volumes:
  # Named volume for WhisperX model cache.
  whisperx-cache:

networks:
  dictia-network:
    driver: bridge