# -----------------------------------------------------------------------------
# DictIA Configuration: ASR Endpoint (Legacy)
#
# ⚠️ DEPRECATION NOTICE: This configuration style is still supported but
# we recommend using the new unified configuration in env.transcription.example
# which supports all transcription providers with auto-detection.
#
# Migration: Simply set ASR_BASE_URL and the connector will auto-detect ASR mode.
# USE_ASR_ENDPOINT=true is no longer required (but still works for backwards compat).
#
# Instructions:
# 1. Copy this file to a new file named .env
#      cp env.asr.example .env
# 2. Fill in the required URLs, API keys, and settings below.
#
# Format notes:
# - One KEY=value pair per line; comments go on their own line starting with '#'.
# - Do not wrap values in quotes and do not add trailing comments after a value:
#   some env-file loaders (e.g. `docker run --env-file`) keep both as part of
#   the value.
# -----------------------------------------------------------------------------

# --- Text Generation Model (for summaries, titles, etc.) ---
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini

# --- GPT-5 Specific Settings (only used with OpenAI API and GPT-5 models) ---
# If using GPT-5 models (gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat-latest) with OpenAI API,
# these parameters will be used instead of temperature.
#
# Example GPT-5 configuration:
# TEXT_MODEL_BASE_URL=https://api.openai.com/v1
# TEXT_MODEL_NAME=gpt-5-mini
#
# Reasoning effort: minimal, low, medium, high (default: medium)
#   - minimal: Fastest responses, minimal reasoning tokens
#   - low: Fast responses with basic reasoning
#   - medium: Balanced reasoning and speed (recommended)
#   - high: Maximum reasoning for complex tasks
GPT5_REASONING_EFFORT=medium
#
# Verbosity: low, medium, high (default: medium)
#   - low: Concise responses
#   - medium: Balanced detail
#   - high: Detailed explanations
GPT5_VERBOSITY=medium

# --- Chat Model Configuration (Optional) ---
# Configure a separate model for real-time chat interactions.
# If not set, chat will use the TEXT_MODEL_* settings above.
#
# Use cases:
#   - Use a faster model for chat while using a more capable model for summarization
#   - Use a cheaper model for interactive chat to reduce costs
#   - Use different service tiers for different operations
#
# CHAT_MODEL_API_KEY=your_chat_api_key
# CHAT_MODEL_BASE_URL=https://openrouter.ai/api/v1
# CHAT_MODEL_NAME=openai/gpt-4o

# --- Chat GPT-5 Settings (only used with OpenAI API and GPT-5 chat models) ---
# These settings allow independent control of GPT-5 parameters for chat.
# If not set, falls back to the main GPT5_* settings above.
#
# CHAT_GPT5_REASONING_EFFORT=medium
# CHAT_GPT5_VERBOSITY=medium

# --- LLM Streaming Compatibility ---
# Some LLM servers (e.g., certain vLLM configurations) don't support OpenAI's
# stream_options parameter. If chat streaming hangs or fails, try disabling this.
# Note: When disabled, token usage tracking for chat will not be available.
# ENABLE_STREAM_OPTIONS=false

# --- Transcription Service (ASR Endpoint) ---
# New connector architecture auto-detects ASR mode when ASR_BASE_URL is set.
# USE_ASR_ENDPOINT=true is deprecated but still works for backwards compatibility.
#
# Note: ASR endpoints handle chunking internally - CHUNK_LIMIT settings are ignored.
# (Optional app-level chunking for ASR is configured separately; see
# "ASR Chunking" below.)

# ASR Endpoint URL (setting this auto-enables ASR mode)
# For containers in same docker-compose: Use container name and internal port
#   Example: http://whisper-asr:9000 (NOT the host port 6002 or external IP)
# For external ASR: Use http://192.168.1.100:9000 or http://asr.example.com:9000
ASR_BASE_URL=http://whisper-asr:9000

# Deprecated: No longer needed, kept for backwards compatibility
# USE_ASR_ENDPOINT=true

# Speaker diarization options
ASR_DIARIZE=true
# Hint for the minimum number of speakers
# ASR_MIN_SPEAKERS=1
# Hint for the maximum number of speakers
# ASR_MAX_SPEAKERS=5
# Only enable for WhisperX ASR service
# ASR_RETURN_SPEAKER_EMBEDDINGS=false

# --- ASR Chunking (for GPUs with limited memory) ---
# Self-hosted ASR services may crash on long files due to GPU memory exhaustion.
# Enable app-level chunking to split long files before sending to ASR.
# Default: false (ASR service handles files internally)
# ASR_ENABLE_CHUNKING=true

# Maximum audio duration per chunk in seconds (default: 7200 = 2 hours)
# Lower this value if your GPU runs out of memory on long files.
# Common values: 600 (10 min), 1200 (20 min), 1800 (30 min), 3600 (1 hour)
# ASR_MAX_DURATION_SECONDS=7200

# --- Application Settings ---
# Set to true to allow user registration, false to disable
ALLOW_REGISTRATION=false

# Comma-separated list of allowed email domains for registration.
# Leave empty to allow all domains. Example: company.com,subsidiary.org
REGISTRATION_ALLOWED_DOMAINS=

SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000

# Timezone for displaying dates and times in the UI
# Use a valid TZ database name (e.g., America/New_York, Europe/London, UTC)
TIMEZONE=UTC

# Set the logging level for the application.
# Options: DEBUG, INFO, WARNING, ERROR
LOG_LEVEL=INFO

# --- Audio Compression ---
# Automatically compress lossless uploads (WAV, AIFF) to save storage
AUDIO_COMPRESS_UPLOADS=true

# Target codec: mp3 (lossy, smallest), flac (lossless), opus (lossy, efficient)
AUDIO_CODEC=mp3

# Bitrate for lossy codecs (ignored for FLAC)
AUDIO_BITRATE=128k

# Unsupported codecs - comma-separated list of codecs to exclude from supported list
# Use this if your transcription service doesn't support certain codecs
# Supported codecs by default: pcm_s16le, pcm_s24le, pcm_f32le, mp3, flac, opus, vorbis, aac
# Example: AUDIO_UNSUPPORTED_CODECS=opus,vorbis
# AUDIO_UNSUPPORTED_CODECS=

# --- Admin User (created on first run) ---
# IMPORTANT: "changeme" is a placeholder. Set a strong ADMIN_PASSWORD before the
# first run, since the admin account is created from these values.
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=changeme

# --- Inquire Mode (AI search across all recordings) ---
# Set to true to enable semantic search and chat across all recordings
# Requires additional dependencies (already included in Docker image)
ENABLE_INQUIRE_MODE=false

# --- Automated File Processing (Black Hole Directory) ---
# Set to true to enable automated file processing
ENABLE_AUTO_PROCESSING=false

# Processing mode: admin_only, user_directories, or single_user
AUTO_PROCESS_MODE=admin_only

# Directory to watch for new audio files
AUTO_PROCESS_WATCH_DIR=/data/auto-process

# How often to check for new files (seconds)
AUTO_PROCESS_CHECK_INTERVAL=30

# How long to wait (seconds) to confirm a file has stopped changing before processing.
# Increase for slow network transfers (NFS, SMB). Default: 5
# AUTO_PROCESS_STABILITY_TIME=5

# Default username for single_user mode (only used if AUTO_PROCESS_MODE=single_user)
# AUTO_PROCESS_DEFAULT_USERNAME=admin

# --- Automated Export Settings ---
# Automatically export transcriptions and summaries to markdown files
ENABLE_AUTO_EXPORT=false

# Directory where exports will be saved (per-user subdirectories created automatically)
AUTO_EXPORT_DIR=/data/exports

# What to include in exports
AUTO_EXPORT_TRANSCRIPTION=true
AUTO_EXPORT_SUMMARY=true

# --- Auto-Deletion & Retention Settings ---
# Enable automated deletion of old recordings
ENABLE_AUTO_DELETION=false

# Number of days to retain recordings (0 = disabled)
# Example: 90 means recordings older than 90 days will be processed
GLOBAL_RETENTION_DAYS=90

# Deletion mode: 'audio_only' keeps transcription, 'full_recording' deletes everything
#   audio_only: Deletes audio file but keeps transcription/summary/notes (recommended)
#   full_recording: Permanently deletes the entire recording from database
DELETION_MODE=audio_only

# --- Permission-Based Deletion Controls ---
# Allow all users to delete their recordings, or restrict to admins only
#   true: All users can delete their own recordings (default)
#   false: Only admins can delete recordings
USERS_CAN_DELETE=true

# Delete speaker profiles when all their recordings are removed.
# Default: false (speaker profiles and voice embeddings are preserved)
# Set to true for privacy-sensitive deployments where biometric voice data
# should not outlive the recordings it was derived from.
# DELETE_ORPHANED_SPEAKERS=false

# --- Internal Sharing Settings ---
# Enable user-to-user sharing of recordings (works independently of groups)
ENABLE_INTERNAL_SHARING=false

# Show usernames in the UI (when sharing/viewing shared recordings)
#   true: Display usernames throughout the interface
#   false: Hide usernames (users must know each other's usernames to share)
SHOW_USERNAMES_IN_UI=false

# --- Public Sharing Settings ---
# Enable creation of public share links (anonymous access)
#   true: Users can create public links to share recordings externally (default)
#   false: Public sharing is disabled globally
ENABLE_PUBLIC_SHARING=true

# Note: Admins can control public sharing permissions per-user in the admin dashboard
# even when ENABLE_PUBLIC_SHARING is true

# --- Incognito Mode (HIPAA-friendly) ---
# Enable incognito mode for privacy-sensitive transcriptions
# When enabled, users can upload recordings that are:
#   - Processed on the server but NOT saved to the database
#   - Stored only in the browser's sessionStorage (lost when tab closes)
#   - Audio files are immediately deleted after processing
# Useful for HIPAA compliance or sensitive recordings
# Default: false (feature hidden)
ENABLE_INCOGNITO_MODE=false

# Make incognito mode the default for in-app recordings (toggle starts ON)
INCOGNITO_MODE_DEFAULT=false

# --- Video Retention ---
# When enabled, uploaded video files keep their video stream for in-browser playback
# The audio is extracted to a temp file for transcription, then cleaned up
# Default: false (video uploads extract audio only, video stream is discarded)
VIDEO_RETENTION=false

# --- Concurrent Uploads ---
# Maximum number of simultaneous file uploads (default: 3)
MAX_CONCURRENT_UPLOADS=3

# --- Background Processing Queues ---
# Separate queues for transcription (slow) and summary (fast) jobs
# This prevents slow ASR jobs from blocking quick summary generation

# Transcription queue workers (for ASR processing, default: 2)
JOB_QUEUE_WORKERS=2

# Summary queue workers (for LLM summarization, default: 2)
SUMMARY_QUEUE_WORKERS=2

# Maximum retry attempts for failed jobs (default: 3)
JOB_MAX_RETRIES=3

# --- Docker Settings (rarely need to be changed) ---
# Database URI - SQLite (default) or PostgreSQL
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
# For PostgreSQL, use: postgresql://username:password@hostname:5432/database_name
# Example: postgresql://speakr:password@postgres:5432/speakr

UPLOAD_FOLDER=/data/uploads