# ----------------------------------------------------------------------------- # DictIA Configuration: Standard Whisper API (Legacy) # # ⚠️ DEPRECATION NOTICE: This configuration style is still supported but # we recommend using the new unified configuration in env.transcription.example # which supports all transcription providers with auto-detection. # # Migration: See TRANSCRIPTION_CONNECTOR documentation in env.transcription.example # For OpenAI Whisper, simply set: # TRANSCRIPTION_API_KEY=your_key # TRANSCRIPTION_MODEL=whisper-1 (or gpt-4o-transcribe-diarize for diarization) # # Instructions: # 1. Copy this file to a new file named .env # cp env.whisper.example .env # 2. Fill in the required API keys and settings below. # ----------------------------------------------------------------------------- # --- Text Generation Model (for summaries, titles, etc.) --- TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1 TEXT_MODEL_API_KEY=your_openrouter_api_key TEXT_MODEL_NAME=openai/gpt-4o-mini # --- GPT-5 Specific Settings (only used with OpenAI API and GPT-5 models) --- # If using GPT-5 models (gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat-latest) with OpenAI API, # these parameters will be used instead of temperature. # # Example GPT-5 configuration: # TEXT_MODEL_BASE_URL=https://api.openai.com/v1 # TEXT_MODEL_NAME=gpt-5-mini # # Reasoning effort: minimal, low, medium, high (default: medium) # - minimal: Fastest responses, minimal reasoning tokens # - low: Fast responses with basic reasoning # - medium: Balanced reasoning and speed (recommended) # - high: Maximum reasoning for complex tasks GPT5_REASONING_EFFORT=medium # # Verbosity: low, medium, high (default: medium) # - low: Concise responses # - medium: Balanced detail # - high: Detailed explanations GPT5_VERBOSITY=medium # --- Chat Model Configuration (Optional) --- # Configure a separate model for real-time chat interactions. # If not set, chat will use the TEXT_MODEL_* settings above. 
# # Use cases: # - Use a faster model for chat while using a more capable model for summarization # - Use a cheaper model for interactive chat to reduce costs # - Use different service tiers for different operations # # CHAT_MODEL_API_KEY=your_chat_api_key # CHAT_MODEL_BASE_URL=https://openrouter.ai/api/v1 # CHAT_MODEL_NAME=openai/gpt-4o # --- Chat GPT-5 Settings (only used with OpenAI API and GPT-5 chat models) --- # These settings allow independent control of GPT-5 parameters for chat. # If not set, falls back to the main GPT5_* settings above. # # CHAT_GPT5_REASONING_EFFORT=medium # CHAT_GPT5_VERBOSITY=medium # --- LLM Streaming Compatibility --- # Some LLM servers (e.g., certain vLLM configurations) don't support OpenAI's # stream_options parameter. If chat streaming hangs or fails, try disabling this. # Note: When disabled, token usage tracking for chat will not be available. # ENABLE_STREAM_OPTIONS=false # --- Transcription Service (OpenAI Whisper API) --- # New connector architecture is enabled by default. # Available models: # whisper-1 - Legacy, no diarization # gpt-4o-transcribe - High quality, no diarization # gpt-4o-mini-transcribe - Cost-effective, no diarization # gpt-4o-transcribe-diarize - Speaker diarization! (recommended) TRANSCRIPTION_BASE_URL=https://api.openai.com/v1 TRANSCRIPTION_API_KEY=your_openai_api_key TRANSCRIPTION_MODEL=whisper-1 # Legacy model name (deprecated, use TRANSCRIPTION_MODEL instead) # WHISPER_MODEL=whisper-1 # --- Application Settings --- # Set to "true" to allow user registration, "false" to disable ALLOW_REGISTRATION=false # Comma-separated list of allowed email domains for registration. # Leave empty to allow all domains. Example: company.com,subsidiary.org REGISTRATION_ALLOWED_DOMAINS= SUMMARY_MAX_TOKENS=8000 CHAT_MAX_TOKENS=5000 # Timezone for displaying dates and times in the UI # Use a valid TZ database name (e.g., America/New_York, Europe/London, UTC) # Note: value is unquoted — some env-file parsers (e.g. Docker --env-file) keep quotes literally TIMEZONE=UTC # Set the logging level for the application. 
# Options: DEBUG, INFO, WARNING, ERROR LOG_LEVEL=INFO # --- Large File Chunking --- # Chunking is now connector-aware: # - openai_transcribe/asr_endpoint: Handled internally, these settings ignored # - openai_whisper: Uses these settings for files >25MB # # ENABLE_CHUNKING=false # Uncomment to disable (only for openai_whisper) # Chunk limit - supports size (20MB) or duration (600s, 10m) CHUNK_LIMIT=20MB # Overlap between chunks (seconds) CHUNK_OVERLAP_SECONDS=3 # --- Audio Compression --- # Automatically compress lossless uploads (WAV, AIFF) to save storage AUDIO_COMPRESS_UPLOADS=true # Target codec: mp3 (lossy, smallest), flac (lossless), opus (lossy, efficient) AUDIO_CODEC=mp3 # Bitrate for lossy codecs (ignored for FLAC) AUDIO_BITRATE=128k # Unsupported codecs - comma-separated list of codecs to exclude from supported list # Use this if your transcription service doesn't support certain codecs # Supported codecs by default: pcm_s16le, pcm_s24le, pcm_f32le, mp3, flac, opus, vorbis, aac # Example: AUDIO_UNSUPPORTED_CODECS=opus,vorbis # AUDIO_UNSUPPORTED_CODECS= # --- Admin User (created on first run) --- ADMIN_USERNAME=admin ADMIN_EMAIL=admin@example.com ADMIN_PASSWORD=changeme # --- Inquire Mode (AI search across all recordings) --- # Set to "true" to enable semantic search and chat across all recordings # Requires additional dependencies (already included in Docker image) ENABLE_INQUIRE_MODE=false # --- Automated File Processing (Black Hole Directory) --- # Set to "true" to enable automated file processing ENABLE_AUTO_PROCESSING=false # --- Automated Export Settings --- # Automatically export transcriptions and summaries to markdown files ENABLE_AUTO_EXPORT=false # Directory where exports will be saved (per-user subdirectories created automatically) AUTO_EXPORT_DIR=/data/exports # What to include in exports AUTO_EXPORT_TRANSCRIPTION=true AUTO_EXPORT_SUMMARY=true # Processing mode: admin_only, user_directories, or single_user AUTO_PROCESS_MODE=admin_only # 
Directory to watch for new audio files AUTO_PROCESS_WATCH_DIR=/data/auto-process # How often to check for new files (seconds) AUTO_PROCESS_CHECK_INTERVAL=30 # How long to wait (seconds) to confirm a file has stopped changing before processing. # Increase for slow network transfers (NFS, SMB). Default: 5 # AUTO_PROCESS_STABILITY_TIME=5 # Default username for single_user mode (only used if AUTO_PROCESS_MODE=single_user) # AUTO_PROCESS_DEFAULT_USERNAME=admin # --- Auto-Deletion & Retention Settings --- # Enable automated deletion of old recordings ENABLE_AUTO_DELETION=false # Number of days to retain recordings (0 = disabled) # Example: 90 means recordings older than 90 days will be processed GLOBAL_RETENTION_DAYS=90 # Deletion mode: 'audio_only' keeps transcription, 'full_recording' deletes everything # audio_only: Deletes audio file but keeps transcription/summary/notes (recommended) # full_recording: Permanently deletes the entire recording from database DELETION_MODE=audio_only # --- Permission-Based Deletion Controls --- # Allow all users to delete their recordings, or restrict to admins only # true: All users can delete their own recordings (default) # false: Only admins can delete recordings USERS_CAN_DELETE=true # Delete speaker profiles when all their recordings are removed. # Default: false (speaker profiles and voice embeddings are preserved) # Set to true for privacy-sensitive deployments where biometric voice data # should not outlive the recordings it was derived from. 
# DELETE_ORPHANED_SPEAKERS=false # --- Internal Sharing Settings --- # Enable user-to-user sharing of recordings (works independently of groups) ENABLE_INTERNAL_SHARING=false # Show usernames in the UI (when sharing/viewing shared recordings) # true: Display usernames throughout the interface # false: Hide usernames (users must know each other's usernames to share) SHOW_USERNAMES_IN_UI=false # --- Public Sharing Settings --- # Enable creation of public share links (anonymous access) # true: Users can create public links to share recordings externally (default) # false: Public sharing is disabled globally ENABLE_PUBLIC_SHARING=true # Note: Admins can control public sharing permissions per-user in the admin dashboard # even when ENABLE_PUBLIC_SHARING is true # --- Incognito Mode (HIPAA-friendly) --- # Enable incognito mode for privacy-sensitive transcriptions # When enabled, users can upload recordings that are: # - Processed on the server but NOT saved to the database # - Stored only in the browser's sessionStorage (lost when tab closes) # - Audio files are immediately deleted after processing # Useful for HIPAA compliance or sensitive recordings # Default: false (feature hidden) ENABLE_INCOGNITO_MODE=false # Make incognito mode the default for in-app recordings (toggle starts ON) INCOGNITO_MODE_DEFAULT=false # --- Video Retention --- # When enabled, uploaded video files keep their video stream for in-browser playback # The audio is extracted to a temp file for transcription, then cleaned up # Default: false (video uploads extract audio only, video stream is discarded) VIDEO_RETENTION=false # --- Concurrent Uploads --- # Maximum number of simultaneous file uploads (default: 3) MAX_CONCURRENT_UPLOADS=3 # --- Background Processing Queues --- # Separate queues for transcription (slow) and summary (fast) jobs # This prevents slow ASR jobs from blocking quick summary generation # Transcription queue workers (for ASR processing, default: 2) JOB_QUEUE_WORKERS=2 # Summary 
queue workers (for LLM summarization, default: 2) SUMMARY_QUEUE_WORKERS=2 # Maximum retry attempts for failed jobs (default: 3) JOB_MAX_RETRIES=3 # --- Docker Settings (rarely need to be changed) --- # Database URI - SQLite (default) or PostgreSQL SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db # For PostgreSQL, use: postgresql://username:password@hostname:5432/database_name # Example: postgresql://speakr:password@postgres:5432/speakr UPLOAD_FOLDER=/data/uploads