Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)

This commit is contained in:
InnovA AI
2026-03-16 21:47:37 +00:00
commit 42772a31ed
365 changed files with 103572 additions and 0 deletions

View File

@@ -0,0 +1,65 @@
# Docker Compose example for DictIA: a single `app` service publishing
# port 8899, configured through a .env file, with persistent storage for
# uploads and the application instance data.
services:
  app:
    # Use 'lite' tag for a smaller image (~700MB vs ~4.4GB) without PyTorch
    # Semantic search in Inquire Mode falls back to text search; all other features work normally
    image: dictia:latest
    container_name: dictia
    restart: unless-stopped
    ports:
      # Quoted so the host:container mapping is always read as a string.
      - "8899:8899"

    # --- Configuration ---
    # Environment variables are loaded from the .env file.
    #
    # To get started:
    # 1. Copy this file to your project root:
    #      cp config/docker-compose.example.yml docker-compose.yml
    #
    # 2. Copy the unified transcription config (RECOMMENDED):
    #      cp config/env.transcription.example .env
    #
    #    This supports all providers with auto-detection:
    #    - OpenAI GPT-4o with diarization (set TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize)
    #    - Self-hosted ASR/WhisperX (set ASR_BASE_URL=http://your-asr:9000)
    #    - Legacy Whisper (set TRANSCRIPTION_MODEL=whisper-1)
    #
    #    Legacy config files (still supported):
    #    - config/env.whisper.example  - Standard Whisper API
    #    - config/env.whisperx.example - WhisperX with voice profiles
    #    - config/env.asr.example      - Basic ASR with diarization
    #
    # 3. Edit the .env file to add your API keys:
    #    - TRANSCRIPTION_API_KEY (for OpenAI) or ASR_BASE_URL (for self-hosted)
    #    - TEXT_MODEL_API_KEY (REQUIRED for summaries, titles, and chat)
    #
    # 4. Start DictIA:
    #      docker compose up -d
    env_file:
      - .env
    environment:
      # Set log level for troubleshooting
      # Use ERROR for production (minimal logs)
      # Use INFO for debugging issues (recommended when troubleshooting)
      # Use DEBUG for detailed development logging
      #
      # NOTE: values set under `environment` take precedence over the same
      # variable loaded through `env_file`, so this entry overrides any
      # LOG_LEVEL defined in .env. Change it here (or remove this entry) if
      # you want the .env value to apply.
      - LOG_LEVEL=ERROR

    # --- Volume Configuration ---
    # Choose ONE of the following volume configurations.

    # Option 1 (Recommended): Bind mounts to local folders.
    volumes:
      - ./uploads:/data/uploads
      - ./instance:/data/instance
      # Optional: Uncomment if using auto-export feature (ENABLE_AUTO_EXPORT=true)
      # - ./exports:/data/exports
      # Optional: Uncomment if using auto-processing feature (ENABLE_AUTO_PROCESSING=true)
      # - ./auto-process:/data/auto-process

    # Option 2: Docker-managed volumes.
    # If you use this option, comment out Option 1 above and also declare
    # the named volumes at the top level (see the commented block below).
    # volumes:
    #   - dictia-uploads:/data/uploads
    #   - dictia-instance:/data/instance
    #   # Optional: Uncomment if using auto-export feature
    #   # - dictia-exports:/data/exports
    #   # Optional: Uncomment if using auto-processing feature
    #   # - dictia-auto-process:/data/auto-process

# Required only for Option 2: every named volume referenced by a service
# must be declared in a top-level `volumes:` section.
# volumes:
#   dictia-uploads:
#   dictia-instance:
#   # dictia-exports:
#   # dictia-auto-process:

259
config/env.asr.example Normal file
View File

@@ -0,0 +1,259 @@
# -----------------------------------------------------------------------------
# DictIA Configuration: ASR Endpoint (Legacy)
#
# ⚠️ DEPRECATION NOTICE: This configuration style is still supported but
# we recommend using the new unified configuration in env.transcription.example
# which supports all transcription providers with auto-detection.
#
# Migration: Simply set ASR_BASE_URL and the connector will auto-detect ASR mode.
# USE_ASR_ENDPOINT=true is no longer required (but still works for backwards compat).
#
# Instructions:
# 1. From the project root, copy this file to a new file named .env
#    (it must sit next to docker-compose.yml):
# cp config/env.asr.example .env
# 2. Fill in the required URLs, API keys, and settings below.
# -----------------------------------------------------------------------------
# --- Text Generation Model (for summaries, titles, etc.) ---
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini
# --- GPT-5 Specific Settings (only used with OpenAI API and GPT-5 models) ---
# If using GPT-5 models (gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat-latest) with OpenAI API,
# these parameters will be used instead of temperature.
#
# Example GPT-5 configuration:
# TEXT_MODEL_BASE_URL=https://api.openai.com/v1
# TEXT_MODEL_NAME=gpt-5-mini
#
# Reasoning effort: minimal, low, medium, high (default: medium)
# - minimal: Fastest responses, minimal reasoning tokens
# - low: Fast responses with basic reasoning
# - medium: Balanced reasoning and speed (recommended)
# - high: Maximum reasoning for complex tasks
GPT5_REASONING_EFFORT=medium
#
# Verbosity: low, medium, high (default: medium)
# - low: Concise responses
# - medium: Balanced detail
# - high: Detailed explanations
GPT5_VERBOSITY=medium
# --- Chat Model Configuration (Optional) ---
# Configure a separate model for real-time chat interactions.
# If not set, chat will use the TEXT_MODEL_* settings above.
#
# Use cases:
# - Use a faster model for chat while using a more capable model for summarization
# - Use a cheaper model for interactive chat to reduce costs
# - Use different service tiers for different operations
#
# CHAT_MODEL_API_KEY=your_chat_api_key
# CHAT_MODEL_BASE_URL=https://openrouter.ai/api/v1
# CHAT_MODEL_NAME=openai/gpt-4o
# --- Chat GPT-5 Settings (only used with OpenAI API and GPT-5 chat models) ---
# These settings allow independent control of GPT-5 parameters for chat.
# If not set, falls back to the main GPT5_* settings above.
#
# CHAT_GPT5_REASONING_EFFORT=medium
# CHAT_GPT5_VERBOSITY=medium
# --- LLM Streaming Compatibility ---
# Some LLM servers (e.g., certain vLLM configurations) don't support OpenAI's
# stream_options parameter. If chat streaming hangs or fails, try disabling this.
# Note: When disabled, token usage tracking for chat will not be available.
# ENABLE_STREAM_OPTIONS=false
# --- Transcription Service (ASR Endpoint) ---
# New connector architecture auto-detects ASR mode when ASR_BASE_URL is set.
# USE_ASR_ENDPOINT=true is deprecated but still works for backwards compatibility.
#
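# Note: ASR endpoints handle chunking internally - CHUNK_LIMIT settings are ignored.
# To split long files in the app before they are sent to the ASR service,
# use ASR_ENABLE_CHUNKING (see "ASR Chunking" below).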
# ASR Endpoint URL (setting this auto-enables ASR mode)
# For containers in same docker-compose: Use container name and internal port
# Example: http://whisper-asr:9000 (NOT the host port 6002 or external IP)
# For external ASR: Use http://192.168.1.100:9000 or http://asr.example.com:9000
ASR_BASE_URL=http://whisper-asr:9000
# Deprecated: No longer needed, kept for backwards compatibility
# USE_ASR_ENDPOINT=true
# Speaker diarization options
ASR_DIARIZE=true
# ASR_MIN_SPEAKERS=1 # Hint for minimum speakers
# ASR_MAX_SPEAKERS=5 # Hint for maximum speakers
# ASR_RETURN_SPEAKER_EMBEDDINGS=false # Only enable for WhisperX ASR service
# --- ASR Chunking (for GPUs with limited memory) ---
# Self-hosted ASR services may crash on long files due to GPU memory exhaustion.
# Enable app-level chunking to split long files before sending to ASR.
# Default: false (ASR service handles files internally)
# ASR_ENABLE_CHUNKING=true
# Maximum audio duration per chunk in seconds (default: 7200 = 2 hours)
# Lower this value if your GPU runs out of memory on long files.
# Common values: 600 (10 min), 1200 (20 min), 1800 (30 min), 3600 (1 hour)
# ASR_MAX_DURATION_SECONDS=7200
# --- Application Settings ---
# Set to "true" to allow user registration, "false" to disable
ALLOW_REGISTRATION=false
# Comma-separated list of allowed email domains for registration.
# Leave empty to allow all domains. Example: company.com,subsidiary.org
REGISTRATION_ALLOWED_DOMAINS=
SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000
# Timezone for displaying dates and times in the UI
# Use a valid TZ database name (e.g., "America/New_York", "Europe/London", "UTC")
TIMEZONE="UTC"
# Set the logging level for the application.
# Options: DEBUG, INFO, WARNING, ERROR
LOG_LEVEL="INFO"
# --- Audio Compression ---
# Automatically compress lossless uploads (WAV, AIFF) to save storage
AUDIO_COMPRESS_UPLOADS=true
# Target codec: mp3 (lossy, smallest), flac (lossless), opus (lossy, efficient)
AUDIO_CODEC=mp3
# Bitrate for lossy codecs (ignored for FLAC)
AUDIO_BITRATE=128k
# Unsupported codecs - comma-separated list of codecs to exclude from supported list
# Use this if your transcription service doesn't support certain codecs
# Supported codecs by default: pcm_s16le, pcm_s24le, pcm_f32le, mp3, flac, opus, vorbis, aac
# Example: AUDIO_UNSUPPORTED_CODECS=opus,vorbis
# AUDIO_UNSUPPORTED_CODECS=
# --- Admin User (created on first run) ---
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=changeme
# --- Inquire Mode (AI search across all recordings) ---
# Set to "true" to enable semantic search and chat across all recordings
# Requires additional dependencies (included in the full Docker image; with the
# smaller 'lite' image, semantic search falls back to text search)
ENABLE_INQUIRE_MODE=false
# --- Automated File Processing (Black Hole Directory) ---
# Set to "true" to enable automated file processing
ENABLE_AUTO_PROCESSING=false
# --- Automated Export Settings ---
# Automatically export transcriptions and summaries to markdown files
ENABLE_AUTO_EXPORT=false
# Directory where exports will be saved (per-user subdirectories created automatically)
AUTO_EXPORT_DIR=/data/exports
# What to include in exports
AUTO_EXPORT_TRANSCRIPTION=true
AUTO_EXPORT_SUMMARY=true
# Processing mode: admin_only, user_directories, or single_user
AUTO_PROCESS_MODE=admin_only
# Directory to watch for new audio files
AUTO_PROCESS_WATCH_DIR=/data/auto-process
# How often to check for new files (seconds)
AUTO_PROCESS_CHECK_INTERVAL=30
# How long to wait (seconds) to confirm a file has stopped changing before processing.
# Increase for slow network transfers (NFS, SMB). Default: 5
# AUTO_PROCESS_STABILITY_TIME=5
# Default username for single_user mode (only used if AUTO_PROCESS_MODE=single_user)
# AUTO_PROCESS_DEFAULT_USERNAME=admin
# --- Auto-Deletion & Retention Settings ---
# Enable automated deletion of old recordings
ENABLE_AUTO_DELETION=false
# Number of days to retain recordings (0 = disabled)
# Example: 90 means recordings older than 90 days will be deleted according to DELETION_MODE
GLOBAL_RETENTION_DAYS=90
# Deletion mode: 'audio_only' keeps transcription, 'full_recording' deletes everything
# audio_only: Deletes audio file but keeps transcription/summary/notes (recommended)
# full_recording: Permanently deletes the entire recording from database
DELETION_MODE=audio_only
# --- Permission-Based Deletion Controls ---
# Allow all users to delete their recordings, or restrict to admins only
# true: All users can delete their own recordings (default)
# false: Only admins can delete recordings
USERS_CAN_DELETE=true
# Delete speaker profiles when all their recordings are removed.
# Default: false (speaker profiles and voice embeddings are preserved)
# Set to true for privacy-sensitive deployments where biometric voice data
# should not outlive the recordings it was derived from.
# DELETE_ORPHANED_SPEAKERS=false
# --- Internal Sharing Settings ---
# Enable user-to-user sharing of recordings (works independently of groups)
ENABLE_INTERNAL_SHARING=false
# Show usernames in the UI (when sharing/viewing shared recordings)
# true: Display usernames throughout the interface
# false: Hide usernames (users must know each other's usernames to share)
SHOW_USERNAMES_IN_UI=false
# --- Public Sharing Settings ---
# Enable creation of public share links (anonymous access)
# true: Users can create public links to share recordings externally (default)
# false: Public sharing is disabled globally
ENABLE_PUBLIC_SHARING=true
# Note: Admins can control public sharing permissions per-user in the admin dashboard
# even when ENABLE_PUBLIC_SHARING is true
# --- Incognito Mode (HIPAA-friendly) ---
# Enable incognito mode for privacy-sensitive transcriptions
# When enabled, users can upload recordings that are:
# - Processed on the server but NOT saved to the database
# - Stored only in the browser's sessionStorage (lost when tab closes)
# - Audio files are immediately deleted after processing
# Useful for HIPAA compliance or sensitive recordings
# Default: false (feature hidden)
ENABLE_INCOGNITO_MODE=false
# Make incognito mode the default for in-app recordings (toggle starts ON)
INCOGNITO_MODE_DEFAULT=false
# --- Video Retention ---
# When enabled, uploaded video files keep their video stream for in-browser playback
# The audio is extracted to a temp file for transcription, then cleaned up
# Default: false (video uploads extract audio only, video stream is discarded)
VIDEO_RETENTION=false
# --- Concurrent Uploads ---
# Maximum number of simultaneous file uploads (default: 3)
MAX_CONCURRENT_UPLOADS=3
# --- Background Processing Queues ---
# Separate queues for transcription (slow) and summary (fast) jobs
# This prevents slow ASR jobs from blocking quick summary generation
# Transcription queue workers (for ASR processing, default: 2)
JOB_QUEUE_WORKERS=2
# Summary queue workers (for LLM summarization, default: 2)
SUMMARY_QUEUE_WORKERS=2
# Maximum retry attempts for failed jobs (default: 3)
JOB_MAX_RETRIES=3
# --- Docker Settings (rarely need to be changed) ---
# Database URI - SQLite (default) or PostgreSQL
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
# For PostgreSQL, use: postgresql://username:password@hostname:5432/database_name
# Example: postgresql://dictia:password@postgres:5432/dictia
UPLOAD_FOLDER=/data/uploads

109
config/env.email.example Normal file
View File

@@ -0,0 +1,109 @@
###############################################################################
# Email Verification & Password Reset Configuration
###############################################################################
# Enable email verification for new user registrations.
# When enabled, new users must verify their email before full access.
# Default: false
ENABLE_EMAIL_VERIFICATION=false
# Require email verification to log in.
# Only effective when ENABLE_EMAIL_VERIFICATION=true.
# When true, users cannot log in until they verify their email.
# Default: false
REQUIRE_EMAIL_VERIFICATION=false
###############################################################################
# SMTP Configuration
###############################################################################
# SMTP server hostname (required for email functionality)
# Examples: smtp.gmail.com, smtp.sendgrid.net, smtp.mailgun.org
SMTP_HOST=smtp.gmail.com
# SMTP server port
# Common ports: 587 (TLS/STARTTLS), 465 (SSL), 25 (unencrypted)
# Default: 587
SMTP_PORT=587
# SMTP authentication username (usually your email address)
SMTP_USERNAME=your-email@gmail.com
# SMTP authentication password
# For Gmail: Use an App Password (not your regular password)
# https://support.google.com/accounts/answer/185833
SMTP_PASSWORD=your-app-password
# Use TLS/STARTTLS encryption (recommended for port 587)
# Default: true
SMTP_USE_TLS=true
# Use SSL encryption (for port 465)
# Note: Only enable one of SMTP_USE_TLS or SMTP_USE_SSL
# Default: false
SMTP_USE_SSL=false
# Email address that appears in the "From" field
# Should be a valid email address, ideally matching your domain
SMTP_FROM_ADDRESS=noreply@yourdomain.com
# Display name that appears alongside the from address
# Default: Speakr
SMTP_FROM_NAME=Speakr
###############################################################################
# Provider-Specific Examples
###############################################################################
# --- Gmail ---
# SMTP_HOST=smtp.gmail.com
# SMTP_PORT=587
# SMTP_USE_TLS=true
# SMTP_USERNAME=your-email@gmail.com
# SMTP_PASSWORD=your-app-password # Generate at https://myaccount.google.com/apppasswords
# --- SendGrid ---
# SMTP_HOST=smtp.sendgrid.net
# SMTP_PORT=587
# SMTP_USE_TLS=true
# SMTP_USERNAME=apikey
# SMTP_PASSWORD=your-sendgrid-api-key
# --- Mailgun ---
# SMTP_HOST=smtp.mailgun.org
# SMTP_PORT=587
# SMTP_USE_TLS=true
# SMTP_USERNAME=postmaster@your-domain.mailgun.org
# SMTP_PASSWORD=your-mailgun-password
# --- Amazon SES ---
# SMTP_HOST=email-smtp.us-east-1.amazonaws.com
# SMTP_PORT=587
# SMTP_USE_TLS=true
# SMTP_USERNAME=your-ses-smtp-username
# SMTP_PASSWORD=your-ses-smtp-password
# --- Microsoft 365 / Outlook ---
# SMTP_HOST=smtp.office365.com
# SMTP_PORT=587
# SMTP_USE_TLS=true
# SMTP_USERNAME=your-email@yourdomain.com
# SMTP_PASSWORD=your-password
###############################################################################
# Notes
###############################################################################
# Token Expiry Times:
# - Email verification links expire after 24 hours
# - Password reset links expire after 1 hour
# Migration Behavior:
# - Existing users are automatically marked as email_verified=true
# - New users (when feature is enabled) start as email_verified=false
# Security Recommendations:
# - Always use TLS or SSL encryption
# - Use app-specific passwords when available (Gmail, etc.)
# - Consider using a dedicated email service (SendGrid, Mailgun, SES)
# - Set a strong SECRET_KEY in your Flask configuration

32
config/env.sso.example Normal file
View File

@@ -0,0 +1,32 @@
###############################################################################
# SSO (OIDC) Authentication
###############################################################################
# Enable SSO (Single Sign-On) authentication. Requires discovery URL and client credentials.
ENABLE_SSO=false
# Display name for the provider (shown in UI button)
SSO_PROVIDER_NAME=Keycloak
# OIDC client credentials
SSO_CLIENT_ID=speakr
SSO_CLIENT_SECRET=change-me
# OIDC discovery document URL (well-known endpoint)
SSO_DISCOVERY_URL=https://keycloak.example.com/realms/master/.well-known/openid-configuration
# Public redirect URI exposed by DictIA (must be registered in the IdP)
SSO_REDIRECT_URI=https://speakr.example.com/auth/sso/callback
# Auto-registration settings
# Allow automatic account creation for new users signing in via SSO.
SSO_AUTO_REGISTER=true
# Comma-separated list of allowed email domains for auto-registration.
# Example: example.com,company.org
# Leave empty to allow all domains.
SSO_ALLOWED_DOMAINS=
# Claims used to map user profile fields
SSO_DEFAULT_USERNAME_CLAIM=preferred_username
SSO_DEFAULT_NAME_CLAIM=name

View File

@@ -0,0 +1,289 @@
# =============================================================================
# Transcription Connector Configuration
# =============================================================================
#
# DictIA supports multiple transcription providers through a connector-based
# architecture. This file documents all available configuration options.
#
# Quick Start (Simplified):
# 1. For OpenAI with diarization: Set TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize
# 2. For self-hosted ASR: Set ASR_BASE_URL=http://your-asr:9000
# 3. For legacy Whisper: Set TRANSCRIPTION_API_KEY and optionally TRANSCRIPTION_MODEL
#
# Auto-Detection Priority:
# 1. TRANSCRIPTION_CONNECTOR - explicit connector name (if you need full control)
# 2. ASR_BASE_URL - if set, uses ASR endpoint connector
# 3. TRANSCRIPTION_MODEL contains 'gpt-4o' - uses OpenAI Transcribe connector
# 4. Default - uses OpenAI Whisper connector with TRANSCRIPTION_MODEL or whisper-1
# =============================================================================
# TEXT GENERATION MODEL (REQUIRED for summaries, titles, chat)
# =============================================================================
# DictIA uses a text/LLM model for generating summaries, titles, and chat.
# This is separate from the transcription model (STT).
#
# You can use OpenRouter (recommended - access to many models) or direct OpenAI API.
# OpenRouter example (recommended - supports many models):
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini
# OpenAI direct example:
# TEXT_MODEL_BASE_URL=https://api.openai.com/v1
# TEXT_MODEL_API_KEY=sk-your_openai_api_key
# TEXT_MODEL_NAME=gpt-4o-mini
# --- GPT-5 Specific Settings (only used with OpenAI API and GPT-5 models) ---
# Reasoning effort: minimal, low, medium, high (default: medium)
GPT5_REASONING_EFFORT=medium
# Verbosity: low, medium, high (default: medium)
GPT5_VERBOSITY=medium
# --- Chat Model Configuration (Optional) ---
# Configure a separate model for real-time chat interactions.
# If not set, chat will use the TEXT_MODEL_* settings above.
# CHAT_MODEL_API_KEY=your_chat_api_key
# CHAT_MODEL_BASE_URL=https://openrouter.ai/api/v1
# CHAT_MODEL_NAME=openai/gpt-4o
# =============================================================================
# CONNECTOR SELECTION (Auto-detected if not set)
# =============================================================================
# Options: openai_whisper, openai_transcribe, asr_endpoint
# Leave empty to auto-detect based on other settings
# TRANSCRIPTION_CONNECTOR=
# Feature flag to enable/disable new connector architecture (default: true)
# Set to false to use legacy code path for troubleshooting
# USE_NEW_TRANSCRIPTION_ARCHITECTURE=true
# =============================================================================
# OPENAI CONFIGURATION (Required for openai_whisper and openai_transcribe)
# =============================================================================
TRANSCRIPTION_API_KEY=your_openai_api_key
TRANSCRIPTION_BASE_URL=https://api.openai.com/v1
# Model Selection - determines which connector is used:
#
# whisper-1 - Legacy Whisper model, no diarization, $0.006/min
# Supports: srt, vtt, json, verbose_json output formats
#
# gpt-4o-transcribe - High quality transcription, no diarization, $0.006/min
# Better accuracy than whisper-1, accepts prompts
#
# gpt-4o-mini-transcribe - Cost-effective option, no diarization, $0.003/min
# Good for high-volume, budget-conscious use
#
# gpt-4o-transcribe-diarize - Speaker diarization!, $0.006/min
# Identifies speakers as A, B, C, D...
# Requires chunking_strategy for audio >30s
#
TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize
# Legacy Whisper model name (used when TRANSCRIPTION_MODEL is not set)
# WHISPER_MODEL=whisper-1
# =============================================================================
# ASR ENDPOINT CONFIGURATION (For self-hosted whisper services)
# =============================================================================
# Note: USE_ASR_ENDPOINT is deprecated. Just set ASR_BASE_URL instead.
# The connector will auto-detect ASR mode when ASR_BASE_URL is set.
# USE_ASR_ENDPOINT=true # Deprecated - kept for backwards compatibility
# Base URL of your ASR service (setting this auto-enables the ASR endpoint connector)
# Supports: whisper-asr-webservice, WhisperX, and compatible services
# ASR_BASE_URL=http://whisper-asr:9000
# Request timeout in seconds (default: 1800 = 30 minutes)
# Increase for very long audio files
# ASR_TIMEOUT=1800
# Enable speaker diarization (default: true)
# ASR_DIARIZE=true
# Speaker count hints (optional, helps with diarization accuracy)
# ASR_MIN_SPEAKERS=1
# ASR_MAX_SPEAKERS=5
# Return speaker embeddings for speaker identification (WhisperX only)
# Enables automatic speaker matching across recordings
# ASR_RETURN_SPEAKER_EMBEDDINGS=false
# =============================================================================
# CHUNKING CONFIGURATION (For large files)
# =============================================================================
# Chunking is now connector-aware with this priority:
# 1. Connector handles internally (openai_transcribe, asr_endpoint) → No app chunking
# 2. ENABLE_CHUNKING=false → Disable chunking (only affects openai_whisper)
# 3. CHUNK_LIMIT set → Use your settings
# 4. Connector defaults → Use connector's recommended limits
# 5. App default → 20MB size-based
#
# For openai_transcribe/asr_endpoint: These settings are IGNORED (connector handles it)
# For openai_whisper: These settings control chunking behavior
# ENABLE_CHUNKING=false # Uncomment to disable chunking for openai_whisper
# Chunk limit - supports size (20MB) or duration (600s, 10m)
CHUNK_LIMIT=20MB
# Overlap between chunks in seconds (helps with transcription accuracy at boundaries)
CHUNK_OVERLAP_SECONDS=3
# =============================================================================
# EXAMPLE CONFIGURATIONS (Simplified)
# =============================================================================
#
# --- OpenAI with Speaker Diarization (Recommended) ---
# Just two environment variables needed:
# TRANSCRIPTION_API_KEY=sk-xxx
# TRANSCRIPTION_MODEL=gpt-4o-transcribe-diarize
#
# --- Self-hosted WhisperX (Best for privacy) ---
# Just one environment variable needed (auto-detects ASR mode):
# ASR_BASE_URL=http://whisper-asr:9000
# Optional:
# ASR_DIARIZE=true
# ASR_RETURN_SPEAKER_EMBEDDINGS=true
#
# --- OpenAI Whisper (Legacy, no diarization) ---
# TRANSCRIPTION_API_KEY=sk-xxx
# TRANSCRIPTION_MODEL=whisper-1
#
# --- Custom Whisper model (local or compatible endpoint) ---
# TRANSCRIPTION_API_KEY=not-needed
# TRANSCRIPTION_BASE_URL=http://localhost:8080/v1
# TRANSCRIPTION_MODEL=Systran/faster-distil-whisper-large-v3
# =============================================================================
# APPLICATION SETTINGS
# =============================================================================
# --- Admin User (created on first run) ---
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=changeme
# --- Registration & Access ---
ALLOW_REGISTRATION=false
# Comma-separated list of allowed email domains for registration.
# Leave empty to allow all domains. Example: company.com,subsidiary.org
REGISTRATION_ALLOWED_DOMAINS=
# --- Token Limits ---
SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000
# --- Timezone ---
# Use a valid TZ database name (e.g., "America/New_York", "Europe/London", "UTC")
TIMEZONE="UTC"
# --- Logging ---
LOG_LEVEL="INFO"
# =============================================================================
# AUDIO PROCESSING
# =============================================================================
# --- Audio Compression ---
# Automatically compress lossless uploads (WAV, AIFF) to save storage
AUDIO_COMPRESS_UPLOADS=true
# Target codec: mp3 (lossy, smallest), flac (lossless), opus (lossy, efficient)
AUDIO_CODEC=mp3
# Bitrate for lossy codecs (ignored for FLAC)
AUDIO_BITRATE=128k
# Unsupported codecs - comma-separated list of codecs to exclude
# Example: AUDIO_UNSUPPORTED_CODECS=opus,vorbis
# AUDIO_UNSUPPORTED_CODECS=
# =============================================================================
# OPTIONAL FEATURES
# =============================================================================
# --- Inquire Mode (AI search across all recordings) ---
ENABLE_INQUIRE_MODE=false
# --- Automated File Processing (Black Hole Directory) ---
ENABLE_AUTO_PROCESSING=false
# AUTO_PROCESS_MODE=admin_only
# AUTO_PROCESS_WATCH_DIR=/data/auto-process
# --- Automated Export ---
ENABLE_AUTO_EXPORT=false
# AUTO_EXPORT_DIR=/data/exports
# --- Auto-Deletion & Retention ---
ENABLE_AUTO_DELETION=false
# GLOBAL_RETENTION_DAYS=90
# DELETION_MODE=audio_only
# --- Sharing Settings ---
ENABLE_INTERNAL_SHARING=false
ENABLE_PUBLIC_SHARING=true
# SHOW_USERNAMES_IN_UI=false
# --- Permission Controls ---
USERS_CAN_DELETE=true
# Delete speaker profiles when all their recordings are removed.
# Default: false (speaker profiles and voice embeddings are preserved)
# Set to true for privacy-sensitive deployments where biometric voice data
# should not outlive the recordings it was derived from.
# DELETE_ORPHANED_SPEAKERS=false
# --- Video Retention ---
# When enabled, uploaded video files keep their video stream for in-browser playback
# The audio is extracted to a temp file for transcription, then cleaned up
# Default: false (video uploads extract audio only, video stream is discarded)
VIDEO_RETENTION=false
# --- Video Passthrough to ASR ---
# Send original video files directly to ASR without extracting audio.
# Useful for custom ASR backends that handle video internally (e.g., multi-track audio extraction).
# When enabled, video files bypass audio extraction, codec conversion, and chunking.
# Only affects video files — audio uploads are processed normally.
# Default: false
# VIDEO_PASSTHROUGH_ASR=false
# --- Concurrent Uploads ---
# Maximum number of simultaneous file uploads (default: 3)
MAX_CONCURRENT_UPLOADS=3
# =============================================================================
# BACKGROUND PROCESSING
# =============================================================================
# Transcription queue workers (default: 2)
JOB_QUEUE_WORKERS=2
# Summary queue workers (default: 2)
SUMMARY_QUEUE_WORKERS=2
# Maximum retry attempts for failed jobs (default: 3)
JOB_MAX_RETRIES=3
# =============================================================================
# DOCKER/DATABASE SETTINGS
# =============================================================================
# Database URI - SQLite (default) or PostgreSQL
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
# For PostgreSQL: postgresql://username:password@hostname:5432/database_name
UPLOAD_FOLDER=/data/uploads
# =============================================================================
# FUTURE: Additional Provider Notes
# =============================================================================
# The connector architecture is designed to support additional providers.
# Future connectors may include:
#
# - Deepgram: Known for excellent diarization and real-time transcription
# - AssemblyAI: Strong diarization with speaker labels
# - Google Cloud Speech-to-Text: Enterprise-grade with speaker diarization
#
# To request a new connector, please open an issue on GitHub.

256
config/env.whisper.example Normal file
View File

@@ -0,0 +1,256 @@
# -----------------------------------------------------------------------------
# DictIA Configuration: Standard Whisper API (Legacy)
#
# ⚠️ DEPRECATION NOTICE: This configuration style is still supported but
# we recommend using the new unified configuration in env.transcription.example
# which supports all transcription providers with auto-detection.
#
# Migration: See TRANSCRIPTION_CONNECTOR documentation in env.transcription.example
# For OpenAI Whisper, simply set:
# TRANSCRIPTION_API_KEY=your_key
# TRANSCRIPTION_MODEL=whisper-1 (or gpt-4o-transcribe-diarize for diarization)
#
# Instructions:
# 1. Copy this file to a new file named .env
# cp config/env.whisper.example .env
# 2. Fill in the required API keys and settings below.
# -----------------------------------------------------------------------------
# --- Text Generation Model (for summaries, titles, etc.) ---
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini
# --- GPT-5 Specific Settings (only used with OpenAI API and GPT-5 models) ---
# If using GPT-5 models (gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat-latest) with OpenAI API,
# these parameters will be used instead of temperature.
#
# Example GPT-5 configuration:
# TEXT_MODEL_BASE_URL=https://api.openai.com/v1
# TEXT_MODEL_NAME=gpt-5-mini
#
# Reasoning effort: minimal, low, medium, high (default: medium)
# - minimal: Fastest responses, minimal reasoning tokens
# - low: Fast responses with basic reasoning
# - medium: Balanced reasoning and speed (recommended)
# - high: Maximum reasoning for complex tasks
GPT5_REASONING_EFFORT=medium
#
# Verbosity: low, medium, high (default: medium)
# - low: Concise responses
# - medium: Balanced detail
# - high: Detailed explanations
GPT5_VERBOSITY=medium
# --- Chat Model Configuration (Optional) ---
# Configure a separate model for real-time chat interactions.
# If not set, chat will use the TEXT_MODEL_* settings above.
#
# Use cases:
# - Use a faster model for chat while using a more capable model for summarization
# - Use a cheaper model for interactive chat to reduce costs
# - Use different service tiers for different operations
#
# CHAT_MODEL_API_KEY=your_chat_api_key
# CHAT_MODEL_BASE_URL=https://openrouter.ai/api/v1
# CHAT_MODEL_NAME=openai/gpt-4o
# --- Chat GPT-5 Settings (only used with OpenAI API and GPT-5 chat models) ---
# These settings allow independent control of GPT-5 parameters for chat.
# If not set, falls back to the main GPT5_* settings above.
#
# CHAT_GPT5_REASONING_EFFORT=medium
# CHAT_GPT5_VERBOSITY=medium
# --- LLM Streaming Compatibility ---
# Some LLM servers (e.g., certain vLLM configurations) don't support OpenAI's
# stream_options parameter. If chat streaming hangs or fails, try disabling this.
# Note: When disabled, token usage tracking for chat will not be available.
# ENABLE_STREAM_OPTIONS=false
# --- Transcription Service (OpenAI Whisper API) ---
# New connector architecture is enabled by default.
# Available models:
# whisper-1 - Legacy, no diarization
# gpt-4o-transcribe - High quality, no diarization
# gpt-4o-mini-transcribe - Cost-effective, no diarization
# gpt-4o-transcribe-diarize - Speaker diarization! (recommended)
TRANSCRIPTION_BASE_URL=https://api.openai.com/v1
TRANSCRIPTION_API_KEY=your_openai_api_key
TRANSCRIPTION_MODEL=whisper-1
# Legacy model name (deprecated, use TRANSCRIPTION_MODEL instead)
# WHISPER_MODEL=whisper-1
# --- Application Settings ---
# Set to "true" to allow user registration, "false" to disable
ALLOW_REGISTRATION=false
# Comma-separated list of allowed email domains for registration.
# Leave empty to allow all domains. Example: company.com,subsidiary.org
REGISTRATION_ALLOWED_DOMAINS=
SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000
# Timezone for displaying dates and times in the UI
# Use a valid TZ database name (e.g., America/New_York, Europe/London, UTC)
# Values are written without quotes, like every other setting in this file:
# env-file loaders that do not strip quotes (e.g. `docker run --env-file`)
# would otherwise pass the quote characters through as part of the value.
TIMEZONE=UTC
# Set the logging level for the application.
# Options: DEBUG, INFO, WARNING, ERROR
LOG_LEVEL=INFO
# --- Large File Chunking ---
# Chunking is now connector-aware:
# - openai_transcribe/asr_endpoint: Handled internally, these settings ignored
# - openai_whisper: Uses these settings for files >25MB
#
# Uncomment the next line to disable chunking (only applies to openai_whisper).
# The note is kept on its own line because not every env-file loader strips
# trailing inline comments from values.
# ENABLE_CHUNKING=false
# Chunk limit - supports size (20MB) or duration (600s, 10m)
CHUNK_LIMIT=20MB
# Overlap between chunks (seconds)
CHUNK_OVERLAP_SECONDS=3
# --- Audio Compression ---
# Automatically compress lossless uploads (WAV, AIFF) to save storage
AUDIO_COMPRESS_UPLOADS=true
# Target codec: mp3 (lossy, smallest), flac (lossless), opus (lossy, efficient)
AUDIO_CODEC=mp3
# Bitrate for lossy codecs (ignored for FLAC)
AUDIO_BITRATE=128k
# Unsupported codecs - comma-separated list of codecs to exclude from supported list
# Use this if your transcription service doesn't support certain codecs
# Supported codecs by default: pcm_s16le, pcm_s24le, pcm_f32le, mp3, flac, opus, vorbis, aac
# Example: AUDIO_UNSUPPORTED_CODECS=opus,vorbis
# AUDIO_UNSUPPORTED_CODECS=
# --- Admin User (created on first run) ---
# SECURITY: "changeme" is only a placeholder. Because the admin account is
# created on first run, set a strong, unique password (and a real email
# address) here before starting the application for the first time.
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=changeme
# --- Inquire Mode (AI search across all recordings) ---
# Set to "true" to enable semantic search and chat across all recordings
# Requires additional dependencies (included in the full Docker image; the 'lite' image ships without PyTorch, so semantic search falls back to text search)
ENABLE_INQUIRE_MODE=false
# --- Automated File Processing (Black Hole Directory) ---
# Set to "true" to enable automated file processing
ENABLE_AUTO_PROCESSING=false
# Processing mode: admin_only, user_directories, or single_user
AUTO_PROCESS_MODE=admin_only
# Directory to watch for new audio files
AUTO_PROCESS_WATCH_DIR=/data/auto-process
# How often to check for new files (seconds)
AUTO_PROCESS_CHECK_INTERVAL=30
# How long to wait (seconds) to confirm a file has stopped changing before processing.
# Increase for slow network transfers (NFS, SMB). Default: 5
# AUTO_PROCESS_STABILITY_TIME=5
# Default username for single_user mode (only used if AUTO_PROCESS_MODE=single_user)
# AUTO_PROCESS_DEFAULT_USERNAME=admin
# --- Automated Export Settings ---
# Automatically export transcriptions and summaries to markdown files
ENABLE_AUTO_EXPORT=false
# Directory where exports will be saved (per-user subdirectories created automatically)
AUTO_EXPORT_DIR=/data/exports
# What to include in exports
AUTO_EXPORT_TRANSCRIPTION=true
AUTO_EXPORT_SUMMARY=true
# --- Auto-Deletion & Retention Settings ---
# Enable automated deletion of old recordings
ENABLE_AUTO_DELETION=false
# Number of days to retain recordings (0 = disabled)
# Example: 90 means recordings older than 90 days become subject to auto-deletion (handled according to DELETION_MODE below)
GLOBAL_RETENTION_DAYS=90
# Deletion mode: 'audio_only' keeps transcription, 'full_recording' deletes everything
# audio_only: Deletes audio file but keeps transcription/summary/notes (recommended)
# full_recording: Permanently deletes the entire recording from database
DELETION_MODE=audio_only
# --- Permission-Based Deletion Controls ---
# Allow all users to delete their recordings, or restrict to admins only
# true: All users can delete their own recordings (default)
# false: Only admins can delete recordings
USERS_CAN_DELETE=true
# Delete speaker profiles when all their recordings are removed.
# Default: false (speaker profiles and voice embeddings are preserved)
# Set to true for privacy-sensitive deployments where biometric voice data
# should not outlive the recordings it was derived from.
# DELETE_ORPHANED_SPEAKERS=false
# --- Internal Sharing Settings ---
# Enable user-to-user sharing of recordings (works independently of groups)
ENABLE_INTERNAL_SHARING=false
# Show usernames in the UI (when sharing/viewing shared recordings)
# true: Display usernames throughout the interface
# false: Hide usernames (users must know each other's usernames to share)
SHOW_USERNAMES_IN_UI=false
# --- Public Sharing Settings ---
# Enable creation of public share links (anonymous access)
# true: Users can create public links to share recordings externally (default)
# false: Public sharing is disabled globally
ENABLE_PUBLIC_SHARING=true
# Note: Admins can control public sharing permissions per-user in the admin dashboard
# even when ENABLE_PUBLIC_SHARING is true
# --- Incognito Mode (HIPAA-friendly) ---
# Enable incognito mode for privacy-sensitive transcriptions
# When enabled, users can upload recordings that are:
# - Processed on the server but NOT saved to the database
# - Stored only in the browser's sessionStorage (lost when tab closes)
# - Audio files are immediately deleted after processing
# Useful for HIPAA compliance or sensitive recordings
# Default: false (feature hidden)
ENABLE_INCOGNITO_MODE=false
# Make incognito mode the default for in-app recordings (toggle starts ON)
INCOGNITO_MODE_DEFAULT=false
# --- Video Retention ---
# When enabled, uploaded video files keep their video stream for in-browser playback
# The audio is extracted to a temp file for transcription, then cleaned up
# Default: false (video uploads extract audio only, video stream is discarded)
VIDEO_RETENTION=false
# --- Concurrent Uploads ---
# Maximum number of simultaneous file uploads (default: 3)
MAX_CONCURRENT_UPLOADS=3
# --- Background Processing Queues ---
# Separate queues for transcription (slow) and summary (fast) jobs
# This prevents slow ASR jobs from blocking quick summary generation
# Transcription queue workers (for ASR processing, default: 2)
JOB_QUEUE_WORKERS=2
# Summary queue workers (for LLM summarization, default: 2)
SUMMARY_QUEUE_WORKERS=2
# Maximum retry attempts for failed jobs (default: 3)
JOB_MAX_RETRIES=3
# --- Docker Settings (rarely need to be changed) ---
# Database URI - SQLite (default) or PostgreSQL
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
# For PostgreSQL, use: postgresql://username:password@hostname:5432/database_name
# Example: postgresql://speakr:password@postgres:5432/speakr
UPLOAD_FOLDER=/data/uploads

241
config/env.whisperx.example Normal file
View File

@@ -0,0 +1,241 @@
# -----------------------------------------------------------------------------
# DictIA Configuration: WhisperX ASR Endpoint (with Voice Profiles)
#
# ⚠️ DEPRECATION NOTICE: This configuration style is still supported but
# we recommend using the new unified configuration in env.transcription.example
# which supports all transcription providers with auto-detection.
#
# Migration: Simply set ASR_BASE_URL and the connector will auto-detect ASR mode.
# USE_ASR_ENDPOINT=true is no longer required (but still works for backwards compat).
#
# This configuration is for use with the WhisperX ASR Service:
# https://github.com/murtaza-nasir/whisperx-asr-service
#
# Features supported:
# - Speaker diarization with pyannote/speaker-diarization-community-1
# - Voice profile embeddings (256-dimensional) for speaker recognition
# - Automatic speaker matching across recordings
# - Better timestamp alignment between speakers and words
#
# Instructions:
# 1. Copy this file to a new file named .env
# cp config/env.whisperx.example .env
# 2. Fill in the required URLs, API keys, and settings below.
# 3. Set up WhisperX ASR Service (see installation guide)
# -----------------------------------------------------------------------------
# --- Text Generation Model (for summaries, titles, etc.) ---
TEXT_MODEL_BASE_URL=https://openrouter.ai/api/v1
TEXT_MODEL_API_KEY=your_openrouter_api_key
TEXT_MODEL_NAME=openai/gpt-4o-mini
# --- GPT-5 Specific Settings (only used with OpenAI API and GPT-5 models) ---
# If using GPT-5 models (gpt-5, gpt-5-mini, gpt-5-nano, gpt-5-chat-latest) with OpenAI API,
# these parameters will be used instead of temperature.
#
# Example GPT-5 configuration:
# TEXT_MODEL_BASE_URL=https://api.openai.com/v1
# TEXT_MODEL_NAME=gpt-5-mini
#
# Reasoning effort: minimal, low, medium, high (default: medium)
# - minimal: Fastest responses, minimal reasoning tokens
# - low: Fast responses with basic reasoning
# - medium: Balanced reasoning and speed (recommended)
# - high: Maximum reasoning for complex tasks
GPT5_REASONING_EFFORT=medium
#
# Verbosity: low, medium, high (default: medium)
# - low: Concise responses
# - medium: Balanced detail
# - high: Detailed explanations
GPT5_VERBOSITY=medium
# --- Auto-Identify Speaker Response Format ---
# When enabled, auto-identify uses JSON Schema response format (structured outputs)
# to constrain LLM output to valid SPEAKER_XX keys. Falls back to json_object mode
# if the model doesn't support it. Leave disabled for widest model compatibility.
# AUTO_IDENTIFY_RESPONSE_SCHEMA=1
# --- Chat Model Configuration (Optional) ---
# Configure a separate model for real-time chat interactions.
# If not set, chat will use the TEXT_MODEL_* settings above.
#
# Use cases:
# - Use a faster model for chat while using a more capable model for summarization
# - Use a cheaper model for interactive chat to reduce costs
# - Use different service tiers for different operations
#
# CHAT_MODEL_API_KEY=your_chat_api_key
# CHAT_MODEL_BASE_URL=https://openrouter.ai/api/v1
# CHAT_MODEL_NAME=openai/gpt-4o
# --- Chat GPT-5 Settings (only used with OpenAI API and GPT-5 chat models) ---
# These settings allow independent control of GPT-5 parameters for chat.
# If not set, falls back to the main GPT5_* settings above.
#
# CHAT_GPT5_REASONING_EFFORT=medium
# CHAT_GPT5_VERBOSITY=medium
# --- Transcription Service (WhisperX ASR Endpoint) ---
# New connector architecture auto-detects ASR mode when ASR_BASE_URL is set.
# USE_ASR_ENDPOINT=true is deprecated but still works for backwards compatibility.
#
# Note: ASR endpoints handle chunking internally - CHUNK_LIMIT settings are ignored.
# WhisperX ASR Endpoint URL (setting this auto-enables ASR mode)
# For containers in same docker-compose: Use container name and internal port
# Example: http://whisperx-asr:9000 (NOT the host port or external IP)
# For external ASR: Use http://192.168.1.100:9000 or http://asr.example.com:9000
ASR_BASE_URL=http://whisperx-asr:9000
# Deprecated: No longer needed, kept for backwards compatibility
# USE_ASR_ENDPOINT=true
# Speaker diarization options
ASR_DIARIZE=true
# The optional speaker-count settings below keep their descriptions on separate
# lines: not every env-file loader strips trailing inline comments, so a
# same-line note could end up inside the value once the setting is uncommented.
# Hint for minimum speakers
# ASR_MIN_SPEAKERS=1
# Default maximum speakers
# ASR_MAX_SPEAKERS=5
# Enable speaker embeddings for voice profile matching (WhisperX only)
ASR_RETURN_SPEAKER_EMBEDDINGS=true
# --- Application Settings ---
# Set to "true" to allow user registration, "false" to disable
ALLOW_REGISTRATION=false
# Comma-separated list of allowed email domains for registration.
# Leave empty to allow all domains. Example: company.com,subsidiary.org
REGISTRATION_ALLOWED_DOMAINS=
SUMMARY_MAX_TOKENS=8000
CHAT_MAX_TOKENS=5000
# Timezone for displaying dates and times in the UI
# Use a valid TZ database name (e.g., America/New_York, Europe/London, UTC)
# Values are written without quotes, like every other setting in this file:
# env-file loaders that do not strip quotes (e.g. `docker run --env-file`)
# would otherwise pass the quote characters through as part of the value.
TIMEZONE=UTC
# Set the logging level for the application.
# Options: DEBUG, INFO, WARNING, ERROR
LOG_LEVEL=INFO
# --- Audio Compression ---
# Automatically compress lossless uploads (WAV, AIFF) to save storage
AUDIO_COMPRESS_UPLOADS=true
# Target codec: mp3 (lossy, smallest), flac (lossless), opus (lossy, efficient)
AUDIO_CODEC=mp3
# Bitrate for lossy codecs (ignored for FLAC)
AUDIO_BITRATE=128k
# --- Admin User (created on first run) ---
# SECURITY: "changeme" is only a placeholder. Because the admin account is
# created on first run, set a strong, unique password (and a real email
# address) here before starting the application for the first time.
ADMIN_USERNAME=admin
ADMIN_EMAIL=admin@example.com
ADMIN_PASSWORD=changeme
# --- Inquire Mode (AI search across all recordings) ---
# Set to "true" to enable semantic search and chat across all recordings
# Requires additional dependencies (included in the full Docker image; the 'lite' image ships without PyTorch, so semantic search falls back to text search)
ENABLE_INQUIRE_MODE=false
# --- Automated File Processing (Black Hole Directory) ---
# Set to "true" to enable automated file processing
ENABLE_AUTO_PROCESSING=false
# Processing mode: admin_only, user_directories, or single_user
AUTO_PROCESS_MODE=admin_only
# Directory to watch for new audio files
AUTO_PROCESS_WATCH_DIR=/data/auto-process
# How often to check for new files (seconds)
AUTO_PROCESS_CHECK_INTERVAL=30
# How long to wait (seconds) to confirm a file has stopped changing before processing.
# Increase for slow network transfers (NFS, SMB). Default: 5
# AUTO_PROCESS_STABILITY_TIME=5
# Default username for single_user mode (only used if AUTO_PROCESS_MODE=single_user)
# AUTO_PROCESS_DEFAULT_USERNAME=admin
# --- Automated Export Settings ---
# Automatically export transcriptions and summaries to markdown files
ENABLE_AUTO_EXPORT=false
# Directory where exports will be saved (per-user subdirectories created automatically)
AUTO_EXPORT_DIR=/data/exports
# What to include in exports
AUTO_EXPORT_TRANSCRIPTION=true
AUTO_EXPORT_SUMMARY=true
# --- Auto-Deletion & Retention Settings ---
# Enable automated deletion of old recordings
ENABLE_AUTO_DELETION=false
# Number of days to retain recordings (0 = disabled)
# Example: 90 means recordings older than 90 days become subject to auto-deletion (handled according to DELETION_MODE below)
GLOBAL_RETENTION_DAYS=90
# Deletion mode: 'audio_only' keeps transcription, 'full_recording' deletes everything
# audio_only: Deletes audio file but keeps transcription/summary/notes (recommended)
# full_recording: Permanently deletes the entire recording from database
DELETION_MODE=audio_only
# --- Permission-Based Deletion Controls ---
# Allow all users to delete their recordings, or restrict to admins only
# true: All users can delete their own recordings (default)
# false: Only admins can delete recordings
USERS_CAN_DELETE=true
# Delete speaker profiles when all their recordings are removed.
# Default: false (speaker profiles and voice embeddings are preserved)
# Set to true for privacy-sensitive deployments where biometric voice data
# should not outlive the recordings it was derived from.
# DELETE_ORPHANED_SPEAKERS=false
# --- Internal Sharing Settings ---
# Enable user-to-user sharing of recordings (works independently of groups)
ENABLE_INTERNAL_SHARING=false
# Show usernames in the UI (when sharing/viewing shared recordings)
# true: Display usernames throughout the interface
# false: Hide usernames (users must know each other's usernames to share)
SHOW_USERNAMES_IN_UI=false
# --- Public Sharing Settings ---
# Enable creation of public share links (anonymous access)
# true: Users can create public links to share recordings externally (default)
# false: Public sharing is disabled globally
ENABLE_PUBLIC_SHARING=true
# Note: Admins can control public sharing permissions per-user in the admin dashboard
# even when ENABLE_PUBLIC_SHARING is true
# --- Video Retention ---
# When enabled, uploaded video files keep their video stream for in-browser playback
# The audio is extracted to a temp file for transcription, then cleaned up
# Default: false (video uploads extract audio only, video stream is discarded)
VIDEO_RETENTION=false
# --- Concurrent Uploads ---
# Maximum number of simultaneous file uploads (default: 3)
MAX_CONCURRENT_UPLOADS=3
# --- Background Processing Queues ---
# Separate queues for transcription (slow) and summary (fast) jobs
# This prevents slow ASR jobs from blocking quick summary generation
# Transcription queue workers (for ASR processing, default: 2)
JOB_QUEUE_WORKERS=2
# Summary queue workers (for LLM summarization, default: 2)
SUMMARY_QUEUE_WORKERS=2
# Maximum retry attempts for failed jobs (default: 3)
JOB_MAX_RETRIES=3
# --- Docker Settings (rarely need to be changed) ---
# Database URI - SQLite (default) or PostgreSQL
SQLALCHEMY_DATABASE_URI=sqlite:////data/instance/transcriptions.db
# For PostgreSQL, use: postgresql://username:password@hostname:5432/database_name
# Example: postgresql://speakr:password@postgres:5432/speakr
UPLOAD_FOLDER=/data/uploads