#!/usr/bin/env python3
"""
File Exporter for Automated Recording Export

Exports transcriptions and summaries as markdown files to a configured directory.
Supports per-user subdirectories based on username.
Supports customizable export templates with localized labels.
"""

import os
import re
import json
import logging
from datetime import datetime, timedelta
from pathlib import Path

from werkzeug.utils import secure_filename

# Configuration from environment
ENABLE_AUTO_EXPORT = os.environ.get('ENABLE_AUTO_EXPORT', 'false').lower() == 'true'
AUTO_EXPORT_DIR = os.environ.get('AUTO_EXPORT_DIR', '/data/exports')
AUTO_EXPORT_TRANSCRIPTION = os.environ.get('AUTO_EXPORT_TRANSCRIPTION', 'true').lower() == 'true'
AUTO_EXPORT_SUMMARY = os.environ.get('AUTO_EXPORT_SUMMARY', 'true').lower() == 'true'

# Setup logging
logger = logging.getLogger('file_exporter')
logger.setLevel(logging.INFO)

# Transcript line format used when the user has no default TranscriptTemplate.
_DEFAULT_TRANSCRIPT_FORMAT = "[{{speaker}}]: {{text}}"

# Matches {{name}} and {{name|filter}}. Restricting both parts to \w+ keeps a
# match from starting at an earlier, unrelated "{{" and swallowing the text in
# between (e.g. "{{start_time|srt}} {{speaker|upper}}").
_TRANSCRIPT_TOKEN_RE = re.compile(r'\{\{(\w+)(?:\|(\w+))?\}\}')

# Variables that accept the |srt filter.
_SRT_VARIABLES = ('start_time', 'end_time')


def _clock_parts(seconds):
    """Split a second count into (hours, minutes, seconds, milliseconds)."""
    td = timedelta(seconds=seconds)
    # timedelta stores integral days/seconds/microseconds, so the pieces can be
    # read back without float truncation artefacts.
    whole_seconds = td.days * 86400 + td.seconds
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, secs = divmod(remainder, 60)
    return hours, minutes, secs, td.microseconds // 1000


def _format_time(seconds):
    """Format seconds to HH:MM:SS ("00:00:00" when seconds is None)."""
    if seconds is None:
        return "00:00:00"
    hours, minutes, secs, _ = _clock_parts(seconds)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"


def _format_srt_time(seconds):
    """Format seconds to SRT format HH:MM:SS,mmm ("00:00:00,000" when None)."""
    if seconds is None:
        return "00:00:00,000"
    hours, minutes, secs, millis = _clock_parts(seconds)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _render_transcript_line(template_format, index, segment):
    """Render one transcript segment (a dict) through the line template."""

    def as_text(key, default):
        # A key that is present but None would otherwise break string
        # substitution; treat it like a missing key.
        value = segment.get(key)
        return default if value is None else str(value)

    values = {
        'index': str(index),
        'speaker': as_text('speaker', 'Unknown'),
        'text': as_text('sentence', ''),
        'start_time': _format_time(segment.get('start_time')),
        'end_time': _format_time(segment.get('end_time')),
    }

    def substitute(match):
        name, filter_name = match.group(1), match.group(2)
        if filter_name is None:
            # Unknown variables are left in the output untouched.
            return values.get(name, match.group(0))
        if filter_name == 'upper':
            # Unknown variable with |upper renders as an empty string.
            return values.get(name, '').upper()
        if filter_name == 'srt' and name in _SRT_VARIABLES:
            return _format_srt_time(segment.get(name))
        return match.group(0)

    # Single pass: substituted values (speaker names, sentences) are never
    # re-scanned, so "{{...}}" inside transcript text is emitted literally.
    return _TRANSCRIPT_TOKEN_RE.sub(substitute, template_format)


def format_transcription_with_template(transcription_text, user):
    """
    Format transcription using the user's default template.

    The transcription is expected to be a JSON list of segment objects with
    the keys 'speaker', 'sentence', 'start_time' and 'end_time'. Anything else
    (plain text, invalid JSON, a JSON value that is not a list of objects) is
    returned unchanged.

    Supported placeholders: {{index}}, {{speaker}}, {{text}}, {{start_time}},
    {{end_time}}, the {{<variable>|upper}} filter, and {{start_time|srt}} /
    {{end_time|srt}}.

    Args:
        transcription_text: Raw transcription (JSON or plain text)
        user: User object to get template from

    Returns:
        Formatted transcription string
    """
    # Import here to avoid circular imports
    from src.models import TranscriptTemplate

    # Try to parse as JSON
    try:
        transcription_data = json.loads(transcription_text)
    except (json.JSONDecodeError, TypeError):
        # Not JSON, return as-is
        return transcription_text

    if not isinstance(transcription_data, list):
        # Not our expected format, return as-is
        return transcription_text
    if not all(isinstance(segment, dict) for segment in transcription_data):
        # A list of something other than segment objects: also not our format
        return transcription_text

    # Get user's default template
    template = TranscriptTemplate.query.filter_by(
        user_id=user.id,
        is_default=True
    ).first()

    # Default format if no template set
    if not template or template.template is None:
        template_format = _DEFAULT_TRANSCRIPT_FORMAT
    else:
        template_format = template.template

    # Generate formatted transcript, one line per segment (index starts at 1)
    return '\n'.join(
        _render_transcript_line(template_format, index, segment)
        for index, segment in enumerate(transcription_data, 1)
    )


def get_export_directory(user):
    """
    Get the export directory for a user, creating if needed.

    The directory is a per-user subdirectory of AUTO_EXPORT_DIR named after the
    sanitized username. secure_filename() may return an empty string (for
    example when the username has no ASCII-representable characters); in that
    case the directory falls back to "user_<id>" so exports never land in the
    shared base directory.

    Args:
        user: User object (uses .username and .id)

    Returns:
        Path to the user's export directory
    """
    base_dir = Path(AUTO_EXPORT_DIR)

    # Create per-user subdirectory based on username
    dir_name = secure_filename(user.username or '')
    if not dir_name:
        dir_name = f"user_{user.id}"

    user_dir = base_dir / dir_name
    user_dir.mkdir(parents=True, exist_ok=True)
    return user_dir


def generate_safe_filename(recording):
    """Generate a safe filename (without extension) based on recording ID only."""
    # Use only recording ID for consistent filename that doesn't change
    return f"recording_{recording.id}"


def get_export_filepath(user, recording):
    """Get the full export filepath (.md) for a recording, creating the user directory."""
    return get_export_directory(user) / f"{generate_safe_filename(recording)}.md"


def mark_export_as_deleted(recording_id):
    """
    Rename the export file to indicate the recording was deleted.

    Searches every subdirectory of AUTO_EXPORT_DIR for
    "recording_<id>.md" and renames the first match to
    "[deleted]_recording_<id>.md". Errors are logged, never raised.

    Args:
        recording_id: ID of the deleted recording

    Returns:
        New filepath (str) if renamed, None otherwise
    """
    if not ENABLE_AUTO_EXPORT:
        return None

    # Import here to avoid circular imports
    from src.app import app

    with app.app_context():
        try:
            # We need to find the file - check all user directories
            base_dir = Path(AUTO_EXPORT_DIR)
            if not base_dir.exists():
                return None

            # Look for the file in all user subdirectories
            for user_dir in base_dir.iterdir():
                if not user_dir.is_dir():
                    continue
                old_filepath = user_dir / f"recording_{recording_id}.md"
                if not old_filepath.exists():
                    continue
                new_filepath = user_dir / f"[deleted]_recording_{recording_id}.md"
                old_filepath.rename(new_filepath)
                logger.info(f"Marked export as deleted: {new_filepath}")
                return str(new_filepath)

            return None

        except Exception as e:
            logger.error(f"Failed to mark export as deleted for recording {recording_id}: {e}")
            return None


def format_duration(seconds):
    """
    Format duration in seconds to human-readable string.

    Returns "" for a falsy value (None or 0); otherwise "Xh Ym Zs",
    "Ym Zs" or "Zs" depending on the largest non-zero unit.
    """
    if not seconds:
        return ""

    hours = seconds // 3600
    minutes = (seconds % 3600) // 60
    secs = seconds % 60

    if hours > 0:
        return f"{hours}h {minutes}m {secs}s"
    if minutes > 0:
        return f"{minutes}m {secs}s"
    return f"{secs}s"


def format_file_size(bytes_size):
    """
    Format file size in bytes to human-readable string.

    Returns "" for a falsy value (None or 0); otherwise the size with one
    decimal place in B, KB, MB, GB or TB (1024-based).
    """
    if not bytes_size:
        return ""

    for unit in ['B', 'KB', 'MB', 'GB']:
        if bytes_size < 1024:
            return f"{bytes_size:.1f} {unit}"
        bytes_size /= 1024
    return f"{bytes_size:.1f} TB"


def get_user_export_template(user, recording=None):
    """
    Get the export template to use for a recording.

    Resolution order:
    1. Folder's export_template_id (if recording is in a folder)
    2. Tag's export_template_id (first matching tag with an export template)
    3. User's default export template (is_default=True)

    Args:
        user: User object
        recording: Optional Recording object (for folder/tag lookup)

    Returns:
        ExportTemplate object or None
    """
    from src.models import ExportTemplate

    if recording:
        # 1. Check folder's export template
        folder = recording.folder
        if folder and folder.export_template_id:
            template = ExportTemplate.query.get(folder.export_template_id)
            if template:
                return template

        # 2. Check tags' export templates
        for tag in recording.tags or ():
            if not tag.export_template_id:
                continue
            template = ExportTemplate.query.get(tag.export_template_id)
            if template:
                return template

    # 3. Fall back to user's default
    return ExportTemplate.query.filter_by(
        user_id=user.id,
        is_default=True
    ).first()


def render_export_template(template_str, context, labels):
    """
    Render an export template with variable substitution and conditionals.

    Processing order:
    1. {{#if var}}...{{/if}} blocks are kept when context[var] is truthy and
       removed otherwise (matching is non-greedy, so nested blocks are not
       supported).
    2. {{label.key}} is replaced by labels[key], or by the key itself when the
       label is missing.
    3. {{variable}} is replaced by str(context[variable]), or "" when the
       value is falsy. Placeholders that are not context keys are left as-is.

    Context values are substituted in a single pass, so "{{...}}" text that
    appears inside a value (e.g. in user notes) is emitted literally rather
    than being expanded again.

    Args:
        template_str: Template string with {{variables}} and {{#if var}}...{{/if}} blocks
        context: Dictionary of variable values
        labels: Dictionary of localized labels

    Returns:
        Rendered string
    """
    # Process conditionals first: {{#if variable}}content{{/if}}
    def replace_conditional(match):
        var_name, content = match.group(1), match.group(2)
        # Keep the content only if the variable exists and is truthy
        return content if context.get(var_name, '') else ''

    # Match {{#if var}}...{{/if}} blocks (non-greedy)
    conditional_pattern = r'\{\{#if\s+(\w+)\}\}(.*?)\{\{/if\}\}'
    result = re.sub(conditional_pattern, replace_conditional, template_str, flags=re.DOTALL)

    # Replace label variables: {{label.key}}
    def replace_label(match):
        key = match.group(1)
        return labels.get(key, key)

    result = re.sub(r'\{\{label\.(\w+)\}\}', replace_label, result)

    # Replace context variables: {{variable}}
    if context:
        def replace_variable(match):
            value = context[match.group(1)]
            return str(value) if value else ''

        # Build the pattern from the actual keys so only known variables match.
        variable_pattern = (
            r'\{\{(' + '|'.join(re.escape(key) for key in context) + r')\}\}'
        )
        result = re.sub(variable_pattern, replace_variable, result)

    return result


def generate_markdown_content(recording, user, include_transcription=True, include_summary=True):
    """
    Generate markdown content for a recording export.

    Uses the export template resolved by get_user_export_template() when one
    exists, otherwise the built-in default layout.

    Args:
        recording: Recording object to export
        user: User object for getting template preferences
        include_transcription: Whether to include transcription
        include_summary: Whether to include summary

    Returns:
        Rendered markdown string
    """
    from src.utils.localization import get_export_labels

    # Get user's language preference (default to English)
    user_language = getattr(user, 'ui_language', 'en') or 'en'

    # Get localized labels
    labels = get_export_labels(user_language)

    # Get export template (checks folder, tags, then user default)
    export_template = get_user_export_template(user, recording)

    if export_template:
        # Use custom template
        return generate_from_template(
            recording, user, export_template.template, labels, user_language,
            include_transcription, include_summary
        )

    # Use default (backwards compatible) behavior
    return generate_default_markdown(
        recording, user, labels, user_language,
        include_transcription, include_summary
    )


def generate_from_template(recording, user, template_str, labels, user_language,
                           include_transcription=True, include_summary=True):
    """
    Generate markdown content using a custom template.

    Args:
        recording: Recording object
        user: User object
        template_str: Template string
        labels: Localized labels dictionary
        user_language: User's language code
        include_transcription: Whether to include transcription
        include_summary: Whether to include summary

    Returns:
        Rendered markdown string (template output followed by the footer)
    """
    from src.utils.localization import format_date_localized, format_datetime_localized

    # Build context with all available variables
    context = {
        'title': recording.title or f"Recording {recording.id}",
        'meeting_date': format_date_localized(recording.meeting_date, user_language) if recording.meeting_date else '',
        'created_at': format_datetime_localized(recording.created_at, user_language) if recording.created_at else '',
        'original_filename': recording.original_filename or '',
        'file_size': format_file_size(recording.file_size) if recording.file_size else '',
        'participants': recording.participants or '',
        'tags': ', '.join(tag.name for tag in recording.tags) if recording.tags else '',
        'transcription_duration': format_duration(recording.transcription_duration_seconds) if recording.transcription_duration_seconds else '',
        'summarization_duration': format_duration(recording.summarization_duration_seconds) if recording.summarization_duration_seconds else '',
        # Notes included with summary setting
        'notes': (recording.notes or '') if include_summary else '',
        'summary': (recording.summary or '') if include_summary else '',
        'transcription': '',  # Will be set below
    }

    # Format transcription if included
    if include_transcription and recording.transcription:
        context['transcription'] = format_transcription_with_template(recording.transcription, user)

    # Render template
    rendered = render_export_template(template_str, context, labels)

    # Always append hardcoded footer
    footer = labels.get('footer', 'Generated with [Speakr](https://github.com/learnedmachine/speakr)')
    rendered += f"\n\n---\n\n*{footer}*\n"

    return rendered


def generate_default_markdown(recording, user, labels, user_language,
                              include_transcription=True, include_summary=True):
    """
    Generate markdown using the default (backwards compatible) format.

    Layout: title heading, metadata bullet list, then optional Notes, Summary
    and Transcription sections, then the footer.

    Args:
        recording: Recording object
        user: User object
        labels: Localized labels dictionary
        user_language: User's language code
        include_transcription: Whether to include transcription
        include_summary: Whether to include summary

    Returns:
        Rendered markdown string
    """
    from src.utils.localization import format_date_localized, format_datetime_localized

    lines = []

    def add_metadata(label_key, fallback, value):
        # One "- **Label:** value" bullet in the metadata list
        lines.append(f"- **{labels.get(label_key, fallback)}:** {value}")

    def add_section(label_key, fallback, body):
        # "## Heading", blank line, body, blank line
        lines.extend([f"## {labels.get(label_key, fallback)}", "", body, ""])

    # Header with title
    title = recording.title or f"Recording {recording.id}"
    lines.extend([f"# {title}", ""])

    # Metadata section
    lines.extend([f"## {labels.get('metadata', 'Metadata')}", ""])

    if recording.meeting_date:
        add_metadata('date', 'Date', format_date_localized(recording.meeting_date, user_language))
    if recording.created_at:
        add_metadata('created', 'Created', format_datetime_localized(recording.created_at, user_language))
    if recording.original_filename:
        add_metadata('originalFile', 'Original File', recording.original_filename)
    if recording.file_size:
        add_metadata('fileSize', 'File Size', format_file_size(recording.file_size))
    if recording.participants:
        add_metadata('participants', 'Participants', recording.participants)
    if recording.tags:
        add_metadata('tags', 'Tags', ', '.join(tag.name for tag in recording.tags))
    if recording.transcription_duration_seconds:
        add_metadata('transcriptionTime', 'Transcription Time',
                     format_duration(recording.transcription_duration_seconds))
    if recording.summarization_duration_seconds:
        add_metadata('summarizationTime', 'Summarization Time',
                     format_duration(recording.summarization_duration_seconds))

    lines.append("")

    # Notes section (if available)
    # NOTE: unlike generate_from_template, notes are emitted here regardless of
    # include_summary; kept as-is for backwards compatibility.
    if recording.notes:
        add_section('notes', 'Notes', recording.notes)

    # Summary section
    if include_summary and recording.summary:
        add_section('summary', 'Summary', recording.summary)

    # Transcription section
    if include_transcription and recording.transcription:
        # Format transcription using user's template
        add_section(
            'transcription', 'Transcription',
            format_transcription_with_template(recording.transcription, user)
        )

    # Footer
    footer = labels.get('footer', 'Generated with [Speakr](https://github.com/learnedmachine/speakr)')
    lines.extend(["---", "", f"*{footer}*", ""])

    return "\n".join(lines)


def export_recording(recording_id):
    """
    Export a recording to markdown file.

    The file is written to the owner's export directory as
    "recording_<id>.md" and overwritten if it already exists. Errors are
    logged, never raised.

    Args:
        recording_id: ID of the recording to export

    Returns:
        Path to the exported file (str), or None if export failed/disabled
    """
    if not ENABLE_AUTO_EXPORT:
        return None

    # Check if we should export anything
    if not AUTO_EXPORT_TRANSCRIPTION and not AUTO_EXPORT_SUMMARY:
        logger.warning("Auto-export is enabled but both transcription and summary export are disabled")
        return None

    # Import here to avoid circular imports
    from src.app import app, db
    from src.models import Recording, User

    with app.app_context():
        try:
            recording = db.session.get(Recording, recording_id)
            if not recording:
                logger.error(f"Recording {recording_id} not found for export")
                return None

            # Get the owner
            user = db.session.get(User, recording.user_id)
            if not user:
                logger.error(f"User not found for recording {recording_id}")
                return None

            # Check if we have content to export
            has_transcription = bool(recording.transcription) and AUTO_EXPORT_TRANSCRIPTION
            has_summary = bool(recording.summary) and AUTO_EXPORT_SUMMARY

            if not has_transcription and not has_summary:
                logger.debug(f"Recording {recording_id} has no content to export")
                return None

            # Resolve the target path (creates the user's export directory)
            filepath = get_export_filepath(user, recording)

            # Generate content
            content = generate_markdown_content(
                recording,
                user,
                include_transcription=AUTO_EXPORT_TRANSCRIPTION,
                include_summary=AUTO_EXPORT_SUMMARY
            )

            # Write to file (overwrites if exists)
            filepath.write_text(content, encoding='utf-8')

            logger.info(f"Exported recording {recording_id} to {filepath}")
            return str(filepath)

        except Exception as e:
            logger.error(f"Failed to export recording {recording_id}: {e}")
            return None


def initialize_export_directory():
    """
    Initialize the export directory on startup.

    Does nothing when auto-export is disabled. Otherwise creates
    AUTO_EXPORT_DIR and logs which content types will be exported. Errors are
    logged, never raised.
    """
    if not ENABLE_AUTO_EXPORT:
        return

    try:
        export_dir = Path(AUTO_EXPORT_DIR)
        export_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Auto-export enabled, directory: {AUTO_EXPORT_DIR}")

        if AUTO_EXPORT_TRANSCRIPTION and AUTO_EXPORT_SUMMARY:
            logger.info("Exporting: transcription and summary")
        elif AUTO_EXPORT_TRANSCRIPTION:
            logger.info("Exporting: transcription only")
        elif AUTO_EXPORT_SUMMARY:
            logger.info("Exporting: summary only")
        else:
            logger.warning("Auto-export enabled but no content types selected")

    except Exception as e:
        logger.error(f"Failed to initialize export directory: {e}")