dictia-public/src/init_db.py
commit aa269c5bc0 (Allison)
feat(auth): B-2.5 TOTP MFA + recovery codes (Fernet-encrypted secret)
Adds TOTP-based two-factor authentication (RFC 6238) with 10 single-use
recovery codes. Secret is encrypted at rest with a Fernet key derived
deterministically from app SECRET_KEY (SHA-256 -> urlsafe-base64); the raw
base32 secret never lives in the database. Recovery codes are bcrypt-hashed
and consumed atomically (single-use, removed from the JSON list on match).
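
A minimal sketch of the key-derivation and encryption scheme described
above (helper names are illustrative, not the actual src/auth/totp.py API):

    import base64
    import hashlib

    from cryptography.fernet import Fernet, InvalidToken
    import pyotp


    def _fernet_from_secret_key(secret_key: str) -> Fernet:
        # SHA-256 of the app SECRET_KEY -> 32 bytes -> urlsafe base64,
        # which is exactly the key format Fernet expects.
        digest = hashlib.sha256(secret_key.encode("utf-8")).digest()
        return Fernet(base64.urlsafe_b64encode(digest))


    def encrypt_totp_secret(secret_key: str, totp_secret_b32: str) -> str:
        return _fernet_from_secret_key(secret_key).encrypt(totp_secret_b32.encode()).decode()


    def decrypt_totp_secret(secret_key: str, token: str) -> str:
        # Raises InvalidToken if SECRET_KEY changed (key mismatch).
        return _fernet_from_secret_key(secret_key).decrypt(token.encode()).decode()


    # Round trip: the raw base32 secret only ever exists in memory.
    raw = pyotp.random_base32()
    stored = encrypt_totp_secret("app-secret-key", raw)
    assert decrypt_totp_secret("app-secret-key", stored) == raw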

Routes:
- GET /2fa/setup: generate fresh secret + QR + 10 recovery codes; cache
  pending state in session, render auth/totp_setup.html with inline QR
  data URL and the 10 codes shown ONCE.
- POST /2fa/setup: verify the user-submitted 6-digit code against the
  pending secret; on success persist encrypted secret + hashes and flip
totp_enabled=True. On an invalid code, re-render the same QR (the secret
is not rotated) so the user's existing authenticator scan stays valid.
- GET /2fa/verify: second factor during login; reads pending_totp_user_id
  from session and renders auth/totp_verify.html (TOTP code input +
  collapsed recovery code form, with an "X codes restants" remaining-codes notice).
- POST /2fa/verify: accepts EITHER a 6-digit TOTP code OR a recovery code
  (see the sketch after this list); on success finalises login_user
  (preserving remember-me intent + next URL captured at the password
  step) and audits success/failure.
- POST /2fa/disable: requires password re-auth; nullifies the 3 TOTP fields.
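
As referenced above, a hedged sketch of the either/or check in
POST /2fa/verify (function and variable names are assumptions; the real
route also handles auditing and session cleanup):

    import bcrypt
    import pyotp


    def check_second_factor(user, submitted: str, decrypted_secret: str) -> bool:
        """Accept either a 6-digit TOTP code or one single-use recovery code."""
        submitted = submitted.strip().replace(" ", "")
        if submitted.isdigit() and len(submitted) == 6:
            # valid_window=1 tolerates one 30-second step of clock drift
            return pyotp.TOTP(decrypted_secret).verify(submitted, valid_window=1)
        # Otherwise treat it as a recovery code: compare against each bcrypt
        # hash and consume (remove) the matching one so it is single-use.
        for h in list(user.totp_recovery_codes or []):
            if bcrypt.checkpw(submitted.encode(), h.encode()):
                user.totp_recovery_codes.remove(h)
                # NOTE: with a plain SQLAlchemy JSON column, the caller must
                # reassign the list (or use flag_modified) before committing.
                return True
        return False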

Login gate (src/api/auth.py /login): after the password and
email-verification checks but BEFORE login_user, if user.totp_enabled,
set session['pending_totp_user_id'] / pending_totp_remember /
pending_totp_next and 302 -> /2fa/verify. OAuth/SSO/magic-link paths are
intentionally NOT gated in B-2.5 (deferred; the IdP handles its own MFA).
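
Sketch of the gate (Flask/Flask-Login idiom; the session key names come
from this commit, while the surrounding fragment — user, remember — is
illustrative context from the /login handler):

    from flask import redirect, request, session, url_for
    from flask_login import login_user

    # ...inside /login, after password + email-verification checks pass:
    if user.totp_enabled:
        session['pending_totp_user_id'] = user.id
        session['pending_totp_remember'] = bool(remember)        # remember-me intent
        session['pending_totp_next'] = request.args.get('next')  # deferred next URL
        return redirect(url_for('auth.totp_verify'))             # 302 -> /2fa/verify
    login_user(user, remember=remember)  # no MFA enabled: log in directly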

Schema:
- New JSON column User.totp_recovery_codes (nullable) added via
  add_column_if_not_exists in src/init_db.py (no Alembic; follows the
  existing pattern). Population is sketched after this list.
- Re-uses B-2.1 columns totp_secret_encrypted (VARCHAR 255) and
  totp_enabled (BOOLEAN); both already migrated.
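
For illustration, one way the totp_recovery_codes JSON column can be
populated — 10 plaintext codes shown once, only bcrypt hashes stored
(names and code format are assumptions, not the shipped implementation):

    import secrets

    import bcrypt


    def generate_recovery_codes(n: int = 10) -> tuple[list[str], list[str]]:
        """Return (plaintext_codes, bcrypt_hashes); only the hashes are stored."""
        plaintext = [secrets.token_hex(5) for _ in range(n)]  # e.g. 'a3f09c21bd'
        hashes = [bcrypt.hashpw(c.encode(), bcrypt.gensalt()).decode() for c in plaintext]
        return plaintext, hashes


    codes, hashed = generate_recovery_codes()
    # user.totp_recovery_codes = hashed  -> persisted via the JSON column
    # codes are rendered once in auth/totp_setup.html and never stored.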

Compatibility audit overrides honoured:
- Service layer at src/auth/totp.py (NOT a new src/auth_extended/ pkg).
- Templates at templates/auth/totp_setup.html and templates/auth/totp_verify.html
  extending marketing/base.html with brand tokens + WCAG patterns
  (focus-visible, role=alert, aria-required, autocomplete=one-time-code,
  inputmode=numeric).
- account.html integration deferred to a polish task — admins access
  /2fa/setup directly for now.

Tests (21, all green via Windows manual driver):
- Service layer: encrypt/decrypt round-trip (case sketched below),
  key-mismatch rejection, secret validity, code verification
  (current/wrong/non-digit), recovery codes (10 pairs, 1:1 bcrypt mapping,
  single-use consumption, unknown rejection), set/disable user TOTP fields.
- Routes: login redirect-to-/2fa/verify when totp_enabled, direct login
  when disabled, /2fa/verify with correct/wrong TOTP, recovery code consume,
  redirect-to-login when no pending session, /2fa/setup GET creates pending,
  POST with valid code enables MFA, POST with invalid code keeps pending +
  returns 400, /2fa/disable wrong/correct password.
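
A sketch of the first service-layer case (round-trip plus key-mismatch
rejection), reusing the hypothetical encrypt/decrypt helpers from the
sketch near the top of this message:

    import pytest
    import pyotp
    from cryptography.fernet import InvalidToken


    def test_totp_secret_round_trip():
        raw = pyotp.random_base32()
        token = encrypt_totp_secret("key-A", raw)
        assert decrypt_totp_secret("key-A", token) == raw  # round trip
        with pytest.raises(InvalidToken):
            decrypt_totp_secret("key-B", token)  # wrong SECRET_KEY is rejected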

Regression check: prior 21 OAuth+magic-link, 16 email-service, and 9
signup-Loi-25 tests all still pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 00:08:40 -04:00


"""
Database initialization and migration logic.
This module handles:
- Database schema creation
- Column migrations (adding missing columns to existing tables)
- Default system settings initialization
- Existing recordings migration for inquire mode
"""
import os
import fcntl
import tempfile
from sqlalchemy import text, inspect
from src.database import db
from src.models import Recording, TranscriptChunk, SystemSetting
from src.services.embeddings import process_recording_chunks
from src.utils import add_column_if_not_exists, migrate_column_type, create_index_if_not_exists
# Configuration
ENABLE_INQUIRE_MODE = os.environ.get('ENABLE_INQUIRE_MODE', 'false').lower() == 'true'


def initialize_database(app):
    """
    Initialize database schema and run migrations.

    This function should be called within an app context.
    """
    db.create_all()

    # Check and add new columns if they don't exist
    engine = db.engine

    # Enable WAL mode for SQLite (better concurrent write performance)
    if engine.name == 'sqlite':
        try:
            with engine.connect() as conn:
                conn.execute(text('PRAGMA journal_mode=WAL'))
                conn.commit()
            app.logger.info("SQLite WAL mode enabled for better concurrency")
        except Exception as e:
            app.logger.warning(f"Could not enable WAL mode: {e}")
    try:
        # Add is_inbox column with default value of 1 (True)
        if add_column_if_not_exists(engine, 'recording', 'is_inbox', 'BOOLEAN DEFAULT 1'):
            app.logger.info("Added is_inbox column to recording table")
        # Add is_highlighted column with default value of 0 (False)
        if add_column_if_not_exists(engine, 'recording', 'is_highlighted', 'BOOLEAN DEFAULT 0'):
            app.logger.info("Added is_highlighted column to recording table")
        # Add language preference columns to User table
        if add_column_if_not_exists(engine, 'user', 'transcription_language', 'VARCHAR(10)'):
            app.logger.info("Added transcription_language column to user table")
        # Add extract_events column to User table
        if add_column_if_not_exists(engine, 'user', 'extract_events', 'BOOLEAN DEFAULT 0'):
            app.logger.info("Added extract_events column to user table")
        if add_column_if_not_exists(engine, 'user', 'output_language', 'VARCHAR(50)'):
            app.logger.info("Added output_language column to user table")
        if add_column_if_not_exists(engine, 'user', 'summary_prompt', 'TEXT'):
            app.logger.info("Added summary_prompt column to user table")
        if add_column_if_not_exists(engine, 'user', 'name', 'VARCHAR(100)'):
            app.logger.info("Added name column to user table")
        if add_column_if_not_exists(engine, 'user', 'job_title', 'VARCHAR(100)'):
            app.logger.info("Added job_title column to user table")
        if add_column_if_not_exists(engine, 'user', 'company', 'VARCHAR(100)'):
            app.logger.info("Added company column to user table")
        if add_column_if_not_exists(engine, 'user', 'diarize', 'BOOLEAN'):
            app.logger.info("Added diarize column to user table")
        if add_column_if_not_exists(engine, 'user', 'ui_language', "VARCHAR(10) DEFAULT 'en'"):
            app.logger.info("Added ui_language column to user table")
        if add_column_if_not_exists(engine, 'user', 'sso_provider', 'VARCHAR(100)'):
            app.logger.info("Added sso_provider column to user table")
        if add_column_if_not_exists(engine, 'user', 'sso_subject', 'VARCHAR(255)'):
            app.logger.info("Added sso_subject column to user table")

        # Make password column nullable for SSO users
        try:
            inspector = inspect(engine)
            if 'user' in inspector.get_table_names():
                if engine.name == 'sqlite':
                    # SQLite doesn't support ALTER COLUMN, so we need to check and recreate
                    with engine.connect() as conn:
                        result = conn.execute(text("SELECT sql FROM sqlite_master WHERE type='table' AND name='user'"))
                        schema = result.scalar()
                        if schema and 'password VARCHAR(60) NOT NULL' in schema:
                            app.logger.info("Migrating user table to make password nullable for SSO support...")
                            conn.execute(text("""
                                CREATE TABLE user_new (
                                    id INTEGER NOT NULL,
                                    username VARCHAR(20) NOT NULL,
                                    email VARCHAR(120) NOT NULL,
                                    password VARCHAR(60),
                                    is_admin BOOLEAN,
                                    can_share_publicly BOOLEAN,
                                    transcription_language VARCHAR(10),
                                    output_language VARCHAR(50),
                                    ui_language VARCHAR(10),
                                    summary_prompt TEXT,
                                    extract_events BOOLEAN,
                                    name VARCHAR(100),
                                    job_title VARCHAR(100),
                                    company VARCHAR(100),
                                    diarize BOOLEAN,
                                    sso_provider VARCHAR(100),
                                    sso_subject VARCHAR(255),
                                    PRIMARY KEY (id),
                                    UNIQUE (username),
                                    UNIQUE (email)
                                )
                            """))
                            conn.execute(text("""
                                INSERT INTO user_new
                                SELECT id, username, email, password, is_admin, can_share_publicly,
                                       transcription_language, output_language, ui_language,
                                       summary_prompt, extract_events, name, job_title, company,
                                       diarize, sso_provider, sso_subject
                                FROM user
                            """))
                            conn.execute(text("DROP TABLE user"))
                            conn.execute(text("ALTER TABLE user_new RENAME TO user"))
                            conn.execute(text('CREATE UNIQUE INDEX IF NOT EXISTS ix_user_sso_subject ON "user" (sso_subject)'))
                            conn.commit()
                            app.logger.info("Successfully made password column nullable for SSO support")
                        else:
                            app.logger.info("Password column is already nullable, skipping migration")
                elif engine.name == 'postgresql':
                    # PostgreSQL supports ALTER COLUMN directly
                    with engine.connect() as conn:
                        result = conn.execute(text("""
                            SELECT is_nullable FROM information_schema.columns
                            WHERE table_name = 'user' AND column_name = 'password'
                        """))
                        row = result.fetchone()
                        if row and row[0] == 'NO':
                            conn.execute(text('ALTER TABLE "user" ALTER COLUMN password DROP NOT NULL'))
                            conn.commit()
                            app.logger.info("Made password column nullable for SSO support (PostgreSQL)")
                        else:
                            app.logger.info("Password column is already nullable, skipping migration")
        except Exception as e:
            app.logger.warning(f"Could not migrate password column to nullable (may cause issues with SSO): {e}")
        if add_column_if_not_exists(engine, 'recording', 'mime_type', 'VARCHAR(100)'):
            app.logger.info("Added mime_type column to recording table")
        if add_column_if_not_exists(engine, 'recording', 'completed_at', 'DATETIME'):
            app.logger.info("Added completed_at column to recording table")
        if add_column_if_not_exists(engine, 'recording', 'processing_time_seconds', 'INTEGER'):
            app.logger.info("Added processing_time_seconds column to recording table")
        if add_column_if_not_exists(engine, 'recording', 'transcription_duration_seconds', 'INTEGER'):
            app.logger.info("Added transcription_duration_seconds column to recording table")
        if add_column_if_not_exists(engine, 'recording', 'summarization_duration_seconds', 'INTEGER'):
            app.logger.info("Added summarization_duration_seconds column to recording table")
        if add_column_if_not_exists(engine, 'recording', 'processing_source', "VARCHAR(50) DEFAULT 'upload'"):
            app.logger.info("Added processing_source column to recording table")
        if add_column_if_not_exists(engine, 'recording', 'error_message', 'TEXT'):
            app.logger.info("Added error_message column to recording table")

        # Add columns to recording_tags for order tracking
        if add_column_if_not_exists(engine, 'recording_tags', 'added_at', 'DATETIME'):
            app.logger.info("Added added_at column to recording_tags table")
        if add_column_if_not_exists(engine, 'recording_tags', 'order', '"order" INTEGER DEFAULT 0'):
            app.logger.info("Added order column to recording_tags table")

        # Add auto-deletion and retention columns
        if add_column_if_not_exists(engine, 'recording', 'audio_deleted_at', 'DATETIME'):
            app.logger.info("Added audio_deleted_at column to recording table")
        if add_column_if_not_exists(engine, 'recording', 'deletion_exempt', 'BOOLEAN DEFAULT 0'):
            app.logger.info("Added deletion_exempt column to recording table")
        if add_column_if_not_exists(engine, 'tag', 'protect_from_deletion', 'BOOLEAN DEFAULT 0'):
            app.logger.info("Added protect_from_deletion column to tag table")

        # Add speaker embeddings column for storing voice embeddings from diarization
        if add_column_if_not_exists(engine, 'recording', 'speaker_embeddings', 'JSON'):
            app.logger.info("Added speaker_embeddings column to recording table")

        # Add speaker voice profile embedding fields
        if add_column_if_not_exists(engine, 'speaker', 'average_embedding', 'BLOB'):
            app.logger.info("Added average_embedding column to speaker table")
        if add_column_if_not_exists(engine, 'speaker', 'embeddings_history', 'JSON'):
            app.logger.info("Added embeddings_history column to speaker table")
        if add_column_if_not_exists(engine, 'speaker', 'embedding_count', 'INTEGER DEFAULT 0'):
            app.logger.info("Added embedding_count column to speaker table")
        if add_column_if_not_exists(engine, 'speaker', 'confidence_score', 'REAL'):
            app.logger.info("Added confidence_score column to speaker table")

        # Add is_new_upload column to processing_job table for tracking upload vs reprocessing jobs
        if add_column_if_not_exists(engine, 'processing_job', 'is_new_upload', 'BOOLEAN DEFAULT 0'):
            app.logger.info("Added is_new_upload column to processing_job table")
        if add_column_if_not_exists(engine, 'tag', 'group_id', 'INTEGER'):
            app.logger.info("Added group_id column to tag table")
        if add_column_if_not_exists(engine, 'tag', 'retention_days', 'INTEGER'):
            app.logger.info("Added retention_days column to tag table")

        # Migrate existing protected tags to use retention_days = -1 for consistency.
        # This standardizes the protection mechanism: retention_days = -1 means protected/infinite retention.
        try:
            with engine.connect() as conn:
                # Find tags with protect_from_deletion=True but retention_days != -1
                result = conn.execute(text("""
                    SELECT COUNT(*) FROM tag
                    WHERE protect_from_deletion = TRUE
                    AND (retention_days IS NULL OR retention_days != -1)
                """))
                count = result.scalar()
                if count and count > 0:
                    # Migrate these tags to use retention_days = -1
                    conn.execute(text("""
                        UPDATE tag
                        SET retention_days = -1
                        WHERE protect_from_deletion = TRUE
                        AND (retention_days IS NULL OR retention_days != -1)
                    """))
                    conn.commit()
                    app.logger.info(f"Migrated {count} protected tags to use retention_days=-1 (standardized protection format)")
        except Exception as e:
            app.logger.warning(f"Could not migrate protected tags to retention_days=-1: {e}")
        if add_column_if_not_exists(engine, 'tag', 'auto_share_on_apply', 'BOOLEAN DEFAULT 1'):
            app.logger.info("Added auto_share_on_apply column to tag table")
        if add_column_if_not_exists(engine, 'tag', 'share_with_group_lead', 'BOOLEAN DEFAULT 1'):
            app.logger.info("Added share_with_group_lead column to tag table")
        if add_column_if_not_exists(engine, 'user', 'can_share_publicly', 'BOOLEAN DEFAULT 1'):
            app.logger.info("Added can_share_publicly column to user table")
        # Token budget for rate limiting
        if add_column_if_not_exists(engine, 'user', 'monthly_token_budget', 'INTEGER'):
            app.logger.info("Added monthly_token_budget column to user table")
        # Transcription budget for rate limiting (in seconds)
        if add_column_if_not_exists(engine, 'user', 'monthly_transcription_budget', 'INTEGER'):
            app.logger.info("Added monthly_transcription_budget column to user table")
        # Naming templates feature
        if add_column_if_not_exists(engine, 'user', 'default_naming_template_id', 'INTEGER'):
            app.logger.info("Added default_naming_template_id column to user table")

        # Email verification fields
        email_verified_added = add_column_if_not_exists(engine, 'user', 'email_verified', 'BOOLEAN DEFAULT 0')
        if email_verified_added:
            app.logger.info("Added email_verified column to user table")
            # Set all existing users to email_verified=True (grandfathered)
            try:
                with engine.connect() as conn:
                    conn.execute(text('UPDATE "user" SET email_verified = TRUE WHERE email_verified = FALSE OR email_verified IS NULL'))
                    conn.commit()
                app.logger.info("Set email_verified=True for all existing users (grandfathered)")
            except Exception as e:
                app.logger.warning(f"Could not update existing users email_verified status: {e}")
        if add_column_if_not_exists(engine, 'user', 'email_verification_token', 'VARCHAR(200)'):
            app.logger.info("Added email_verification_token column to user table")
        if add_column_if_not_exists(engine, 'user', 'email_verification_sent_at', 'DATETIME'):
            app.logger.info("Added email_verification_sent_at column to user table")
        if add_column_if_not_exists(engine, 'user', 'password_reset_token', 'VARCHAR(200)'):
            app.logger.info("Added password_reset_token column to user table")
        if add_column_if_not_exists(engine, 'user', 'password_reset_sent_at', 'DATETIME'):
            app.logger.info("Added password_reset_sent_at column to user table")

        # Auto speaker labelling settings
        if add_column_if_not_exists(engine, 'user', 'auto_speaker_labelling', 'BOOLEAN DEFAULT 0'):
            app.logger.info("Added auto_speaker_labelling column to user table")
        if add_column_if_not_exists(engine, 'user', 'auto_speaker_labelling_threshold', "VARCHAR(10) DEFAULT 'medium'"):
            app.logger.info("Added auto_speaker_labelling_threshold column to user table")
        # Auto summarization setting (per-user, default enabled)
        if add_column_if_not_exists(engine, 'user', 'auto_summarization', 'BOOLEAN DEFAULT 1'):
            app.logger.info("Added auto_summarization column to user table")
        # Transcription hints (hotwords and initial prompt for improving ASR accuracy)
        if add_column_if_not_exists(engine, 'user', 'transcription_hotwords', 'TEXT'):
            app.logger.info("Added transcription_hotwords column to user table")
        if add_column_if_not_exists(engine, 'user', 'transcription_initial_prompt', 'TEXT'):
            app.logger.info("Added transcription_initial_prompt column to user table")

        # === B-2.1: MFA / WebAuthn / Stripe / Loi 25 user fields ===
        if add_column_if_not_exists(engine, 'user', 'totp_secret_encrypted', 'VARCHAR(255)'):
            app.logger.info("Added 'totp_secret_encrypted' column to user")
        if add_column_if_not_exists(engine, 'user', 'totp_enabled', 'BOOLEAN DEFAULT 0'):
            app.logger.info("Added 'totp_enabled' column to user")
        if add_column_if_not_exists(engine, 'user', 'webauthn_credentials', 'JSON'):
            app.logger.info("Added webauthn_credentials column to user table")
        # B-2.5: 10 single-use bcrypt-hashed recovery codes for TOTP MFA
        if add_column_if_not_exists(engine, 'user', 'totp_recovery_codes', 'JSON'):
            app.logger.info("Added totp_recovery_codes column to user table")
        if add_column_if_not_exists(engine, 'user', 'ordre_pro', 'VARCHAR(50)'):
            app.logger.info("Added ordre_pro column to user table")
        if add_column_if_not_exists(engine, 'user', 'cabinet', 'VARCHAR(255)'):
            app.logger.info("Added cabinet column to user table")
        if add_column_if_not_exists(engine, 'user', 'stripe_customer_id', 'VARCHAR(120)'):
            app.logger.info("Added stripe_customer_id column to user table")
        if add_column_if_not_exists(engine, 'user', 'subscription_status', 'VARCHAR(20)'):
            app.logger.info("Added subscription_status column to user table")

        # === B-2.1: Indexes on stripe_customer_id and subscription_status ===
        try:
            if create_index_if_not_exists(engine, 'idx_user_stripe_customer', 'user', 'stripe_customer_id'):
                app.logger.info("Created index idx_user_stripe_customer on user.stripe_customer_id")
            if create_index_if_not_exists(engine, 'idx_user_subscription_status', 'user', 'subscription_status'):
                app.logger.info("Created index idx_user_subscription_status on user.subscription_status")
        except Exception as e:
            app.logger.warning(f"Could not create B-2.1 user indexes: {e}")
        if add_column_if_not_exists(engine, 'tag', 'default_hotwords', 'TEXT'):
            app.logger.info("Added default_hotwords column to tag table")
        if add_column_if_not_exists(engine, 'tag', 'default_initial_prompt', 'TEXT'):
            app.logger.info("Added default_initial_prompt column to tag table")
        if add_column_if_not_exists(engine, 'folder', 'default_hotwords', 'TEXT'):
            app.logger.info("Added default_hotwords column to folder table")
        if add_column_if_not_exists(engine, 'folder', 'default_initial_prompt', 'TEXT'):
            app.logger.info("Added default_initial_prompt column to folder table")

        # Create indexes for token lookups (for faster token verification)
        try:
            if create_index_if_not_exists(engine, 'ix_user_email_verification_token', 'user', 'email_verification_token'):
                app.logger.info("Created index ix_user_email_verification_token on user.email_verification_token")
            if create_index_if_not_exists(engine, 'ix_user_password_reset_token', 'user', 'password_reset_token'):
                app.logger.info("Created index ix_user_password_reset_token on user.password_reset_token")
        except Exception as e:
            app.logger.warning(f"Could not create token indexes: {e}")

        if add_column_if_not_exists(engine, 'tag', 'naming_template_id', 'INTEGER'):
            app.logger.info("Added naming_template_id column to tag table")
        # Export template assignments for tags and folders
        if add_column_if_not_exists(engine, 'tag', 'export_template_id', 'INTEGER'):
            app.logger.info("Added export_template_id column to tag table")
        if add_column_if_not_exists(engine, 'folder', 'export_template_id', 'INTEGER'):
            app.logger.info("Added export_template_id column to folder table")

        # Add source tracking columns to internal_share table
        if add_column_if_not_exists(engine, 'internal_share', 'source_type', "VARCHAR(20) DEFAULT 'manual'"):
            app.logger.info("Added source_type column to internal_share table")
        if add_column_if_not_exists(engine, 'internal_share', 'source_tag_id', 'INTEGER'):
            app.logger.info("Added source_tag_id column to internal_share table")

        # Migrate existing shares: infer source based on group tag presence
        try:
            with engine.connect() as conn:
                # For each existing share, check if it was likely created by a group tag
                # by looking for group tags on the recording where the shared user is a group member
                conn.execute(text('''
                    UPDATE internal_share
                    SET source_type = 'group_tag',
                        source_tag_id = (
                            SELECT t.id FROM tag t
                            INNER JOIN recording_tags rt ON rt.tag_id = t.id
                            INNER JOIN group_membership gm ON gm.group_id = t.group_id
                            WHERE rt.recording_id = internal_share.recording_id
                            AND gm.user_id = internal_share.shared_with_user_id
                            AND t.group_id IS NOT NULL
                            AND (t.auto_share_on_apply = TRUE OR t.share_with_group_lead = TRUE)
                            LIMIT 1
                        )
                    WHERE source_type = 'manual'
                    AND EXISTS (
                        SELECT 1 FROM tag t
                        INNER JOIN recording_tags rt ON rt.tag_id = t.id
                        INNER JOIN group_membership gm ON gm.group_id = t.group_id
                        WHERE rt.recording_id = internal_share.recording_id
                        AND gm.user_id = internal_share.shared_with_user_id
                        AND t.group_id IS NOT NULL
                        AND (t.auto_share_on_apply = TRUE OR t.share_with_group_lead = TRUE)
                    )
                '''))
                conn.commit()
                app.logger.info("Inferred source tracking for existing shares based on group tag presence")
        except Exception as e:
            app.logger.warning(f"Could not infer source tracking for existing shares: {e}")

        # Update existing records to have proper order values (approximate by tag_id)
        try:
            with engine.connect() as conn:
                # Get existing associations without order values and assign them
                existing_associations = conn.execute(text('''
                    SELECT recording_id, tag_id,
                           ROW_NUMBER() OVER (PARTITION BY recording_id ORDER BY tag_id) as row_num
                    FROM recording_tags
                    WHERE "order" = 0
                ''')).fetchall()
                for assoc in existing_associations:
                    conn.execute(text('''
                        UPDATE recording_tags
                        SET "order" = :order_num
                        WHERE recording_id = :rec_id AND tag_id = :tag_id
                    '''), {"order_num": assoc.row_num, "rec_id": assoc.recording_id, "tag_id": assoc.tag_id})
                conn.commit()
                app.logger.info(f"Updated order values for {len(existing_associations)} existing tag associations")
        except Exception as e:
            app.logger.warning(f"Could not update existing tag order values: {e}")
        # Add per-user status columns to shared_recording_state table
        if add_column_if_not_exists(engine, 'shared_recording_state', 'is_inbox', 'BOOLEAN DEFAULT 1'):
            app.logger.info("Added is_inbox column to shared_recording_state table")

        # Handle is_starred -> is_highlighted migration
        inspector = inspect(engine)
        if 'shared_recording_state' in inspector.get_table_names():
            columns = [col['name'] for col in inspector.get_columns('shared_recording_state')]
            has_is_starred = 'is_starred' in columns
            has_is_highlighted = 'is_highlighted' in columns
            if has_is_starred and not has_is_highlighted:
                # Rename is_starred to is_highlighted by copying data
                try:
                    # Add is_highlighted column using utility (handles PostgreSQL boolean defaults)
                    add_column_if_not_exists(engine, 'shared_recording_state', 'is_highlighted', 'BOOLEAN DEFAULT 0')
                    # Copy data from is_starred to is_highlighted
                    with engine.connect() as conn:
                        conn.execute(text('UPDATE shared_recording_state SET is_highlighted = is_starred'))
                        conn.commit()
                    app.logger.info("Migrated is_starred to is_highlighted in shared_recording_state table")
                    # Note: We keep is_starred for now to avoid breaking existing code during transition
                except Exception as e:
                    app.logger.warning(f"Could not migrate is_starred to is_highlighted: {e}")
            elif not has_is_highlighted:
                # Neither column exists, add is_highlighted
                if add_column_if_not_exists(engine, 'shared_recording_state', 'is_highlighted', 'BOOLEAN DEFAULT 0'):
                    app.logger.info("Added is_highlighted column to shared_recording_state table")
        # Migrate meeting_date from DATE to DATETIME format.
        # This migration handles both:
        # 1. Converting existing DATE columns to DATETIME (for fresh pulls)
        # 2. Restoring NULL dates from created_at (for failed migrations)
        try:
            inspector = inspect(engine)
            columns_info = {col['name']: col for col in inspector.get_columns('recording')}
            if 'meeting_date' in columns_info:
                col_type = str(columns_info['meeting_date']['type']).upper()
                # Check if column needs migration from DATE to DATETIME
                needs_migration = False
                # For SQLite: Both DATE and DATETIME are TEXT, check data format
                if engine.name == 'sqlite':
                    with engine.connect() as conn:
                        # Check if we have date-only format (no time component)
                        result = conn.execute(text("""
                            SELECT meeting_date FROM recording
                            WHERE meeting_date IS NOT NULL
                            AND meeting_date NOT LIKE '%:%'
                            LIMIT 1
                        """))
                        has_date_only = result.fetchone() is not None
                        needs_migration = has_date_only
                # For PostgreSQL/MySQL: Check actual column type
                elif 'DATE' in col_type and 'DATETIME' not in col_type and 'TIMESTAMP' not in col_type:
                    needs_migration = True
                if needs_migration:
                    app.logger.info(f"Migrating meeting_date from DATE to DATETIME format (engine: {engine.name})")
                    with engine.connect() as conn:
                        if engine.name == 'sqlite':
                            # SQLite: Add time component to date-only values
                            conn.execute(text("""
                                UPDATE recording
                                SET meeting_date = datetime(date(meeting_date) || ' 12:00:00')
                                WHERE meeting_date IS NOT NULL
                                AND meeting_date NOT LIKE '%:%'
                            """))
                            conn.commit()
                            app.logger.info("Migrated SQLite meeting_date to include time")
                        elif engine.name == 'postgresql':
                            # PostgreSQL: Change column type
                            conn.execute(text("""
                                ALTER TABLE recording
                                ALTER COLUMN meeting_date TYPE TIMESTAMP
                                USING (meeting_date + TIME '12:00:00')
                            """))
                            conn.commit()
                            app.logger.info("Migrated PostgreSQL meeting_date to TIMESTAMP")
                        elif engine.name == 'mysql':
                            # MySQL: Change column type
                            conn.execute(text("""
                                ALTER TABLE recording
                                MODIFY COLUMN meeting_date DATETIME
                            """))
                            # Add time component to existing date values
                            conn.execute(text("""
                                UPDATE recording
                                SET meeting_date = TIMESTAMP(meeting_date, '12:00:00')
                                WHERE meeting_date IS NOT NULL
                            """))
                            conn.commit()
                            app.logger.info("Migrated MySQL meeting_date to DATETIME")
                else:
                    app.logger.info("meeting_date already in DATETIME format, skipping migration")

                # Safety net: Restore any NULL meeting_dates from created_at
                with engine.connect() as conn:
                    result = conn.execute(text("""
                        SELECT COUNT(*) FROM recording
                        WHERE meeting_date IS NULL AND created_at IS NOT NULL
                    """))
                    null_count = result.scalar()
                    if null_count and null_count > 0:
                        conn.execute(text("""
                            UPDATE recording
                            SET meeting_date = created_at
                            WHERE meeting_date IS NULL AND created_at IS NOT NULL
                        """))
                        conn.commit()
                        app.logger.info(f"Restored {null_count} NULL meeting dates from created_at")
        except Exception as e:
            app.logger.warning(f"Error during meeting_date migration: {e}")
            app.logger.warning("New recordings will work correctly, but existing dates may need manual migration")
        # Add index on TranscriptChunk.speaker_name for performance.
        # This improves speaker rename operations which update all chunks.
        try:
            inspector = inspect(engine)
            if 'transcript_chunk' in inspector.get_table_names():
                existing_indexes = [idx['name'] for idx in inspector.get_indexes('transcript_chunk')]
                # Create composite index on (user_id, speaker_name) if it doesn't exist
                if 'idx_user_speaker_name' not in existing_indexes:
                    with engine.connect() as conn:
                        conn.execute(text(
                            'CREATE INDEX IF NOT EXISTS idx_user_speaker_name ON transcript_chunk (user_id, speaker_name)'
                        ))
                        conn.commit()
                    app.logger.info("Created index idx_user_speaker_name on transcript_chunk (user_id, speaker_name) for speaker rename performance")
                # Create single-column index on speaker_name if it doesn't exist
                if 'ix_transcript_chunk_speaker_name' not in existing_indexes:
                    with engine.connect() as conn:
                        conn.execute(text(
                            'CREATE INDEX IF NOT EXISTS ix_transcript_chunk_speaker_name ON transcript_chunk (speaker_name)'
                        ))
                        conn.commit()
                    app.logger.info("Created index ix_transcript_chunk_speaker_name on transcript_chunk (speaker_name)")
        except Exception as e:
            app.logger.warning(f"Could not create speaker_name indexes: {e}")

        # Add unique index for SSO subject to prevent duplicate linking
        try:
            if create_index_if_not_exists(engine, 'ix_user_sso_subject', 'user', 'sso_subject', unique=True):
                app.logger.info("Created unique index ix_user_sso_subject on user.sso_subject")
        except Exception as e:
            app.logger.warning(f"Could not create unique index on user.sso_subject: {e}")

        # Add file_hash column for duplicate detection
        if add_column_if_not_exists(engine, 'recording', 'file_hash', 'VARCHAR(64)'):
            app.logger.info("Added file_hash column to recording table")
        try:
            if create_index_if_not_exists(engine, 'ix_recording_user_file_hash', 'recording', 'user_id, file_hash'):
                app.logger.info("Created index ix_recording_user_file_hash on recording (user_id, file_hash)")
        except Exception as e:
            app.logger.warning(f"Could not create index on recording (user_id, file_hash): {e}")

        # Add folder_id column to recording table for folders feature
        if add_column_if_not_exists(engine, 'recording', 'folder_id', 'INTEGER'):
            app.logger.info("Added folder_id column to recording table")
        # Create index for folder_id
        try:
            if create_index_if_not_exists(engine, 'ix_recording_folder_id', 'recording', 'folder_id'):
                app.logger.info("Created index ix_recording_folder_id on recording.folder_id")
        except Exception as e:
            app.logger.warning(f"Could not create index on recording.folder_id: {e}")

        # Add indexes for audit log tables (Loi 25 compliance)
        try:
            inspector = inspect(engine)
            if 'access_log' in inspector.get_table_names():
                if create_index_if_not_exists(engine, 'ix_access_log_user_id', 'access_log', 'user_id'):
                    app.logger.info("Created index ix_access_log_user_id")
                if create_index_if_not_exists(engine, 'ix_access_log_resource', 'access_log', 'resource_type, resource_id'):
                    app.logger.info("Created index ix_access_log_resource")
            if 'auth_log' in inspector.get_table_names():
                if create_index_if_not_exists(engine, 'ix_auth_log_user_id', 'auth_log', 'user_id'):
                    app.logger.info("Created index ix_auth_log_user_id")
        except Exception as e:
            app.logger.warning(f"Could not create audit log indexes: {e}")
        # Initialize default system settings
        if not SystemSetting.query.filter_by(key='transcript_length_limit').first():
            SystemSetting.set_setting(
                key='transcript_length_limit',
                value='50000',
                description='Maximum number of characters to send from transcript to LLM for summarization and chat. Use -1 for no limit.',
                setting_type='integer'
            )
            app.logger.info("Initialized default transcript_length_limit setting")
        if not SystemSetting.query.filter_by(key='max_file_size_mb').first():
            SystemSetting.set_setting(
                key='max_file_size_mb',
                value='10000',
                description='Maximum file size allowed for audio uploads in megabytes (MB).',
                setting_type='integer'
            )
            app.logger.info("Initialized default max_file_size_mb setting")
        if not SystemSetting.query.filter_by(key='asr_timeout_seconds').first():
            SystemSetting.set_setting(
                key='asr_timeout_seconds',
                value='1800',
                description='Maximum time in seconds to wait for ASR transcription to complete. Default is 1800 seconds (30 minutes).',
                setting_type='integer'
            )
            app.logger.info("Initialized default asr_timeout_seconds setting")
        if not SystemSetting.query.filter_by(key='admin_default_summary_prompt').first():
            default_prompt = """Tu es un assistant expert en prise de notes. Analyse cette transcription et extrais toutes les informations importantes en français.

## 📝 RÉSUMÉ
Synthèse claire et concise de la conversation en 4-6 phrases.

## 🔑 POINTS CLÉS
• Les informations essentielles à retenir
• Les faits importants mentionnés
• Les opinions ou positions exprimées

## 📊 DONNÉES & CHIFFRES
• Montants, dates, délais, pourcentages
• Noms de personnes, entreprises, lieux
• Références techniques ou spécifiques

## 💬 CITATIONS MARQUANTES
> Phrases importantes ou révélatrices (entre guillemets)

## ⚠️ PROBLÈMES & PRÉOCCUPATIONS
• Difficultés ou obstacles mentionnés
• Risques identifiés
• Points de friction ou désaccords

## 💡 IDÉES & SUGGESTIONS
• Propositions faites durant la conversation
• Solutions envisagées
• Opportunités mentionnées

## ✅ DÉCISIONS & PROCHAINES ÉTAPES
• Ce qui a été décidé ou convenu
• Actions à entreprendre
• Suivis nécessaires

---
Instructions : Sois exhaustif et n'omets aucun détail pertinent. Utilise un langage clair et professionnel. Adapte les sections selon le contenu — si une section ne s'applique pas, omets-la."""
            SystemSetting.set_setting(
                key='admin_default_summary_prompt',
                value=default_prompt,
                description='Default summarization prompt used when users have not set their own prompt. This serves as the base prompt for all users.',
                setting_type='string'
            )
            app.logger.info("Initialized admin_default_summary_prompt setting")
        if not SystemSetting.query.filter_by(key='recording_disclaimer').first():
            SystemSetting.set_setting(
                key='recording_disclaimer',
                value='',
                description='Legal disclaimer shown to users before recording starts. Supports Markdown formatting. Leave empty to disable.',
                setting_type='string'
            )
            app.logger.info("Initialized recording_disclaimer setting")
        if not SystemSetting.query.filter_by(key='upload_disclaimer').first():
            SystemSetting.set_setting(
                key='upload_disclaimer',
                value='',
                description='Legal disclaimer shown before file uploads. Supports Markdown. Leave empty to disable.',
                setting_type='string'
            )
            app.logger.info("Initialized upload_disclaimer setting")
        if not SystemSetting.query.filter_by(key='custom_banner').first():
            SystemSetting.set_setting(
                key='custom_banner',
                value='',
                description='Custom banner shown at the top of the page. Supports Markdown. Leave empty to disable.',
                setting_type='string'
            )
            app.logger.info("Initialized custom_banner setting")
        if not SystemSetting.query.filter_by(key='disable_auto_summarization').first():
            SystemSetting.set_setting(
                key='disable_auto_summarization',
                value='false',
                description='Disable automatic summarization after transcription completes. When enabled, recordings will only be transcribed and users must manually trigger summarization.',
                setting_type='boolean'
            )
            app.logger.info("Initialized disable_auto_summarization setting")
        if not SystemSetting.query.filter_by(key='enable_folders').first():
            SystemSetting.set_setting(
                key='enable_folders',
                value='true',
                description='Enable the Folders feature, allowing users to organize recordings into folders with custom prompts and ASR settings.',
                setting_type='boolean'
            )
            app.logger.info("Initialized enable_folders setting")
        # Process existing recordings for inquire mode (chunk and embed them).
        # Only run if inquire mode is enabled.
        if ENABLE_INQUIRE_MODE:
            # Use a file lock to prevent multiple workers from running this simultaneously
            lock_file_path = os.path.join(tempfile.gettempdir(), 'inquire_migration.lock')
            try:
                with open(lock_file_path, 'w') as lock_file:
                    # Try to acquire exclusive lock (non-blocking)
                    try:
                        fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
                        app.logger.info("Acquired migration lock, checking for existing recordings that need chunking for inquire mode...")
                        completed_recordings = Recording.query.filter_by(status='COMPLETED').all()
                        recordings_needing_processing = []
                        for recording in completed_recordings:
                            if recording.transcription:  # Has transcription
                                chunk_count = TranscriptChunk.query.filter_by(recording_id=recording.id).count()
                                if chunk_count == 0:  # No chunks yet
                                    recordings_needing_processing.append(recording)
                        if recordings_needing_processing:
                            app.logger.info(f"Found {len(recordings_needing_processing)} recordings that need chunking for inquire mode")
                            app.logger.info("Processing first 10 recordings automatically. Use admin API or migration script for remaining recordings.")
                            # Process first 10 recordings automatically to avoid long startup times
                            batch_size = min(10, len(recordings_needing_processing))
                            processed = 0
                            for i in range(batch_size):
                                recording = recordings_needing_processing[i]
                                try:
                                    success = process_recording_chunks(recording.id)
                                    if success:
                                        processed += 1
                                        app.logger.info(f"Processed chunks for recording: {recording.title} ({recording.id})")
                                except Exception as e:
                                    app.logger.warning(f"Failed to process chunks for recording {recording.id}: {e}")
                            remaining = len(recordings_needing_processing) - processed
                            if remaining > 0:
                                app.logger.info(f"Successfully processed {processed} recordings. {remaining} recordings remaining.")
                                app.logger.info("Use the admin migration API or run 'python migrate_existing_recordings.py' to process remaining recordings.")
                            else:
                                app.logger.info(f"Successfully processed all {processed} recordings for inquire mode.")
                        else:
                            app.logger.info("All existing recordings are already processed for inquire mode.")
                    except BlockingIOError:
                        app.logger.info("Migration already running in another worker, skipping...")
            except Exception as e:
                app.logger.warning(f"Error during existing recordings migration: {e}")
                app.logger.info("Existing recordings can be migrated later using the admin API or migration script.")
    except Exception as e:
        app.logger.error(f"Error during database migration: {e}")


if __name__ == '__main__':
    # For standalone migration script
    from src.app import app
    with app.app_context():
        initialize_database(app)