Initial release: DictIA v0.8.14-alpha (fork of Speakr, AGPL-3.0)
This commit is contained in:
748
src/init_db.py
Normal file
748
src/init_db.py
Normal file
@@ -0,0 +1,748 @@
|
||||
"""
|
||||
Database initialization and migration logic.
|
||||
|
||||
This module handles:
|
||||
- Database schema creation
|
||||
- Column migrations (adding missing columns to existing tables)
|
||||
- Default system settings initialization
|
||||
- Existing recordings migration for inquire mode
|
||||
"""
|
||||
|
||||
import os
|
||||
import fcntl
|
||||
import tempfile
|
||||
from sqlalchemy import text, inspect
|
||||
|
||||
from src.database import db
|
||||
from src.models import Recording, TranscriptChunk, SystemSetting
|
||||
from src.services.embeddings import process_recording_chunks
|
||||
from src.utils import add_column_if_not_exists, migrate_column_type, create_index_if_not_exists
|
||||
|
||||
# Configuration
# Feature flag for "inquire" mode, read once at import time from the
# environment. Any casing of the literal string 'true' enables it;
# every other value (or an unset variable) leaves it disabled.
_inquire_raw = os.environ.get('ENABLE_INQUIRE_MODE', 'false')
ENABLE_INQUIRE_MODE = _inquire_raw.lower() == 'true'
|
||||
|
||||
|
||||
def initialize_database(app):
|
||||
"""
|
||||
Initialize database schema and run migrations.
|
||||
|
||||
This function should be called within an app context.
|
||||
"""
|
||||
db.create_all()
|
||||
|
||||
# Check and add new columns if they don't exist
|
||||
engine = db.engine
|
||||
|
||||
# Enable WAL mode for SQLite (better concurrent write performance)
|
||||
if engine.name == 'sqlite':
|
||||
try:
|
||||
with engine.connect() as conn:
|
||||
conn.execute(text('PRAGMA journal_mode=WAL'))
|
||||
conn.commit()
|
||||
app.logger.info("SQLite WAL mode enabled for better concurrency")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not enable WAL mode: {e}")
|
||||
|
||||
try:
|
||||
# Add is_inbox column with default value of 1 (True)
|
||||
if add_column_if_not_exists(engine, 'recording', 'is_inbox', 'BOOLEAN DEFAULT 1'):
|
||||
app.logger.info("Added is_inbox column to recording table")
|
||||
|
||||
# Add is_highlighted column with default value of 0 (False)
|
||||
if add_column_if_not_exists(engine, 'recording', 'is_highlighted', 'BOOLEAN DEFAULT 0'):
|
||||
app.logger.info("Added is_highlighted column to recording table")
|
||||
|
||||
# Add language preference columns to User table
|
||||
if add_column_if_not_exists(engine, 'user', 'transcription_language', 'VARCHAR(10)'):
|
||||
app.logger.info("Added transcription_language column to user table")
|
||||
|
||||
# Add extract_events column to User table
|
||||
if add_column_if_not_exists(engine, 'user', 'extract_events', 'BOOLEAN DEFAULT 0'):
|
||||
app.logger.info("Added extract_events column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'output_language', 'VARCHAR(50)'):
|
||||
app.logger.info("Added output_language column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'summary_prompt', 'TEXT'):
|
||||
app.logger.info("Added summary_prompt column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'name', 'VARCHAR(100)'):
|
||||
app.logger.info("Added name column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'job_title', 'VARCHAR(100)'):
|
||||
app.logger.info("Added job_title column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'company', 'VARCHAR(100)'):
|
||||
app.logger.info("Added company column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'diarize', 'BOOLEAN'):
|
||||
app.logger.info("Added diarize column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'ui_language', "VARCHAR(10) DEFAULT 'en'"):
|
||||
app.logger.info("Added ui_language column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'sso_provider', 'VARCHAR(100)'):
|
||||
app.logger.info("Added sso_provider column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'sso_subject', 'VARCHAR(255)'):
|
||||
app.logger.info("Added sso_subject column to user table")
|
||||
|
||||
# Make password column nullable for SSO users
|
||||
try:
|
||||
inspector = inspect(engine)
|
||||
if 'user' in inspector.get_table_names():
|
||||
if engine.name == 'sqlite':
|
||||
# SQLite doesn't support ALTER COLUMN, so we need to check and recreate
|
||||
with engine.connect() as conn:
|
||||
result = conn.execute(text("SELECT sql FROM sqlite_master WHERE type='table' AND name='user'"))
|
||||
schema = result.scalar()
|
||||
|
||||
if schema and 'password VARCHAR(60) NOT NULL' in schema:
|
||||
app.logger.info("Migrating user table to make password nullable for SSO support...")
|
||||
|
||||
conn.execute(text("""
|
||||
CREATE TABLE user_new (
|
||||
id INTEGER NOT NULL,
|
||||
username VARCHAR(20) NOT NULL,
|
||||
email VARCHAR(120) NOT NULL,
|
||||
password VARCHAR(60),
|
||||
is_admin BOOLEAN,
|
||||
can_share_publicly BOOLEAN,
|
||||
transcription_language VARCHAR(10),
|
||||
output_language VARCHAR(50),
|
||||
ui_language VARCHAR(10),
|
||||
summary_prompt TEXT,
|
||||
extract_events BOOLEAN,
|
||||
name VARCHAR(100),
|
||||
job_title VARCHAR(100),
|
||||
company VARCHAR(100),
|
||||
diarize BOOLEAN,
|
||||
sso_provider VARCHAR(100),
|
||||
sso_subject VARCHAR(255),
|
||||
PRIMARY KEY (id),
|
||||
UNIQUE (username),
|
||||
UNIQUE (email)
|
||||
)
|
||||
"""))
|
||||
conn.execute(text("""
|
||||
INSERT INTO user_new
|
||||
SELECT id, username, email, password, is_admin, can_share_publicly,
|
||||
transcription_language, output_language, ui_language,
|
||||
summary_prompt, extract_events, name, job_title, company,
|
||||
diarize, sso_provider, sso_subject
|
||||
FROM user
|
||||
"""))
|
||||
conn.execute(text("DROP TABLE user"))
|
||||
conn.execute(text("ALTER TABLE user_new RENAME TO user"))
|
||||
conn.execute(text('CREATE UNIQUE INDEX IF NOT EXISTS ix_user_sso_subject ON "user" (sso_subject)'))
|
||||
conn.commit()
|
||||
app.logger.info("Successfully made password column nullable for SSO support")
|
||||
else:
|
||||
app.logger.info("Password column is already nullable, skipping migration")
|
||||
|
||||
elif engine.name == 'postgresql':
|
||||
# PostgreSQL supports ALTER COLUMN directly
|
||||
with engine.connect() as conn:
|
||||
result = conn.execute(text("""
|
||||
SELECT is_nullable FROM information_schema.columns
|
||||
WHERE table_name = 'user' AND column_name = 'password'
|
||||
"""))
|
||||
row = result.fetchone()
|
||||
if row and row[0] == 'NO':
|
||||
conn.execute(text('ALTER TABLE "user" ALTER COLUMN password DROP NOT NULL'))
|
||||
conn.commit()
|
||||
app.logger.info("Made password column nullable for SSO support (PostgreSQL)")
|
||||
else:
|
||||
app.logger.info("Password column is already nullable, skipping migration")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not migrate password column to nullable (may cause issues with SSO): {e}")
|
||||
|
||||
if add_column_if_not_exists(engine, 'recording', 'mime_type', 'VARCHAR(100)'):
|
||||
app.logger.info("Added mime_type column to recording table")
|
||||
if add_column_if_not_exists(engine, 'recording', 'completed_at', 'DATETIME'):
|
||||
app.logger.info("Added completed_at column to recording table")
|
||||
if add_column_if_not_exists(engine, 'recording', 'processing_time_seconds', 'INTEGER'):
|
||||
app.logger.info("Added processing_time_seconds column to recording table")
|
||||
if add_column_if_not_exists(engine, 'recording', 'transcription_duration_seconds', 'INTEGER'):
|
||||
app.logger.info("Added transcription_duration_seconds column to recording table")
|
||||
if add_column_if_not_exists(engine, 'recording', 'summarization_duration_seconds', 'INTEGER'):
|
||||
app.logger.info("Added summarization_duration_seconds column to recording table")
|
||||
if add_column_if_not_exists(engine, 'recording', 'processing_source', "VARCHAR(50) DEFAULT 'upload'"):
|
||||
app.logger.info("Added processing_source column to recording table")
|
||||
if add_column_if_not_exists(engine, 'recording', 'error_message', 'TEXT'):
|
||||
app.logger.info("Added error_message column to recording table")
|
||||
|
||||
# Add columns to recording_tags for order tracking
|
||||
if add_column_if_not_exists(engine, 'recording_tags', 'added_at', 'DATETIME'):
|
||||
app.logger.info("Added added_at column to recording_tags table")
|
||||
if add_column_if_not_exists(engine, 'recording_tags', 'order', '"order" INTEGER DEFAULT 0'):
|
||||
app.logger.info("Added order column to recording_tags table")
|
||||
|
||||
# Add auto-deletion and retention columns
|
||||
if add_column_if_not_exists(engine, 'recording', 'audio_deleted_at', 'DATETIME'):
|
||||
app.logger.info("Added audio_deleted_at column to recording table")
|
||||
if add_column_if_not_exists(engine, 'recording', 'deletion_exempt', 'BOOLEAN DEFAULT 0'):
|
||||
app.logger.info("Added deletion_exempt column to recording table")
|
||||
if add_column_if_not_exists(engine, 'tag', 'protect_from_deletion', 'BOOLEAN DEFAULT 0'):
|
||||
app.logger.info("Added protect_from_deletion column to tag table")
|
||||
|
||||
# Add speaker embeddings column for storing voice embeddings from diarization
|
||||
if add_column_if_not_exists(engine, 'recording', 'speaker_embeddings', 'JSON'):
|
||||
app.logger.info("Added speaker_embeddings column to recording table")
|
||||
|
||||
# Add speaker voice profile embedding fields
|
||||
if add_column_if_not_exists(engine, 'speaker', 'average_embedding', 'BLOB'):
|
||||
app.logger.info("Added average_embedding column to speaker table")
|
||||
if add_column_if_not_exists(engine, 'speaker', 'embeddings_history', 'JSON'):
|
||||
app.logger.info("Added embeddings_history column to speaker table")
|
||||
if add_column_if_not_exists(engine, 'speaker', 'embedding_count', 'INTEGER DEFAULT 0'):
|
||||
app.logger.info("Added embedding_count column to speaker table")
|
||||
if add_column_if_not_exists(engine, 'speaker', 'confidence_score', 'REAL'):
|
||||
app.logger.info("Added confidence_score column to speaker table")
|
||||
|
||||
# Add is_new_upload column to processing_job table for tracking upload vs reprocessing jobs
|
||||
if add_column_if_not_exists(engine, 'processing_job', 'is_new_upload', 'BOOLEAN DEFAULT 0'):
|
||||
app.logger.info("Added is_new_upload column to processing_job table")
|
||||
|
||||
if add_column_if_not_exists(engine, 'tag', 'group_id', 'INTEGER'):
|
||||
app.logger.info("Added group_id column to tag table")
|
||||
|
||||
if add_column_if_not_exists(engine, 'tag', 'retention_days', 'INTEGER'):
|
||||
app.logger.info("Added retention_days column to tag table")
|
||||
|
||||
# Migrate existing protected tags to use retention_days = -1 for consistency
|
||||
# This standardizes the protection mechanism: retention_days = -1 means protected/infinite retention
|
||||
try:
|
||||
with engine.connect() as conn:
|
||||
# Find tags with protect_from_deletion=True but retention_days != -1
|
||||
result = conn.execute(text("""
|
||||
SELECT COUNT(*) FROM tag
|
||||
WHERE protect_from_deletion = TRUE
|
||||
AND (retention_days IS NULL OR retention_days != -1)
|
||||
"""))
|
||||
count = result.scalar()
|
||||
|
||||
if count and count > 0:
|
||||
# Migrate these tags to use retention_days = -1
|
||||
conn.execute(text("""
|
||||
UPDATE tag
|
||||
SET retention_days = -1
|
||||
WHERE protect_from_deletion = TRUE
|
||||
AND (retention_days IS NULL OR retention_days != -1)
|
||||
"""))
|
||||
conn.commit()
|
||||
app.logger.info(f"Migrated {count} protected tags to use retention_days=-1 (standardized protection format)")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not migrate protected tags to retention_days=-1: {e}")
|
||||
|
||||
if add_column_if_not_exists(engine, 'tag', 'auto_share_on_apply', 'BOOLEAN DEFAULT 1'):
|
||||
app.logger.info("Added auto_share_on_apply column to tag table")
|
||||
|
||||
if add_column_if_not_exists(engine, 'tag', 'share_with_group_lead', 'BOOLEAN DEFAULT 1'):
|
||||
app.logger.info("Added share_with_group_lead column to tag table")
|
||||
|
||||
if add_column_if_not_exists(engine, 'user', 'can_share_publicly', 'BOOLEAN DEFAULT 1'):
|
||||
app.logger.info("Added can_share_publicly column to user table")
|
||||
|
||||
# Token budget for rate limiting
|
||||
if add_column_if_not_exists(engine, 'user', 'monthly_token_budget', 'INTEGER'):
|
||||
app.logger.info("Added monthly_token_budget column to user table")
|
||||
|
||||
# Transcription budget for rate limiting (in seconds)
|
||||
if add_column_if_not_exists(engine, 'user', 'monthly_transcription_budget', 'INTEGER'):
|
||||
app.logger.info("Added monthly_transcription_budget column to user table")
|
||||
|
||||
# Naming templates feature
|
||||
if add_column_if_not_exists(engine, 'user', 'default_naming_template_id', 'INTEGER'):
|
||||
app.logger.info("Added default_naming_template_id column to user table")
|
||||
|
||||
# Email verification fields
|
||||
email_verified_added = add_column_if_not_exists(engine, 'user', 'email_verified', 'BOOLEAN DEFAULT 0')
|
||||
if email_verified_added:
|
||||
app.logger.info("Added email_verified column to user table")
|
||||
# Set all existing users to email_verified=True (grandfathered)
|
||||
try:
|
||||
with engine.connect() as conn:
|
||||
conn.execute(text('UPDATE "user" SET email_verified = TRUE WHERE email_verified = FALSE OR email_verified IS NULL'))
|
||||
conn.commit()
|
||||
app.logger.info("Set email_verified=True for all existing users (grandfathered)")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not update existing users email_verified status: {e}")
|
||||
|
||||
if add_column_if_not_exists(engine, 'user', 'email_verification_token', 'VARCHAR(200)'):
|
||||
app.logger.info("Added email_verification_token column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'email_verification_sent_at', 'DATETIME'):
|
||||
app.logger.info("Added email_verification_sent_at column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'password_reset_token', 'VARCHAR(200)'):
|
||||
app.logger.info("Added password_reset_token column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'password_reset_sent_at', 'DATETIME'):
|
||||
app.logger.info("Added password_reset_sent_at column to user table")
|
||||
|
||||
# Auto speaker labelling settings
|
||||
if add_column_if_not_exists(engine, 'user', 'auto_speaker_labelling', 'BOOLEAN DEFAULT 0'):
|
||||
app.logger.info("Added auto_speaker_labelling column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'auto_speaker_labelling_threshold', "VARCHAR(10) DEFAULT 'medium'"):
|
||||
app.logger.info("Added auto_speaker_labelling_threshold column to user table")
|
||||
|
||||
# Auto summarization setting (per-user, default enabled)
|
||||
if add_column_if_not_exists(engine, 'user', 'auto_summarization', 'BOOLEAN DEFAULT 1'):
|
||||
app.logger.info("Added auto_summarization column to user table")
|
||||
|
||||
# Transcription hints (hotwords and initial prompt for improving ASR accuracy)
|
||||
if add_column_if_not_exists(engine, 'user', 'transcription_hotwords', 'TEXT'):
|
||||
app.logger.info("Added transcription_hotwords column to user table")
|
||||
if add_column_if_not_exists(engine, 'user', 'transcription_initial_prompt', 'TEXT'):
|
||||
app.logger.info("Added transcription_initial_prompt column to user table")
|
||||
if add_column_if_not_exists(engine, 'tag', 'default_hotwords', 'TEXT'):
|
||||
app.logger.info("Added default_hotwords column to tag table")
|
||||
if add_column_if_not_exists(engine, 'tag', 'default_initial_prompt', 'TEXT'):
|
||||
app.logger.info("Added default_initial_prompt column to tag table")
|
||||
if add_column_if_not_exists(engine, 'folder', 'default_hotwords', 'TEXT'):
|
||||
app.logger.info("Added default_hotwords column to folder table")
|
||||
if add_column_if_not_exists(engine, 'folder', 'default_initial_prompt', 'TEXT'):
|
||||
app.logger.info("Added default_initial_prompt column to folder table")
|
||||
|
||||
# Create indexes for token lookups (for faster token verification)
|
||||
try:
|
||||
if create_index_if_not_exists(engine, 'ix_user_email_verification_token', 'user', 'email_verification_token'):
|
||||
app.logger.info("Created index ix_user_email_verification_token on user.email_verification_token")
|
||||
if create_index_if_not_exists(engine, 'ix_user_password_reset_token', 'user', 'password_reset_token'):
|
||||
app.logger.info("Created index ix_user_password_reset_token on user.password_reset_token")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not create token indexes: {e}")
|
||||
if add_column_if_not_exists(engine, 'tag', 'naming_template_id', 'INTEGER'):
|
||||
app.logger.info("Added naming_template_id column to tag table")
|
||||
|
||||
# Export template assignments for tags and folders
|
||||
if add_column_if_not_exists(engine, 'tag', 'export_template_id', 'INTEGER'):
|
||||
app.logger.info("Added export_template_id column to tag table")
|
||||
if add_column_if_not_exists(engine, 'folder', 'export_template_id', 'INTEGER'):
|
||||
app.logger.info("Added export_template_id column to folder table")
|
||||
|
||||
# Add source tracking columns to internal_share table
|
||||
if add_column_if_not_exists(engine, 'internal_share', 'source_type', "VARCHAR(20) DEFAULT 'manual'"):
|
||||
app.logger.info("Added source_type column to internal_share table")
|
||||
|
||||
if add_column_if_not_exists(engine, 'internal_share', 'source_tag_id', 'INTEGER'):
|
||||
app.logger.info("Added source_tag_id column to internal_share table")
|
||||
|
||||
# Migrate existing shares: infer source based on group tag presence
|
||||
try:
|
||||
with engine.connect() as conn:
|
||||
# For each existing share, check if it was likely created by a group tag
|
||||
# by looking for group tags on the recording where the shared user is a group member
|
||||
result = conn.execute(text('''
|
||||
UPDATE internal_share
|
||||
SET source_type = 'group_tag',
|
||||
source_tag_id = (
|
||||
SELECT t.id FROM tag t
|
||||
INNER JOIN recording_tags rt ON rt.tag_id = t.id
|
||||
INNER JOIN group_membership gm ON gm.group_id = t.group_id
|
||||
WHERE rt.recording_id = internal_share.recording_id
|
||||
AND gm.user_id = internal_share.shared_with_user_id
|
||||
AND t.group_id IS NOT NULL
|
||||
AND (t.auto_share_on_apply = TRUE OR t.share_with_group_lead = TRUE)
|
||||
LIMIT 1
|
||||
)
|
||||
WHERE source_type = 'manual'
|
||||
AND EXISTS (
|
||||
SELECT 1 FROM tag t
|
||||
INNER JOIN recording_tags rt ON rt.tag_id = t.id
|
||||
INNER JOIN group_membership gm ON gm.group_id = t.group_id
|
||||
WHERE rt.recording_id = internal_share.recording_id
|
||||
AND gm.user_id = internal_share.shared_with_user_id
|
||||
AND t.group_id IS NOT NULL
|
||||
AND (t.auto_share_on_apply = TRUE OR t.share_with_group_lead = TRUE)
|
||||
)
|
||||
'''))
|
||||
conn.commit()
|
||||
app.logger.info("Inferred source tracking for existing shares based on group tag presence")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not infer source tracking for existing shares: {e}")
|
||||
|
||||
# Update existing records to have proper order values (approximate by tag_id)
|
||||
try:
|
||||
with engine.connect() as conn:
|
||||
# Get existing associations without order values and assign them
|
||||
existing_associations = conn.execute(text('''
|
||||
SELECT recording_id, tag_id,
|
||||
ROW_NUMBER() OVER (PARTITION BY recording_id ORDER BY tag_id) as row_num
|
||||
FROM recording_tags
|
||||
WHERE "order" = 0
|
||||
''')).fetchall()
|
||||
|
||||
for assoc in existing_associations:
|
||||
conn.execute(text('''
|
||||
UPDATE recording_tags
|
||||
SET "order" = :order_num
|
||||
WHERE recording_id = :rec_id AND tag_id = :tag_id
|
||||
'''), {"order_num": assoc.row_num, "rec_id": assoc.recording_id, "tag_id": assoc.tag_id})
|
||||
|
||||
conn.commit()
|
||||
app.logger.info(f"Updated order values for {len(existing_associations)} existing tag associations")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not update existing tag order values: {e}")
|
||||
|
||||
# Add per-user status columns to shared_recording_state table
|
||||
if add_column_if_not_exists(engine, 'shared_recording_state', 'is_inbox', 'BOOLEAN DEFAULT 1'):
|
||||
app.logger.info("Added is_inbox column to shared_recording_state table")
|
||||
|
||||
# Handle is_starred -> is_highlighted migration
|
||||
inspector = inspect(engine)
|
||||
if 'shared_recording_state' in inspector.get_table_names():
|
||||
columns = [col['name'] for col in inspector.get_columns('shared_recording_state')]
|
||||
has_is_starred = 'is_starred' in columns
|
||||
has_is_highlighted = 'is_highlighted' in columns
|
||||
|
||||
if has_is_starred and not has_is_highlighted:
|
||||
# Rename is_starred to is_highlighted by copying data
|
||||
try:
|
||||
# Add is_highlighted column using utility (handles PostgreSQL boolean defaults)
|
||||
add_column_if_not_exists(engine, 'shared_recording_state', 'is_highlighted', 'BOOLEAN DEFAULT 0')
|
||||
# Copy data from is_starred to is_highlighted
|
||||
with engine.connect() as conn:
|
||||
conn.execute(text('UPDATE shared_recording_state SET is_highlighted = is_starred'))
|
||||
conn.commit()
|
||||
app.logger.info("Migrated is_starred to is_highlighted in shared_recording_state table")
|
||||
# Note: We keep is_starred for now to avoid breaking existing code during transition
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not migrate is_starred to is_highlighted: {e}")
|
||||
elif not has_is_highlighted:
|
||||
# Neither column exists, add is_highlighted
|
||||
if add_column_if_not_exists(engine, 'shared_recording_state', 'is_highlighted', 'BOOLEAN DEFAULT 0'):
|
||||
app.logger.info("Added is_highlighted column to shared_recording_state table")
|
||||
|
||||
# Migrate meeting_date from DATE to DATETIME format
|
||||
# This migration handles both:
|
||||
# 1. Converting existing DATE columns to DATETIME (for fresh pulls)
|
||||
# 2. Restoring NULL dates from created_at (for failed migrations)
|
||||
try:
|
||||
inspector = inspect(engine)
|
||||
columns_info = {col['name']: col for col in inspector.get_columns('recording')}
|
||||
|
||||
if 'meeting_date' in columns_info:
|
||||
col_type = str(columns_info['meeting_date']['type']).upper()
|
||||
|
||||
# Check if column needs migration from DATE to DATETIME
|
||||
needs_migration = False
|
||||
|
||||
# For SQLite: Both DATE and DATETIME are TEXT, check data format
|
||||
if engine.name == 'sqlite':
|
||||
with engine.connect() as conn:
|
||||
# Check if we have date-only format (no time component)
|
||||
result = conn.execute(text("""
|
||||
SELECT meeting_date FROM recording
|
||||
WHERE meeting_date IS NOT NULL
|
||||
AND meeting_date NOT LIKE '%:%'
|
||||
LIMIT 1
|
||||
"""))
|
||||
has_date_only = result.fetchone() is not None
|
||||
needs_migration = has_date_only
|
||||
|
||||
# For PostgreSQL/MySQL: Check actual column type
|
||||
elif 'DATE' in col_type and 'DATETIME' not in col_type and 'TIMESTAMP' not in col_type:
|
||||
needs_migration = True
|
||||
|
||||
if needs_migration:
|
||||
app.logger.info(f"Migrating meeting_date from DATE to DATETIME format (engine: {engine.name})")
|
||||
|
||||
with engine.connect() as conn:
|
||||
if engine.name == 'sqlite':
|
||||
# SQLite: Add time component to date-only values
|
||||
conn.execute(text("""
|
||||
UPDATE recording
|
||||
SET meeting_date = datetime(date(meeting_date) || ' 12:00:00')
|
||||
WHERE meeting_date IS NOT NULL
|
||||
AND meeting_date NOT LIKE '%:%'
|
||||
"""))
|
||||
conn.commit()
|
||||
app.logger.info("Migrated SQLite meeting_date to include time")
|
||||
|
||||
elif engine.name == 'postgresql':
|
||||
# PostgreSQL: Change column type
|
||||
conn.execute(text("""
|
||||
ALTER TABLE recording
|
||||
ALTER COLUMN meeting_date TYPE TIMESTAMP
|
||||
USING (meeting_date + TIME '12:00:00')
|
||||
"""))
|
||||
conn.commit()
|
||||
app.logger.info("Migrated PostgreSQL meeting_date to TIMESTAMP")
|
||||
|
||||
elif engine.name == 'mysql':
|
||||
# MySQL: Change column type
|
||||
conn.execute(text("""
|
||||
ALTER TABLE recording
|
||||
MODIFY COLUMN meeting_date DATETIME
|
||||
"""))
|
||||
# Add time component to existing date values
|
||||
conn.execute(text("""
|
||||
UPDATE recording
|
||||
SET meeting_date = TIMESTAMP(meeting_date, '12:00:00')
|
||||
WHERE meeting_date IS NOT NULL
|
||||
"""))
|
||||
conn.commit()
|
||||
app.logger.info("Migrated MySQL meeting_date to DATETIME")
|
||||
else:
|
||||
app.logger.info("meeting_date already in DATETIME format, skipping migration")
|
||||
|
||||
# Safety net: Restore any NULL meeting_dates from created_at
|
||||
with engine.connect() as conn:
|
||||
result = conn.execute(text("""
|
||||
SELECT COUNT(*) FROM recording
|
||||
WHERE meeting_date IS NULL AND created_at IS NOT NULL
|
||||
"""))
|
||||
null_count = result.scalar()
|
||||
|
||||
if null_count and null_count > 0:
|
||||
conn.execute(text("""
|
||||
UPDATE recording
|
||||
SET meeting_date = created_at
|
||||
WHERE meeting_date IS NULL AND created_at IS NOT NULL
|
||||
"""))
|
||||
conn.commit()
|
||||
app.logger.info(f"Restored {null_count} NULL meeting dates from created_at")
|
||||
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Error during meeting_date migration: {e}")
|
||||
app.logger.warning("New recordings will work correctly, but existing dates may need manual migration")
|
||||
|
||||
# Add index on TranscriptChunk.speaker_name for performance
|
||||
# This improves speaker rename operations which update all chunks
|
||||
try:
|
||||
inspector = inspect(engine)
|
||||
if 'transcript_chunk' in inspector.get_table_names():
|
||||
existing_indexes = [idx['name'] for idx in inspector.get_indexes('transcript_chunk')]
|
||||
|
||||
# Create composite index on (user_id, speaker_name) if it doesn't exist
|
||||
if 'idx_user_speaker_name' not in existing_indexes:
|
||||
with engine.connect() as conn:
|
||||
conn.execute(text(
|
||||
'CREATE INDEX IF NOT EXISTS idx_user_speaker_name ON transcript_chunk (user_id, speaker_name)'
|
||||
))
|
||||
conn.commit()
|
||||
app.logger.info("Created index idx_user_speaker_name on transcript_chunk (user_id, speaker_name) for speaker rename performance")
|
||||
|
||||
# Create single-column index on speaker_name if it doesn't exist
|
||||
if 'ix_transcript_chunk_speaker_name' not in existing_indexes:
|
||||
with engine.connect() as conn:
|
||||
conn.execute(text(
|
||||
'CREATE INDEX IF NOT EXISTS ix_transcript_chunk_speaker_name ON transcript_chunk (speaker_name)'
|
||||
))
|
||||
conn.commit()
|
||||
app.logger.info("Created index ix_transcript_chunk_speaker_name on transcript_chunk (speaker_name)")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not create speaker_name indexes: {e}")
|
||||
|
||||
# Add unique index for SSO subject to prevent duplicate linking
|
||||
try:
|
||||
if create_index_if_not_exists(engine, 'ix_user_sso_subject', 'user', 'sso_subject', unique=True):
|
||||
app.logger.info("Created unique index ix_user_sso_subject on user.sso_subject")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not create unique index on user.sso_subject: {e}")
|
||||
|
||||
# Add file_hash column for duplicate detection
|
||||
if add_column_if_not_exists(engine, 'recording', 'file_hash', 'VARCHAR(64)'):
|
||||
app.logger.info("Added file_hash column to recording table")
|
||||
try:
|
||||
if create_index_if_not_exists(engine, 'ix_recording_user_file_hash', 'recording', 'user_id, file_hash'):
|
||||
app.logger.info("Created index ix_recording_user_file_hash on recording (user_id, file_hash)")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not create index on recording (user_id, file_hash): {e}")
|
||||
|
||||
# Add folder_id column to recording table for folders feature
|
||||
if add_column_if_not_exists(engine, 'recording', 'folder_id', 'INTEGER'):
|
||||
app.logger.info("Added folder_id column to recording table")
|
||||
# Create index for folder_id
|
||||
try:
|
||||
if create_index_if_not_exists(engine, 'ix_recording_folder_id', 'recording', 'folder_id'):
|
||||
app.logger.info("Created index ix_recording_folder_id on recording.folder_id")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not create index on recording.folder_id: {e}")
|
||||
|
||||
# Add indexes for audit log tables (Loi 25 compliance)
|
||||
try:
|
||||
inspector = inspect(engine)
|
||||
if 'access_log' in inspector.get_table_names():
|
||||
if create_index_if_not_exists(engine, 'ix_access_log_user_id', 'access_log', 'user_id'):
|
||||
app.logger.info("Created index ix_access_log_user_id")
|
||||
if create_index_if_not_exists(engine, 'ix_access_log_resource', 'access_log', 'resource_type, resource_id'):
|
||||
app.logger.info("Created index ix_access_log_resource")
|
||||
if 'auth_log' in inspector.get_table_names():
|
||||
if create_index_if_not_exists(engine, 'ix_auth_log_user_id', 'auth_log', 'user_id'):
|
||||
app.logger.info("Created index ix_auth_log_user_id")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Could not create audit log indexes: {e}")
|
||||
|
||||
# Initialize default system settings
|
||||
if not SystemSetting.query.filter_by(key='transcript_length_limit').first():
|
||||
SystemSetting.set_setting(
|
||||
key='transcript_length_limit',
|
||||
value='50000',
|
||||
description='Maximum number of characters to send from transcript to LLM for summarization and chat. Use -1 for no limit.',
|
||||
setting_type='integer'
|
||||
)
|
||||
app.logger.info("Initialized default transcript_length_limit setting")
|
||||
|
||||
if not SystemSetting.query.filter_by(key='max_file_size_mb').first():
|
||||
SystemSetting.set_setting(
|
||||
key='max_file_size_mb',
|
||||
value='10000',
|
||||
description='Maximum file size allowed for audio uploads in megabytes (MB).',
|
||||
setting_type='integer'
|
||||
)
|
||||
app.logger.info("Initialized default max_file_size_mb setting")
|
||||
|
||||
if not SystemSetting.query.filter_by(key='asr_timeout_seconds').first():
|
||||
SystemSetting.set_setting(
|
||||
key='asr_timeout_seconds',
|
||||
value='1800',
|
||||
description='Maximum time in seconds to wait for ASR transcription to complete. Default is 1800 seconds (30 minutes).',
|
||||
setting_type='integer'
|
||||
)
|
||||
app.logger.info("Initialized default asr_timeout_seconds setting")
|
||||
|
||||
if not SystemSetting.query.filter_by(key='admin_default_summary_prompt').first():
|
||||
default_prompt = """Tu es un assistant expert en prise de notes. Analyse cette transcription et extrais toutes les informations importantes en français.
|
||||
|
||||
## 📝 RÉSUMÉ
|
||||
Synthèse claire et concise de la conversation en 4-6 phrases.
|
||||
|
||||
## 🔑 POINTS CLÉS
|
||||
• Les informations essentielles à retenir
|
||||
• Les faits importants mentionnés
|
||||
• Les opinions ou positions exprimées
|
||||
|
||||
## 📊 DONNÉES & CHIFFRES
|
||||
• Montants, dates, délais, pourcentages
|
||||
• Noms de personnes, entreprises, lieux
|
||||
• Références techniques ou spécifiques
|
||||
|
||||
## 💬 CITATIONS MARQUANTES
|
||||
> Phrases importantes ou révélatrices (entre guillemets)
|
||||
|
||||
## ⚠️ PROBLÈMES & PRÉOCCUPATIONS
|
||||
• Difficultés ou obstacles mentionnés
|
||||
• Risques identifiés
|
||||
• Points de friction ou désaccords
|
||||
|
||||
## 💡 IDÉES & SUGGESTIONS
|
||||
• Propositions faites durant la conversation
|
||||
• Solutions envisagées
|
||||
• Opportunités mentionnelles
|
||||
|
||||
## ✅ DÉCISIONS & PROCHAINES ÉTAPES
|
||||
• Ce qui a été décidé ou convenu
|
||||
• Actions à entreprendre
|
||||
• Suivis nécessaires
|
||||
|
||||
---
|
||||
Instructions : Sois exhaustif et n'omets aucun détail pertinent. Utilise un langage clair et professionnel. Adapte les sections selon le contenu — si une section ne s'applique pas, omets-la."""
|
||||
SystemSetting.set_setting(
|
||||
key='admin_default_summary_prompt',
|
||||
value=default_prompt,
|
||||
description='Default summarization prompt used when users have not set their own prompt. This serves as the base prompt for all users.',
|
||||
setting_type='string'
|
||||
)
|
||||
app.logger.info("Initialized admin_default_summary_prompt setting")
|
||||
|
||||
if not SystemSetting.query.filter_by(key='recording_disclaimer').first():
|
||||
SystemSetting.set_setting(
|
||||
key='recording_disclaimer',
|
||||
value='',
|
||||
description='Legal disclaimer shown to users before recording starts. Supports Markdown formatting. Leave empty to disable.',
|
||||
setting_type='string'
|
||||
)
|
||||
app.logger.info("Initialized recording_disclaimer setting")
|
||||
|
||||
if not SystemSetting.query.filter_by(key='upload_disclaimer').first():
|
||||
SystemSetting.set_setting(
|
||||
key='upload_disclaimer',
|
||||
value='',
|
||||
description='Legal disclaimer shown before file uploads. Supports Markdown. Leave empty to disable.',
|
||||
setting_type='string'
|
||||
)
|
||||
app.logger.info("Initialized upload_disclaimer setting")
|
||||
|
||||
if not SystemSetting.query.filter_by(key='custom_banner').first():
|
||||
SystemSetting.set_setting(
|
||||
key='custom_banner',
|
||||
value='',
|
||||
description='Custom banner shown at the top of the page. Supports Markdown. Leave empty to disable.',
|
||||
setting_type='string'
|
||||
)
|
||||
app.logger.info("Initialized custom_banner setting")
|
||||
|
||||
if not SystemSetting.query.filter_by(key='disable_auto_summarization').first():
|
||||
SystemSetting.set_setting(
|
||||
key='disable_auto_summarization',
|
||||
value='false',
|
||||
description='Disable automatic summarization after transcription completes. When enabled, recordings will only be transcribed and users must manually trigger summarization.',
|
||||
setting_type='boolean'
|
||||
)
|
||||
app.logger.info("Initialized disable_auto_summarization setting")
|
||||
|
||||
if not SystemSetting.query.filter_by(key='enable_folders').first():
|
||||
SystemSetting.set_setting(
|
||||
key='enable_folders',
|
||||
value='true',
|
||||
description='Enable the Folders feature, allowing users to organize recordings into folders with custom prompts and ASR settings.',
|
||||
setting_type='boolean'
|
||||
)
|
||||
app.logger.info("Initialized enable_folders setting")
|
||||
|
||||
# Process existing recordings for inquire mode (chunk and embed them)
|
||||
# Only run if inquire mode is enabled
|
||||
if ENABLE_INQUIRE_MODE:
|
||||
# Use a file lock to prevent multiple workers from running this simultaneously
|
||||
lock_file_path = os.path.join(tempfile.gettempdir(), 'inquire_migration.lock')
|
||||
|
||||
try:
|
||||
with open(lock_file_path, 'w') as lock_file:
|
||||
# Try to acquire exclusive lock (non-blocking)
|
||||
try:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
app.logger.info("Acquired migration lock, checking for existing recordings that need chunking for inquire mode...")
|
||||
|
||||
completed_recordings = Recording.query.filter_by(status='COMPLETED').all()
|
||||
recordings_needing_processing = []
|
||||
|
||||
for recording in completed_recordings:
|
||||
if recording.transcription: # Has transcription
|
||||
chunk_count = TranscriptChunk.query.filter_by(recording_id=recording.id).count()
|
||||
if chunk_count == 0: # No chunks yet
|
||||
recordings_needing_processing.append(recording)
|
||||
|
||||
if recordings_needing_processing:
|
||||
app.logger.info(f"Found {len(recordings_needing_processing)} recordings that need chunking for inquire mode")
|
||||
app.logger.info("Processing first 10 recordings automatically. Use admin API or migration script for remaining recordings.")
|
||||
|
||||
# Process first 10 recordings automatically to avoid long startup times
|
||||
batch_size = min(10, len(recordings_needing_processing))
|
||||
processed = 0
|
||||
|
||||
for i in range(batch_size):
|
||||
recording = recordings_needing_processing[i]
|
||||
try:
|
||||
success = process_recording_chunks(recording.id)
|
||||
if success:
|
||||
processed += 1
|
||||
app.logger.info(f"Processed chunks for recording: {recording.title} ({recording.id})")
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Failed to process chunks for recording {recording.id}: {e}")
|
||||
|
||||
remaining = len(recordings_needing_processing) - processed
|
||||
if remaining > 0:
|
||||
app.logger.info(f"Successfully processed {processed} recordings. {remaining} recordings remaining.")
|
||||
app.logger.info("Use the admin migration API or run 'python migrate_existing_recordings.py' to process remaining recordings.")
|
||||
else:
|
||||
app.logger.info(f"Successfully processed all {processed} recordings for inquire mode.")
|
||||
else:
|
||||
app.logger.info("All existing recordings are already processed for inquire mode.")
|
||||
|
||||
except BlockingIOError:
|
||||
app.logger.info("Migration already running in another worker, skipping...")
|
||||
|
||||
except Exception as e:
|
||||
app.logger.warning(f"Error during existing recordings migration: {e}")
|
||||
app.logger.info("Existing recordings can be migrated later using the admin API or migration script.")
|
||||
|
||||
except Exception as e:
|
||||
app.logger.error(f"Error during database migration: {e}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Standalone entry point: run the schema creation / migration pass
    # directly (e.g. `python -m src.init_db`) instead of via app startup.
    # Imported here, not at module top, to avoid a circular import with
    # src.app, which itself imports this module.
    from src.app import app

    # initialize_database expects to run inside a Flask application
    # context (it touches db.engine, SystemSetting queries, app.logger).
    with app.app_context():
        initialize_database(app)
|
||||
Reference in New Issue
Block a user