Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)

This commit is contained in:
InnovA AI
2026-03-16 21:47:37 +00:00
commit 42772a31ed
365 changed files with 103572 additions and 0 deletions

94
scripts/create_admin.py Normal file
View File

@@ -0,0 +1,94 @@
#!/usr/bin/env python3
"""Interactive CLI for creating an admin user (see create_admin_user)."""
import os
import sys
import getpass
from email_validator import validate_email, EmailNotValidError

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Try to import from app context
try:
    from flask import current_app
    # Reuse a live Flask app if one exists (e.g. invoked via a Flask CLI
    # command) instead of importing src.app a second time.
    app = current_app._get_current_object()
    with app.app_context():
        db = app.extensions['sqlalchemy'].db
        # NOTE(review): this binds User to a SQLAlchemy Table object, while the
        # fallback path below binds the ORM model class -- verify the query
        # code tolerates both forms.
        User = app.extensions['sqlalchemy'].db.metadata.tables['user']
        bcrypt = app.extensions.get('bcrypt')
except (RuntimeError, AttributeError, KeyError):
    # If not in app context, import directly
    try:
        from src.app import app, db, User, bcrypt
    except ImportError as e:
        print(f"Error: Could not import required modules: {e}")
        print("Make sure create_admin.py is runnable and PYTHONPATH is set.")
        sys.exit(1)
def _prompt_username():
    """Prompt until the operator supplies a valid, unused username."""
    while True:
        username = input("Enter username (min 3 characters): ").strip()
        if len(username) < 3:
            print("Username must be at least 3 characters long.")
            continue
        # Check if username already exists
        with app.app_context():
            existing_user = db.session.query(User).filter_by(username=username).first()
            if existing_user:
                print(f"Username '{username}' already exists. Please choose another.")
                continue
        return username


def _prompt_email():
    """Prompt until the operator supplies a valid, unused email address."""
    # Skip DNS/MX deliverability checks when SKIP_EMAIL_DOMAIN_CHECK=true
    skip_domain_check = os.environ.get('SKIP_EMAIL_DOMAIN_CHECK', 'false').lower() == 'true'
    while True:
        email = input("Enter email address: ").strip()
        try:
            validate_email(email, check_deliverability=not skip_domain_check)
        except EmailNotValidError as e:
            print(f"Invalid email: {str(e)}")
            continue
        # Check if email already exists
        with app.app_context():
            existing_email = db.session.query(User).filter_by(email=email).first()
            if existing_email:
                print(f"Email '{email}' already exists. Please use another.")
                continue
        return email


def _prompt_password():
    """Prompt (without echo) until a valid, confirmed password is entered."""
    while True:
        password = getpass.getpass("Enter password (min 8 characters): ")
        if len(password) < 8:
            print("Password must be at least 8 characters long.")
            continue
        confirm_password = getpass.getpass("Confirm password: ")
        if password != confirm_password:
            print("Passwords do not match. Please try again.")
            continue
        return password


def create_admin_user():
    """
    Create an admin user interactively.

    Prompts for username, email, and password (each validated and checked
    for uniqueness), then inserts a new admin User with a bcrypt-hashed
    password. All output goes to stdout; password input is not echoed.
    """
    print("Creating admin user for Speakr application")
    print("=========================================")
    username = _prompt_username()
    email = _prompt_email()
    password = _prompt_password()
    # Persist the new admin account
    with app.app_context():
        hashed_password = bcrypt.generate_password_hash(password).decode('utf-8')
        new_user = User(username=username, email=email, password=hashed_password, is_admin=True)
        db.session.add(new_user)
        db.session.commit()
    print("\nAdmin user created successfully!")
    print(f"Username: {username}")
    print(f"Email: {email}")
    print("You can now log in to the application with these credentials.")


if __name__ == "__main__":
    create_admin_user()

View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Container entrypoint: prepares the /data volume, bootstraps the SQLite
# database and (optionally) an admin account, then execs the real command.
set -e
# Create necessary directories
mkdir -p /data/uploads /data/instance
chmod 755 /data/uploads /data/instance
# Initialize the database if it doesn't exist
if [ ! -f /data/instance/transcriptions.db ]; then
    echo "Database doesn't exist. Creating new database..."
    python -c "from src.app import app, db; app.app_context().push(); db.create_all()"
    echo "Database created successfully."
else
    echo "Database exists. Checking for schema updates..."
    # Importing the app is presumably enough to apply schema updates at
    # import time -- TODO confirm against src/app.py.
    python -c "from src.app import app; app.app_context().push()"
fi
# Check if we need to create an admin user (regardless of whether the database exists)
if [ -n "$ADMIN_USERNAME" ] && [ -n "$ADMIN_EMAIL" ] && [ -n "$ADMIN_PASSWORD" ]; then
    echo "Creating admin user using environment variables..."
    cd /app && python scripts/docker_create_admin.py
fi
# Start the application (whatever command the image/CMD supplied)
exec "$@"

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env python3
"""Non-interactive admin-user creation for Docker (driven by env vars)."""
import os
import sys
from email_validator import validate_email, EmailNotValidError

# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Try to import from app context
try:
    from flask import current_app
    # Reuse a live Flask app if one exists instead of importing src.app again.
    app = current_app._get_current_object()
    with app.app_context():
        db = app.extensions['sqlalchemy'].db
        # NOTE(review): binds User to a Table object here, but the ORM model
        # class on the fallback path below -- verify both forms work downstream.
        User = app.extensions['sqlalchemy'].db.metadata.tables['user']
        bcrypt = app.extensions.get('bcrypt')
except (RuntimeError, AttributeError, KeyError):
    # If not in app context, import directly
    try:
        from src.app import app, db, User, bcrypt
    except ImportError as e:
        print(f"Error: Could not import required modules: {e}")
        print("Make sure docker_create_admin.py is runnable and PYTHONPATH is set.")
        sys.exit(1)
def create_admin_user_from_env():
    """
    Create an admin user from environment variables.

    Required environment variables:
    - ADMIN_USERNAME
    - ADMIN_EMAIL
    - ADMIN_PASSWORD

    Exits 1 on invalid input; exits 0 (idempotent no-op) when the user or
    email already exists.
    """
    print("Creating admin user for Speakr application from environment variables")
    print("=================================================================")

    env = os.environ
    username = env.get('ADMIN_USERNAME')
    email = env.get('ADMIN_EMAIL')
    password = env.get('ADMIN_PASSWORD')

    # All three variables are mandatory.
    if not (username and email and password):
        print("Error: ADMIN_USERNAME, ADMIN_EMAIL, and ADMIN_PASSWORD environment variables must be set.")
        sys.exit(1)

    if len(username) < 3:
        print("Error: Username must be at least 3 characters long.")
        sys.exit(1)

    # DNS/MX deliverability check is skipped when SKIP_EMAIL_DOMAIN_CHECK=true.
    check_deliverability = env.get('SKIP_EMAIL_DOMAIN_CHECK', 'false').lower() != 'true'
    try:
        validate_email(email, check_deliverability=check_deliverability)
    except EmailNotValidError as e:
        print(f"Error: Invalid email: {str(e)}")
        sys.exit(1)

    if len(password) < 8:
        print("Error: Password must be at least 8 characters long.")
        sys.exit(1)

    with app.app_context():
        # Idempotency: bail out quietly if the account already exists.
        if db.session.query(User).filter_by(username=username).first() is not None:
            print(f"User with username '{username}' already exists.")
            sys.exit(0)
        if db.session.query(User).filter_by(email=email).first() is not None:
            print(f"User with email '{email}' already exists.")
            sys.exit(0)

        # Create new admin user with a bcrypt-hashed password.
        pw_hash = bcrypt.generate_password_hash(password).decode('utf-8')
        db.session.add(User(username=username, email=email, password=pw_hash, is_admin=True))
        db.session.commit()

    print("\nAdmin user created successfully!")
    print(f"Username: {username}")
    print(f"Email: {email}")
    print("You can now log in to the application with these credentials.")


if __name__ == "__main__":
    create_admin_user_from_env()

View File

@@ -0,0 +1,116 @@
#!/usr/bin/env python3
"""
Download all CDN dependencies for offline deployment
"""
import os
import requests
from pathlib import Path

# Base directory for vendor files: <repo>/static/vendor
VENDOR_DIR = Path(__file__).parent.parent / "static" / "vendor"

# Dependencies to download, keyed by target subdirectory under VENDOR_DIR.
# NOTE: main() mutates the 'vue.global.js' entry to the production build when
# FLASK_ENV=production or PRODUCTION=1.
DEPENDENCIES = {
    "css": {
        "fontawesome.min.css": "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css",
        "easymde.min.css": "https://cdn.jsdelivr.net/npm/easymde/dist/easymde.min.css",
    },
    "js": {
        "tailwind.min.js": "https://cdn.tailwindcss.com/3.4.0",
        "vue.global.js": "https://cdn.jsdelivr.net/npm/vue@3/dist/vue.global.js",
        "marked.min.js": "https://cdn.jsdelivr.net/npm/marked/marked.min.js",
        "easymde.min.js": "https://cdn.jsdelivr.net/npm/easymde/dist/easymde.min.js",
        "axios.min.js": "https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js",
    }
}

# Font Awesome webfonts; the downloaded Font Awesome CSS references these,
# and main() rewrites its URLs to point at the local copies.
FONTAWESOME_FONTS = [
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/fa-brands-400.ttf",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/fa-brands-400.woff2",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/fa-regular-400.ttf",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/fa-regular-400.woff2",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/fa-solid-900.ttf",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/fa-solid-900.woff2",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/fa-v4compatibility.ttf",
    "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/fa-v4compatibility.woff2",
]
def download_file(url, filepath):
    """Download *url* to *filepath*, creating parent directories as needed.

    Args:
        url: HTTP(S) URL to fetch.
        filepath: pathlib.Path destination for the response body.

    Returns:
        True on success, False on any network or filesystem error
        (the error is printed, never raised).
    """
    print(f"Downloading {url} to {filepath}")
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        # Create directory if it doesn't exist, then write atomically enough
        # for this use case via a single write.
        filepath.parent.mkdir(parents=True, exist_ok=True)
        filepath.write_bytes(response.content)
    # Narrowed from a bare `except Exception`: only network errors
    # (requests.RequestException covers timeouts, HTTP errors from
    # raise_for_status, connection failures) and filesystem errors.
    except (requests.RequestException, OSError) as e:
        print(f" ✗ Failed to download {url}: {e}")
        return False
    print(f" ✓ Downloaded {filepath.name}")
    return True
def main():
    """Download every vendored CDN dependency for offline deployment.

    Selects dev vs. production builds from FLASK_ENV/PRODUCTION, downloads
    all CSS/JS files and Font Awesome webfonts, and rewrites the Font
    Awesome CSS to reference the local font copies.

    Raises:
        SystemExit(1) if any download failed (previously this function
        ignored download_file's return value and always reported success).
    """
    print("Downloading offline dependencies...")
    print(f"Vendor directory: {VENDOR_DIR}")

    # Check if we're in production mode
    is_production = os.environ.get('FLASK_ENV') == 'production' or os.environ.get('PRODUCTION') == '1'
    if is_production:
        print("⚙️ PRODUCTION MODE: Using production builds")
        # Replace Vue.js development build with production build
        DEPENDENCIES['js']['vue.global.js'] = "https://cdn.jsdelivr.net/npm/vue@3/dist/vue.global.prod.js"
    else:
        print("⚙️ DEVELOPMENT MODE: Using development builds")

    failures = 0  # count of failed downloads (bug fix: previously ignored)

    # Download CSS and JS files
    for file_type, files in DEPENDENCIES.items():
        print(f"\n{file_type.upper()} Files:")
        for filename, url in files.items():
            filepath = VENDOR_DIR / file_type / filename
            if not download_file(url, filepath):
                failures += 1

    # Download Font Awesome fonts
    print("\nFont Awesome Webfonts:")
    for url in FONTAWESOME_FONTS:
        filename = url.split("/")[-1]
        filepath = VENDOR_DIR / "fonts" / "webfonts" / filename
        if not download_file(url, filepath):
            failures += 1

    # Update Font Awesome CSS to use local fonts
    fa_css_path = VENDOR_DIR / "css" / "fontawesome.min.css"
    if fa_css_path.exists():
        print("\nUpdating Font Awesome CSS to use local fonts...")
        content = fa_css_path.read_text()
        # Replace CDN URLs with local paths - handle both relative and absolute URLs
        content = content.replace(
            "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/webfonts/",
            "../fonts/webfonts/"
        )
        # Also replace any relative URLs that might be in the minified CSS
        content = content.replace("../webfonts/", "../fonts/webfonts/")
        content = content.replace("./webfonts/", "../fonts/webfonts/")
        fa_css_path.write_text(content)
        print(" ✓ Updated Font Awesome CSS paths")

    if failures:
        # Fail loudly so CI / build scripts notice a broken offline bundle.
        print(f"\n❌ {failures} download(s) failed. See messages above.")
        raise SystemExit(1)
    print("\n✅ All dependencies downloaded successfully!")


if __name__ == "__main__":
    main()

44
scripts/migrate_docker.sh Normal file
View File

@@ -0,0 +1,44 @@
#!/bin/bash
# Manual migration script for Docker deployments
# NOTE: The first 10 recordings are processed automatically on startup.
# This script is for processing any remaining recordings.

echo "🎯 Inquire Mode Manual Migration for Docker"
echo "============================================="

# Bail out early unless the Speakr container is already up.
if ! docker compose ps | grep -q "speakr.*Up"; then
    echo "❌ Speakr container is not running. Please start it first with:"
    echo " docker compose up -d"
    exit 1
fi

echo " Note: The first 10 recordings are processed automatically on startup."
echo " This script processes any remaining recordings that need chunking."
echo ""
echo "🔍 Checking how many recordings still need processing..."

# Dry run first so the user sees what would be processed before confirming.
docker compose exec app python migrate_existing_recordings.py --dry-run

echo ""
echo "⚠️ Do you want to proceed with processing these recordings?"
echo "⚠️ This will create embeddings and may take several minutes."
read -p "Continue? (y/N): " -n 1 -r
echo

# Anything other than y/Y aborts.
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "❌ Migration cancelled."
    exit 0
fi

echo "🚀 Starting migration..."
if docker compose exec app python migrate_existing_recordings.py --process --batch-size 5; then
    echo "✅ Migration completed successfully!"
    echo "🎉 Your existing recordings are now ready for Inquire Mode!"
else
    echo "❌ Migration failed. Check the logs above for details."
    exit 1
fi

View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
"""
Migration script to process existing recordings for Inquire Mode.
This script will chunk and vectorize all existing recordings that haven't been processed yet.
"""
import os
import sys
from src.app import app, db, Recording, TranscriptChunk, process_recording_chunks
def count_recordings_needing_processing():
    """Return the completed recordings that still lack transcript chunks.

    Note: despite the name, this returns the list of Recording objects
    themselves — callers use both its length and its elements.
    """
    with app.app_context():
        completed = Recording.query.filter_by(status='COMPLETED').all()
        # Needs processing = has a transcription but no TranscriptChunk rows yet.
        return [
            rec for rec in completed
            if rec.transcription
            and TranscriptChunk.query.filter_by(recording_id=rec.id).count() == 0
        ]
def migrate_existing_recordings(batch_size=10, dry_run=False):
    """
    Process existing recordings in batches to create chunks and embeddings.

    Args:
        batch_size (int): Number of recordings to process at once
        dry_run (bool): If True, just show what would be processed

    Returns:
        bool: True when nothing needed processing, a dry run completed, or
        every recording processed without error; False otherwise.
    """
    with app.app_context():
        recordings_to_process = count_recordings_needing_processing()
        print(f"🔍 Found {len(recordings_to_process)} recordings that need chunk processing")
        if len(recordings_to_process) == 0:
            print("✅ All recordings are already processed!")
            return True
        if dry_run:
            # List what would be touched, then stop without side effects.
            print("\n📋 Recordings that would be processed:")
            for i, recording in enumerate(recordings_to_process, 1):
                print(f" {i}. {recording.title} (ID: {recording.id}) - {len(recording.transcription)} chars")
            print(f"\nThis is a dry run. Use --process to actually run the migration.")
            return True
        print(f"🚀 Processing {len(recordings_to_process)} recordings in batches of {batch_size}")
        processed = 0
        errors = 0
        # Process in slices of batch_size, committing once per batch so a
        # failure only rolls back the current batch.
        for i in range(0, len(recordings_to_process), batch_size):
            batch = recordings_to_process[i:i + batch_size]
            print(f"\n📦 Processing batch {i//batch_size + 1} ({len(batch)} recordings)...")
            for recording in batch:
                try:
                    print(f" ⏳ Processing: {recording.title} (ID: {recording.id})")
                    success = process_recording_chunks(recording.id)
                    if success:
                        processed += 1
                        # Get chunk count to report
                        chunk_count = TranscriptChunk.query.filter_by(recording_id=recording.id).count()
                        print(f" ✅ Created {chunk_count} chunks")
                    else:
                        errors += 1
                        print(f" ❌ Failed to process recording {recording.id}")
                except Exception as e:
                    # Per-recording failures are counted but do not abort the batch.
                    errors += 1
                    print(f" ❌ Error processing recording {recording.id}: {e}")
            # Commit batch; on failure the whole batch counts as errored.
            try:
                db.session.commit()
                print(f" 💾 Batch committed successfully")
            except Exception as e:
                db.session.rollback()
                print(f" ❌ Error committing batch: {e}")
                errors += len(batch)
        print(f"\n📊 Migration Summary:")
        print(f" ✅ Successfully processed: {processed}")
        print(f" ❌ Errors: {errors}")
        # NOTE(review): the ternary wraps the whole print argument, so "N/A"
        # would print bare -- in practice (processed+errors) > 0 always holds here.
        print(f" 📈 Success rate: {(processed/(processed+errors)*100):.1f}%" if (processed+errors) > 0 else "N/A")
        return errors == 0
def main():
    """Parse command-line flags and run the migration (or a dry run).

    Returns True on success, False on refusal, cancellation, or failure.
    """
    import argparse

    parser = argparse.ArgumentParser(description='Migrate existing recordings for Inquire Mode')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be processed without actually processing')
    parser.add_argument('--process', action='store_true',
                        help='Actually process the recordings')
    parser.add_argument('--batch-size', type=int, default=10,
                        help='Number of recordings to process in each batch (default: 10)')
    args = parser.parse_args()

    # Require an explicit mode; refuse to guess.
    if not (args.dry_run or args.process):
        print("❌ Please specify either --dry-run or --process")
        print("Use --help for more information")
        return False

    print("🎯 Inquire Mode Migration Tool")
    print("=" * 40)

    try:
        if args.dry_run:
            return migrate_existing_recordings(args.batch_size, dry_run=True)
        # Real run: warn and require interactive confirmation.
        print("⚠️ This will process all existing recordings and create embeddings.")
        print("⚠️ This may take a while and use significant CPU/memory.")
        confirm = input("Continue? (y/N): ")
        if confirm.lower() != 'y':
            print("❌ Migration cancelled by user")
            return False
        return migrate_existing_recordings(args.batch_size, dry_run=False)
    except KeyboardInterrupt:
        print("\n❌ Migration cancelled by user")
        return False
    except Exception as e:
        print(f"❌ Migration failed: {e}")
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    sys.exit(0 if main() else 1)

137
scripts/migrate_team_to_group.py Executable file
View File

@@ -0,0 +1,137 @@
#!/usr/bin/env python3
"""
Migration script to rename team tables to group tables.
This handles the refactoring from team-based to group-based terminology.
"""
import sys
import os

# Add the parent directory to the path to import app modules
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Imported after the path tweak so `src.app` resolves when run as a script.
from src.app import app, db
from sqlalchemy import text
def migrate_tables():
    """Copy data from team tables to group tables and remove old tables.

    Handles three states per table pair (team/group, team_membership/
    group_membership): both exist (copy rows then drop the old one), only the
    old one exists (rename it), or neither (already migrated). Afterwards it
    copies team-era columns on `tag` into their group-era counterparts.
    Any failure prints the error, rolls back, and exits with status 1.
    """
    with app.app_context():
        try:
            # Check if old tables exist
            inspector = db.inspect(db.engine)
            existing_tables = inspector.get_table_names()
            print("Existing tables:", existing_tables)
            # Check if we need to migrate data
            if 'team' in existing_tables and 'group' in existing_tables:
                # Both tables exist - need to copy data
                print("\nBoth 'team' and 'group' tables exist. Copying data...")
                # Check if there's data in the old table
                result = db.session.execute(text('SELECT COUNT(*) FROM team'))
                old_count = result.scalar()
                print(f"Found {old_count} records in 'team' table")
                if old_count > 0:
                    # Copy data from team to group.
                    # NOTE(review): assumes ids in 'team' do not collide with
                    # rows already in "group" -- verify before running on a
                    # database where both tables hold data.
                    print("Copying data from 'team' to 'group'...")
                    db.session.execute(text(
                        'INSERT INTO "group" (id, name, description, created_at) '
                        'SELECT id, name, description, created_at FROM team'
                    ))
                    db.session.commit()
                    print(f"✓ Copied {old_count} records to 'group' table")
                # Drop the old team table
                print("Dropping old 'team' table...")
                db.session.execute(text('DROP TABLE team'))
                db.session.commit()
                print("✓ Dropped old 'team' table")
            elif 'team' in existing_tables and 'group' not in existing_tables:
                # Only old table exists - rename it ("group" is quoted because
                # it is a reserved SQL keyword)
                print("\nRenaming 'team' table to 'group'...")
                db.session.execute(text('ALTER TABLE team RENAME TO "group"'))
                db.session.commit()
                print("✓ Renamed 'team' to 'group'")
            else:
                print("\n'team' table not found or already migrated")
            # Migrate team_membership (same three-state logic as above)
            if 'team_membership' in existing_tables and 'group_membership' in existing_tables:
                # Both tables exist - need to copy data
                print("\nBoth 'team_membership' and 'group_membership' tables exist. Copying data...")
                # Check if there's data in the old table
                result = db.session.execute(text('SELECT COUNT(*) FROM team_membership'))
                old_count = result.scalar()
                print(f"Found {old_count} records in 'team_membership' table")
                if old_count > 0:
                    # Copy data from team_membership to group_membership
                    print("Copying data from 'team_membership' to 'group_membership'...")
                    db.session.execute(text(
                        'INSERT INTO group_membership (id, user_id, group_id, role, joined_at) '
                        'SELECT id, user_id, team_id, role, joined_at FROM team_membership'
                    ))
                    db.session.commit()
                    print(f"✓ Copied {old_count} records to 'group_membership' table")
                # Drop the old team_membership table
                print("Dropping old 'team_membership' table...")
                db.session.execute(text('DROP TABLE team_membership'))
                db.session.commit()
                print("✓ Dropped old 'team_membership' table")
            elif 'team_membership' in existing_tables and 'group_membership' not in existing_tables:
                # Only old table exists - rename it
                print("\nRenaming 'team_membership' table to 'group_membership'...")
                db.session.execute(text('ALTER TABLE team_membership RENAME TO group_membership'))
                db.session.commit()
                print("✓ Renamed 'team_membership' to 'group_membership'")
            else:
                print("\n'team_membership' table not found or already migrated")
            # Migrate team_id to group_id in tags table.
            # NOTE(review): runs unconditionally and assumes `tag` has BOTH the
            # old and new columns; if either is missing this raises and the
            # whole migration aborts via the except below -- confirm schema.
            print("\nMigrating tag associations from team_id to group_id...")
            result = db.session.execute(text(
                'UPDATE tag SET group_id = team_id WHERE team_id IS NOT NULL AND group_id IS NULL'
            ))
            db.session.commit()
            print(f"✓ Migrated {result.rowcount} tag associations")
            # Migrate share_with_team_lead to share_with_group_lead in tags
            result = db.session.execute(text(
                'UPDATE tag SET share_with_group_lead = share_with_team_lead WHERE share_with_team_lead IS NOT NULL AND share_with_group_lead IS NULL'
            ))
            db.session.commit()
            print(f"✓ Migrated {result.rowcount} share_with_lead settings")
            print("\n✅ Migration completed successfully!")
            print("\nPlease restart the application for changes to take full effect.")
        except Exception as e:
            # Broad catch is deliberate here: any failure should roll back
            # and exit non-zero rather than leave a half-migrated schema.
            print(f"\n❌ Error during migration: {e}")
            db.session.rollback()
            sys.exit(1)
if __name__ == '__main__':
    print("=" * 60)
    print("Team to Group Migration Script")
    print("=" * 60)
    print("\nThis script will rename database tables:")
    # Fixed: the arrows in these two lines had been lost to mis-encoding,
    # printing the garbled "'team''group'".
    print("  - 'team' → 'group'")
    print("  - 'team_membership' → 'group_membership'")
    # Check for --yes flag to skip confirmation
    if '--yes' in sys.argv or '-y' in sys.argv:
        print("\nAuto-confirming migration (--yes flag detected)...\n")
    else:
        print("\nPress Ctrl+C to cancel, or Enter to continue...")
        try:
            input()
        except KeyboardInterrupt:
            print("\n\nMigration cancelled.")
            sys.exit(0)
    migrate_tables()

219
scripts/parse_asr_json.py Executable file
View File

@@ -0,0 +1,219 @@
#!/usr/bin/env python3
"""
ASR JSON Parser - Analyzes speaker information in ASR response JSON files
"""
import json
import sys
from collections import defaultdict, Counter
def analyze_asr_json(json_data):
    """
    Analyze ASR JSON data to understand speaker distribution and identify issues.

    Prints a report to stdout covering segment-level and word-level speaker
    coverage, details the first few segments that lack a speaker, and ends
    with a recommended speaker-assignment strategy.

    Args:
        json_data: Parsed ASR response. Expected to be a dict with a
            'segments' list; each segment may have 'speaker', 'text',
            'start', 'end', and a 'words' list whose entries may each have
            a 'speaker'.

    Returns:
        None. Invalid input prints an error message and returns early.
    """
    if not isinstance(json_data, dict) or 'segments' not in json_data:
        print("ERROR: Invalid JSON structure. Expected dict with 'segments' key.")
        return
    segments = json_data['segments']
    if not isinstance(segments, list):
        print("ERROR: 'segments' should be a list.")
        return

    # (f-prefixes removed from placeholder-free literals throughout.)
    print("=== ASR JSON Analysis ===")
    print(f"Total segments: {len(segments)}")
    print()

    # Segment-level tallies
    segment_speakers = []
    segments_with_speaker = 0
    segments_without_speaker = 0
    # Word-level tallies
    word_speakers = []
    words_with_speaker = 0
    words_without_speaker = 0
    # Indices of segments whose 'speaker' is missing or None
    null_speaker_segments = []

    for i, segment in enumerate(segments):
        # Analyze segment-level speaker
        segment_speaker = segment.get('speaker')
        if segment_speaker is not None:
            segment_speakers.append(segment_speaker)
            segments_with_speaker += 1
        else:
            segments_without_speaker += 1
            null_speaker_segments.append(i)
        # Analyze word-level speakers
        words = segment.get('words', [])
        for word_data in words:
            word_speaker = word_data.get('speaker')
            if word_speaker is not None:
                word_speakers.append(word_speaker)
                words_with_speaker += 1
            else:
                words_without_speaker += 1

    # Print segment-level analysis
    print("=== SEGMENT-LEVEL SPEAKERS ===")
    print(f"Segments with speakers: {segments_with_speaker}")
    print(f"Segments without speakers: {segments_without_speaker}")
    if segment_speakers:
        segment_speaker_counts = Counter(segment_speakers)
        print(f"Unique segment speakers: {sorted(segment_speaker_counts.keys())}")
        print("Segment speaker distribution:")
        for speaker, count in segment_speaker_counts.most_common():
            print(f" {speaker}: {count} segments")
    else:
        print("No segment-level speakers found!")
    print()

    # Print word-level analysis
    print("=== WORD-LEVEL SPEAKERS ===")
    print(f"Words with speakers: {words_with_speaker}")
    print(f"Words without speakers: {words_without_speaker}")
    if word_speakers:
        word_speaker_counts = Counter(word_speakers)
        print(f"Unique word speakers: {sorted(word_speaker_counts.keys())}")
        print("Word speaker distribution:")
        for speaker, count in word_speaker_counts.most_common():
            print(f" {speaker}: {count} words")
    else:
        print("No word-level speakers found!")
    print()

    # Detail the segments that lack speakers (indices capped at 10, full
    # detail for the first 5 only)
    if null_speaker_segments:
        print("=== SEGMENTS WITHOUT SPEAKERS ===")
        print(f"Segment indices without speakers: {null_speaker_segments[:10]}{'...' if len(null_speaker_segments) > 10 else ''}")
        print("\nFirst few segments without speakers:")
        for i in null_speaker_segments[:5]:
            segment = segments[i]
            text = segment.get('text', '').strip()
            start = segment.get('start')
            end = segment.get('end')
            words = segment.get('words', [])
            print(f" Segment {i}: '{text}' ({start}-{end}s)")
            print(f" Keys: {list(segment.keys())}")
            # A segment may still carry per-word speakers even when its own
            # speaker field is missing.
            word_speakers_in_segment = [w.get('speaker') for w in words if w.get('speaker')]
            if word_speakers_in_segment:
                word_speaker_counts = Counter(word_speakers_in_segment)
                print(f" Word speakers: {dict(word_speaker_counts)}")
            else:
                print(" No word speakers either")
        print()

    # Suggest speaker assignment strategy
    print("=== SPEAKER ASSIGNMENT STRATEGY ===")
    if segments_without_speaker > 0:
        print(f"Found {segments_without_speaker} segments without speakers.")
        if words_with_speaker > 0:
            print("RECOMMENDATION: Use word-level speaker information to assign segment speakers.")
            print("Strategy: For segments without speakers, find the most common speaker among their words.")
        else:
            print("RECOMMENDATION: Assign a default speaker label (e.g., 'UNKNOWN_SPEAKER') to segments without speakers.")
    else:
        print("All segments have speakers assigned. No action needed.")
def suggest_preprocessing_fix(json_data):
    """
    Suggest how to fix the preprocessing based on the JSON structure.

    Counts the segments whose missing 'speaker' could be recovered from
    word-level speaker tags and, when any exist, prints a ready-to-paste
    code suggestion for the ASR processing function.
    """
    print("\n=== PREPROCESSING FIX SUGGESTION ===")
    segments = json_data.get('segments', [])
    if not segments:
        # Nothing to analyze; header was already printed.
        return
    # Check if we can derive segment speakers from word speakers
    segments_fixable = 0
    for segment in segments:
        if segment.get('speaker') is None:
            words = segment.get('words', [])
            word_speakers = [w.get('speaker') for w in words if w.get('speaker')]
            if word_speakers:
                segments_fixable += 1
    if segments_fixable > 0:
        print(f"{segments_fixable} segments can be fixed using word-level speaker information.")
        print("\nSuggested code fix:")
        # The suggestion below is emitted verbatim for the maintainer to paste
        # into the ASR processing code; it is not executed here.
        print("""
# In the ASR processing function, replace the segment processing with:
for i, segment in enumerate(asr_response_data['segments']):
    speaker = segment.get('speaker')
    text = segment.get('text', '').strip()

    # If segment doesn't have a speaker, try to derive from words
    if speaker is None:
        words = segment.get('words', [])
        word_speakers = [w.get('speaker') for w in words if w.get('speaker')]
        if word_speakers:
            # Use the most common speaker among the words
            from collections import Counter
            speaker_counts = Counter(word_speakers)
            speaker = speaker_counts.most_common(1)[0][0]
            app.logger.info(f"Derived speaker '{speaker}' for segment {i} from word-level data")
        else:
            speaker = 'UNKNOWN_SPEAKER'
            app.logger.warning(f"No speaker info available for segment {i}, using UNKNOWN_SPEAKER")

    simplified_segments.append({
        'speaker': speaker,
        'sentence': text,
        'start_time': segment.get('start'),
        'end_time': segment.get('end')
    })
""")
    else:
        print("❌ Segments cannot be fixed using word-level data.")
        print("Recommendation: Assign 'UNKNOWN_SPEAKER' to segments without speakers.")
def main():
    """Entry point: read ASR JSON from a file or stdin and print the analysis.

    Usage: parse_asr_json.py <json_file>  (or '-' for stdin).
    Exits 1 on bad usage, missing file, invalid JSON, or any other error.
    """
    if len(sys.argv) != 2:
        print("Usage: python parse_asr_json.py <json_file>")
        print("   or: python parse_asr_json.py -")
        print("   (use '-' to read from stdin)")
        sys.exit(1)
    filename = sys.argv[1]
    try:
        if filename == '-':
            # Read from stdin
            json_text = sys.stdin.read()
        else:
            # Read from file
            with open(filename, 'r', encoding='utf-8') as f:
                json_text = f.read()
        # Parse JSON
        json_data = json.loads(json_text)
        # Analyze the data
        analyze_asr_json(json_data)
        suggest_preprocessing_fix(json_data)
    except FileNotFoundError:
        # Fixed: the message previously printed a literal placeholder
        # instead of the offending filename.
        print(f"ERROR: File '{filename}' not found.")
        sys.exit(1)
    except json.JSONDecodeError as e:
        print(f"ERROR: Invalid JSON - {e}")
        sys.exit(1)
    except Exception as e:
        print(f"ERROR: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()

21
scripts/pre-commit Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/sh
#
# Pre-commit hook: runs PostgreSQL migration compatibility tests
# when migration-related files are staged.
#
# Install: ln -sf ../../scripts/pre-commit .git/hooks/pre-commit

STAGED_FILES=$(git diff --cached --name-only --diff-filter=ACM)

# Do nothing unless migration-related files are staged.
if echo "$STAGED_FILES" | grep -qE '^(src/init_db\.py|src/utils/database\.py|src/models/)'; then
    echo "Migration files changed — running PostgreSQL compatibility checks..."
    # Block the commit if the compatibility test suite fails.
    if ! python tests/test_migration_compatibility.py; then
        echo ""
        echo "Pre-commit hook FAILED: PostgreSQL migration compatibility issues found."
        echo "Fix the issues above before committing."
        exit 1
    fi
    echo "All migration compatibility checks passed."
fi

130
scripts/reset_db.py Normal file
View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""Database reset helper: resolves the Flask app and db handles at import."""
# Add this near the top if you run this standalone often outside app context
import os
import sys
import shutil  # NOTE(review): unused in this section; presumably used further down (e.g. to purge uploads) -- confirm

# Add project root to path if necessary for 'app' import
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Load environment variables in case DB path relies on them (optional here)
# from dotenv import load_dotenv
# load_dotenv()

# Check if running within app context already (e.g., via Flask command)
try:
    from flask import current_app
    # Ensure app context is pushed if needed for config access
    app = current_app._get_current_object()
    # Make sure db is initialized within the app context if needed
    # (SQLAlchemy initialization in app.py handles this mostly)
    with app.app_context():
        db = app.extensions['sqlalchemy'].db  # Access db via extensions
except (RuntimeError, AttributeError, KeyError):
    # If not in app context, import directly
    try:
        # Ensure this import reflects the updated app.py with the new model
        from src.app import app, db
    except ImportError as e:
        print(f"Error: Could not import 'app' and 'db': {e}")
        print("Make sure reset_db.py is runnable and PYTHONPATH is set.")
        sys.exit(1)
def reset_database(delete_uploads=True):
    """Delete the SQLite database file, recreate the schema, and optionally
    purge the uploads directory.

    Args:
        delete_uploads (bool): When True, every file and subdirectory under
            ./uploads is removed; the directory is created if it is missing.

    NOTE(review): only SQLite URIs are fully handled; for any other backend
    the deletion logic below is a best guess — confirm before using with
    PostgreSQL/MySQL.
    """
    # Determine the database path relative to the instance folder.
    # Use app config if available; fall back to ./instance.
    instance_path = app.instance_path if hasattr(app, 'instance_path') else os.path.join(os.getcwd(), 'instance')
    try:
        # Ensure app context for config access if not already present
        with app.app_context():
            # Use absolute path from config
            db_uri = app.config.get('SQLALCHEMY_DATABASE_URI', 'sqlite:///instance/transcriptions.db')
            # Handle relative vs absolute paths specified in URI
            if db_uri.startswith('sqlite:///'):
                # Assume absolute path from URI root if starts with '///'.
                # NOTE(review): 'sqlite:///relative/path' URIs also match this
                # branch and are treated as absolute — verify against how
                # app.py builds the URI.
                db_path = db_uri.replace('sqlite:///', '/', 1)  # Replace only first
                # Ensure instance path reflects the directory containing the DB
                instance_path = os.path.dirname(db_path)
            elif db_uri.startswith('sqlite://'):
                # Assume relative path from instance folder
                db_filename = db_uri.split('/')[-1]
                db_path = os.path.join(instance_path, db_filename)
            else:  # Handle other DB types or formats if needed
                print(f"Warning: Non-SQLite URI detected: {db_uri}. Deletion logic might need adjustment.")
                # Attempt to parse or fallback
                db_filename = db_uri.split('/')[-1]  # Best guess
                db_path = os.path.join(instance_path, db_filename)
    except Exception as config_e:
        print(f"Error accessing app config for DB path: {config_e}. Using default.")
        # Fallback if config access fails
        instance_path = os.path.join(os.getcwd(), 'instance')
        db_filename = 'transcriptions.db'
        db_path = os.path.join(instance_path, db_filename)

    # Ensure instance directory exists
    print(f"Ensuring instance directory exists: {instance_path}")
    os.makedirs(instance_path, exist_ok=True)
    print(f"Database path identified as: {db_path}")

    # Remove existing database if it exists
    if os.path.exists(db_path):
        print(f"Removing existing database at {db_path}")
        try:
            os.remove(db_path)
            # Also remove the SQLite rollback-journal file if it exists
            journal_path = db_path + "-journal"
            if os.path.exists(journal_path):
                os.remove(journal_path)
                # NOTE(review): this message is printed after the file is
                # already removed — consider logging before os.remove().
                print(f"Removing existing journal file at {journal_path}")
        except OSError as e:
            # Deliberately non-fatal: the run continues so create_all below
            # can still attempt to (re)create the schema.
            print(f"Error removing database file: {e}. Check permissions or if it's in use.")
            # Decide whether to exit or continue
            # sys.exit(1)

    # Create application context to work with the database
    try:
        with app.app_context():
            print("Creating new database schema (including 'summary' column)...")
            # Create all tables defined in models (app.py)
            db.create_all()
            print("Database schema created successfully!")
    except Exception as e:
        print(f"Error creating database schema: {e}")
        # Attempt rollback if possible (though less relevant for create_all)
        try:
            db.session.rollback()
        except Exception as rb_e:
            print(f"Rollback attempt failed: {rb_e}")
        sys.exit(1)

    # Delete all files in the uploads directory if requested
    if delete_uploads:
        try:
            # Assumes the uploads directory lives under the current working
            # directory — TODO confirm against app config.
            uploads_dir = os.path.join(os.getcwd(), 'uploads')
            if os.path.exists(uploads_dir):
                print(f"Deleting all files in uploads directory: {uploads_dir}")
                for filename in os.listdir(uploads_dir):
                    file_path = os.path.join(uploads_dir, filename)
                    try:
                        if os.path.isfile(file_path):
                            os.remove(file_path)
                            print(f"Deleted file: {file_path}")
                        elif os.path.isdir(file_path):
                            shutil.rmtree(file_path)
                            print(f"Deleted directory: {file_path}")
                    except Exception as e:
                        # Per-entry failures are logged but do not stop the sweep.
                        print(f"Error deleting {file_path}: {e}")
                print("All files in uploads directory have been deleted.")
            else:
                print(f"Uploads directory not found: {uploads_dir}")
                # Create the directory if it doesn't exist
                os.makedirs(uploads_dir, exist_ok=True)
                print(f"Created uploads directory: {uploads_dir}")
        except Exception as e:
            print(f"Error cleaning uploads directory: {e}")
# Entry point: wipe the SQLite database, recreate the schema, and clear uploads.
if __name__ == "__main__":
    print("Attempting to reset the database and clean up all data...")
    reset_database(delete_uploads=True)
    print("Database reset process finished.")

163
scripts/resize_logo.py Normal file
View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
Logo Resizer Script for Speakr
Resizes a source PNG image to all required icon sizes for PWA and favicon support.
Usage:
python resize_logo.py <source_image.png>
Requirements:
pip install Pillow
This script will create all the necessary icon sizes in the static/img/ directory.
"""
import sys
import os
from PIL import Image, ImageDraw
import argparse
def create_maskable_version(image, size):
    """Return a maskable PWA icon: the logo centered on a transparent
    square canvas with a 20% safe-zone margin on every side.
    """
    # The logo occupies 60% of the canvas, leaving 20% padding per edge.
    inner = int(size * 0.6)
    offset = (size - inner) // 2

    # Transparent square canvas the icon will be pasted onto.
    canvas = Image.new('RGBA', (size, size), (0, 0, 0, 0))
    scaled = image.resize((inner, inner), Image.Resampling.LANCZOS)

    # Use the scaled image as its own alpha mask when it carries transparency.
    mask = scaled if scaled.mode == 'RGBA' else None
    canvas.paste(scaled, (offset, offset), mask)
    return canvas
def resize_logo(source_path, output_dir="static/img"):
    """Resize the source image to all required PWA/favicon icon sizes.

    Args:
        source_path: Path to the source PNG image.
        output_dir: Directory the resized icons are written to (created
            if it does not exist).

    Returns:
        True on success, False if the source file is missing or Pillow
        fails to process the image.
    """
    # Check if source file exists
    if not os.path.exists(source_path):
        print(f"Error: Source file '{source_path}' not found!")
        return False

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    try:
        # Open the source image
        with Image.open(source_path) as img:
            # Work in RGBA so transparency survives resizing and pasting.
            if img.mode != 'RGBA':
                img = img.convert('RGBA')

            print(f"Source image: {img.size[0]}x{img.size[1]} pixels")
            print(f"Output directory: {output_dir}")
            print()

            # Define all the sizes we need
            sizes = {
                # Essential PWA icons
                'icon-192x192.png': 192,
                'icon-512x512.png': 512,
                # Additional recommended icons
                'icon-16x16.png': 16,
                'icon-32x32.png': 32,
                'icon-180x180.png': 180,  # Apple touch icon
                # Maskable version
                'icon-maskable-512x512.png': 512,
            }

            # Resize to each required size
            for filename, size in sizes.items():
                output_path = os.path.join(output_dir, filename)
                if 'maskable' in filename:
                    # Maskable icons need a safe-zone margin around the logo.
                    resized = create_maskable_version(img, size)
                    # BUG FIX: the message previously printed a literal
                    # placeholder instead of the actual icon filename.
                    print(f"✓ Created maskable icon: {filename} ({size}x{size})")
                else:
                    # Regular resize
                    resized = img.resize((size, size), Image.Resampling.LANCZOS)
                    print(f"✓ Created icon: {filename} ({size}x{size})")
                # Save each icon as it is produced (inside the loop).
                resized.save(output_path, 'PNG', optimize=True)

            print()
            print("🎉 All icons created successfully!")
            print()
            print("Next steps:")
            print("1. Replace static/img/favicon.svg with your SVG version (if you have one)")
            print("2. Clear browser cache and test the new icons")
            print("3. Test PWA installation to verify icons appear correctly")
            return True
    except Exception as e:
        print(f"Error processing image: {e}")
        return False
def create_ico_favicon(source_path, output_dir="static/img"):
    """Create a multi-size ICO favicon file (16, 32 and 48 px frames).

    Args:
        source_path: Path to the source PNG image.
        output_dir: Directory the favicon.ico is written to.

    Failures are reported as a warning rather than raised, so an ICO
    problem never aborts the main icon-generation run.
    """
    try:
        with Image.open(source_path) as img:
            if img.mode != 'RGBA':
                img = img.convert('RGBA')

            # Frame sizes to embed in the ICO file.
            sizes = [16, 32, 48]
            ico_path = os.path.join(output_dir, 'favicon.ico')

            # BUG FIX: the previous code resized the image down to 16x16
            # first and then saved that tiny image with larger target sizes;
            # Pillow drops requested sizes larger than the source image, so
            # the resulting ICO contained only a 16x16 frame. Saving from
            # the full-size source lets Pillow embed every requested size.
            img.save(ico_path, format='ICO', sizes=[(s, s) for s in sizes])
            print(f"✓ Created favicon.ico with sizes: {sizes}")
    except Exception as e:
        print(f"Warning: Could not create favicon.ico: {e}")
def main():
    """CLI entry point: parse arguments, generate all icon sizes, and
    optionally build favicon.ico, then list the files that were created.
    """
    arg_parser = argparse.ArgumentParser(description='Resize logo for Speakr PWA icons')
    arg_parser.add_argument('source', help='Source PNG image file')
    arg_parser.add_argument('--output-dir', default='static/img', help='Output directory (default: static/img)')
    arg_parser.add_argument('--create-ico', action='store_true', help='Also create favicon.ico file')
    opts = arg_parser.parse_args()

    print("🎨 Speakr Logo Resizer")
    print("=" * 50)

    # Generate the PNG icon set first; everything else depends on it.
    ok = resize_logo(opts.source, opts.output_dir)

    if ok and opts.create_ico:
        print()
        create_ico_favicon(opts.source, opts.output_dir)

    if not ok:
        return

    # Summarize the generated PNG icons with their on-disk sizes.
    print()
    print("📁 Files created in", opts.output_dir + ":")
    for entry in os.listdir(opts.output_dir):
        if entry.startswith('icon-') and entry.endswith('.png'):
            nbytes = os.path.getsize(os.path.join(opts.output_dir, entry))
            print(f" {entry} ({nbytes:,} bytes)")
# Entry point: show a friendly usage hint when no argument is given
# (argparse inside main() would also reject this, but less politely).
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python resize_logo.py <source_image.png>")
        print("Example: python resize_logo.py my_logo.png")
        sys.exit(1)
    main()

83
scripts/resize_logo.sh Executable file
View File

@@ -0,0 +1,83 @@
#!/bin/bash
# Logo Resizer Script for Speakr (ImageMagick version)
# Resizes a source PNG image to all required icon sizes for PWA and favicon support.
#
# Usage: ./resize_logo.sh <source_image.png>
# Requirements: ImageMagick (sudo apt install imagemagick)
set -e

# Check if source file is provided
if [ $# -eq 0 ]; then
    echo "Usage: $0 <source_image.png>"
    echo "Example: $0 my_logo.png"
    exit 1
fi

SOURCE_FILE="$1"
OUTPUT_DIR="static/img"

# Check if source file exists
if [ ! -f "$SOURCE_FILE" ]; then
    echo "Error: Source file '$SOURCE_FILE' not found!"
    exit 1
fi

# Check if ImageMagick is installed
if ! command -v convert &> /dev/null; then
    echo "Error: ImageMagick is not installed!"
    echo "Install it with: sudo apt install imagemagick"
    exit 1
fi

# Create output directory if it doesn't exist
mkdir -p "$OUTPUT_DIR"

echo "🎨 Speakr Logo Resizer (ImageMagick)"
echo "=================================================="
echo "Source file: $SOURCE_FILE"
echo "Output directory: $OUTPUT_DIR"
echo

# Define sizes
declare -A SIZES=(
    ["icon-16x16.png"]=16
    ["icon-32x32.png"]=32
    ["icon-180x180.png"]=180
    ["icon-192x192.png"]=192
    ["icon-512x512.png"]=512
)

# Resize to each size
for filename in "${!SIZES[@]}"; do
    size=${SIZES[$filename]}
    output_path="$OUTPUT_DIR/$filename"
    convert "$SOURCE_FILE" -resize "${size}x${size}" "$output_path"
    echo "✓ Created icon: $filename (${size}x${size})"
done

# Create maskable version with padding.
# BUG FIX: -background must be set BEFORE -extent, otherwise the padding is
# filled with the default (white) background instead of transparency.
echo "✓ Creating maskable icon with safe zone..."
convert "$SOURCE_FILE" -resize 307x307 -background transparent -gravity center -extent 512x512 "$OUTPUT_DIR/icon-maskable-512x512.png"
echo "✓ Created maskable icon: icon-maskable-512x512.png (512x512)"

# Create favicon.ico.
# BUG FIX: chaining -resize 16x16 -resize 32x32 -resize 48x48 resizes ONE
# image repeatedly (ending as a blurry 48x48 upscaled from 16x16) instead of
# producing a multi-frame ICO. icon:auto-resize embeds all three sizes.
# (The redundant second `command -v convert` check was removed — availability
# is already verified above.)
echo "✓ Creating favicon.ico..."
convert "$SOURCE_FILE" -define icon:auto-resize=48,32,16 "$OUTPUT_DIR/favicon.ico"
echo "✓ Created favicon.ico"

echo
echo "🎉 All icons created successfully!"
echo
echo "📁 Files created:"
ls -la "$OUTPUT_DIR"/icon-*.png "$OUTPUT_DIR"/favicon.ico 2>/dev/null || true
echo
echo "Next steps:"
echo "1. Replace static/img/favicon.svg with your SVG version (if you have one)"
echo "2. Clear browser cache and test the new icons"
echo "3. Test PWA installation to verify icons appear correctly"

44
scripts/test-docs-build.sh Executable file
View File

@@ -0,0 +1,44 @@
#!/bin/bash
# Test script to validate documentation build locally
# This mimics what the GitHub Actions workflow does
set -e

echo "Testing documentation build..."

# Check if we're in the right directory
if [ ! -f "docs/mkdocs.yml" ]; then
    echo "Error: docs/mkdocs.yml not found. Run this script from the project root."
    exit 1
fi

PROJECT_ROOT="$(pwd)"

# BUG FIX: with `set -e`, any failure after the venv is created used to skip
# the cleanup at the bottom and leave .venv-docs-test behind. An EXIT trap
# guarantees the throwaway venv is removed on success AND on failure.
cleanup() {
    cd "$PROJECT_ROOT"
    rm -rf .venv-docs-test
    echo "Cleanup complete."
}
trap cleanup EXIT

# Create a virtual environment for testing
echo "Creating virtual environment..."
python3 -m venv .venv-docs-test
source .venv-docs-test/bin/activate

# Install dependencies
echo "Installing dependencies..."
pip install --upgrade pip
pip install -r docs/requirements-docs.txt

# Build the documentation
echo "Building documentation..."
cd docs
export CI=true # Enable git plugin in CI mode
mkdocs build --strict --site-dir _test_site

echo ""
echo "✅ Documentation build successful!"
echo "Built site is in: docs/_test_site"
echo ""
echo "To serve locally for testing:"
echo " cd docs && mkdocs serve"

60
scripts/update_version.py Normal file
View File

@@ -0,0 +1,60 @@
#!/usr/bin/env python3
"""
Simple script to update the VERSION file.
Usage: python update_version.py v0.4.3
"""
import sys
import re
def update_version(new_version):
    """Write *new_version* to the VERSION file and optionally tag it in git.

    The version is normalised to a leading 'v' (e.g. "0.4.3" -> "v0.4.3").
    Git tagging is best-effort: its failures are reported but never change
    the return value.

    Args:
        new_version: Version string, e.g. "v0.4.3" or "0.4.3-alpha".

    Returns:
        True if the VERSION file was written successfully, False otherwise.
    """
    import subprocess

    # Basic format check only; no trailing anchor on purpose so suffixes
    # like "-alpha" (see the usage examples) are still accepted.
    if not re.match(r'^v?\d+\.\d+\.\d+', new_version):
        print(f"Warning: Version '{new_version}' doesn't follow standard format (v1.2.3)")

    # Ensure version starts with 'v' (tags conventionally use vX.Y.Z).
    if not new_version.startswith('v'):
        new_version = 'v' + new_version

    # Write to VERSION file — the only step that affects the return value.
    try:
        with open('VERSION', 'w') as f:
            f.write(new_version)
        print(f"✅ Updated VERSION file to: {new_version}")
    except Exception as e:
        print(f"❌ Failed to update VERSION file: {e}")
        return False

    # BUG FIX: previously a failed `git push` (or an input() error) was
    # reported as "Not in a git repo or git tag already exists". Each git
    # step now gets its own handler with an accurate message.
    try:
        # Check if we're in a git repo
        subprocess.check_output(['git', 'status'], stderr=subprocess.DEVNULL)
    except (subprocess.CalledProcessError, OSError):
        print("  Not in a git repo; skipping tag creation")
        return True

    try:
        subprocess.check_output(['git', 'tag', new_version], stderr=subprocess.DEVNULL)
        print(f"✅ Created git tag: {new_version}")
        # Ask user if they want to push
        response = input("Push tag to remote? (y/N): ").strip().lower()
        if response == 'y':
            subprocess.check_output(['git', 'push', 'origin', new_version])
            print(f"✅ Pushed tag {new_version} to remote")
    except subprocess.CalledProcessError as e:
        # Tag already exists, or the push was rejected by the remote.
        print(f"  Git command failed: {e}")
    except Exception as e:
        print(f"  Git operations failed: {e}")

    return True
if __name__ == "__main__":
    # Exactly one positional argument (the new version string) is required.
    args = sys.argv[1:]
    if len(args) != 1:
        print("Usage: python update_version.py <version>")
        print("Example: python update_version.py v0.4.3")
        print("Example: python update_version.py 0.4.3-alpha")
        sys.exit(1)
    # Exit status mirrors whether the VERSION file was written.
    if update_version(args[0]):
        sys.exit(0)
    sys.exit(1)