#!/usr/bin/env python3
"""
Migration script to process existing recordings for Inquire Mode.

This script will chunk and vectorize all existing recordings that haven't
been processed yet.
"""

import argparse
import os
import sys
import traceback

from src.app import app, db, Recording, TranscriptChunk, process_recording_chunks


def count_recordings_needing_processing():
    """Return the completed recordings that still need chunk processing.

    Despite the name, the return value is the list of ``Recording`` objects
    (not a number); callers take ``len()`` of it when they need the count.

    A recording qualifies when its status is ``'COMPLETED'``, it has a
    non-empty transcription, and no ``TranscriptChunk`` row references it.

    Returns:
        list: Recordings that have a transcription but no chunks yet.
    """
    with app.app_context():
        completed_recordings = Recording.query.filter_by(status='COMPLETED').all()

        # One count query is issued per transcribed recording.
        return [
            recording
            for recording in completed_recordings
            if recording.transcription
            and TranscriptChunk.query.filter_by(recording_id=recording.id).count() == 0
        ]


def migrate_existing_recordings(batch_size=10, dry_run=False):
    """
    Process existing recordings in batches to create chunks and embeddings.

    Args:
        batch_size (int): Number of recordings to process at once. Must be at
            least 1 when actually processing; it is not used for a dry run.
        dry_run (bool): If True, just show what would be processed.

    Returns:
        bool: True when there was nothing to do, when a dry run completed, or
        when every recording was processed and committed without error;
        False if any recording failed or any batch commit failed.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 and there is work to
            process (a non-positive step would either crash ``range`` or
            silently skip every recording).
    """
    with app.app_context():
        recordings_to_process = count_recordings_needing_processing()
        total = len(recordings_to_process)

        print(f"🔍 Found {total} recordings that need chunk processing")

        if not recordings_to_process:
            print("✅ All recordings are already processed!")
            return True

        if dry_run:
            print("\n📋 Recordings that would be processed:")
            for i, recording in enumerate(recordings_to_process, 1):
                print(f" {i}. {recording.title} (ID: {recording.id}) - {len(recording.transcription)} chars")
            print("\nThis is a dry run. Use --process to actually run the migration.")
            return True

        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        print(f"🚀 Processing {total} recordings in batches of {batch_size}")

        processed = 0
        errors = 0

        for start in range(0, total, batch_size):
            batch = recordings_to_process[start:start + batch_size]
            print(f"\n📦 Processing batch {start // batch_size + 1} ({len(batch)} recordings)...")

            # Successes are tallied per batch and only credited once the
            # batch commit succeeds, so every recording is counted exactly
            # once as either processed or failed.
            batch_processed = 0

            for recording in batch:
                try:
                    print(f" ⏳ Processing: {recording.title} (ID: {recording.id})")
                    success = process_recording_chunks(recording.id)

                    if success:
                        # Get chunk count to report
                        chunk_count = TranscriptChunk.query.filter_by(recording_id=recording.id).count()
                        print(f" ✅ Created {chunk_count} chunks")
                        # Incremented last so a failure while reporting is
                        # counted as one error, not as a success plus an error.
                        batch_processed += 1
                    else:
                        errors += 1
                        print(f" ❌ Failed to process recording {recording.id}")

                except Exception as e:
                    errors += 1
                    print(f" ❌ Error processing recording {recording.id}: {e}")

            # Commit batch
            try:
                db.session.commit()
            except Exception as e:
                db.session.rollback()
                print(f" ❌ Error committing batch: {e}")
                # Only the recordings that had looked successful become
                # errors here; the others were already counted above.
                errors += batch_processed
            else:
                processed += batch_processed
                print(" 💾 Batch committed successfully")

        attempted = processed + errors

        print("\n📊 Migration Summary:")
        print(f" ✅ Successfully processed: {processed}")
        print(f" ❌ Errors: {errors}")
        if attempted > 0:
            print(f" 📈 Success rate: {(processed / attempted * 100):.1f}%")
        else:
            print("N/A")

        return errors == 0


def main():
    """Parse command line arguments and run the migration.

    One of ``--dry-run`` or ``--process`` must be given; if both are passed
    the dry run takes precedence. ``--process`` asks for interactive
    confirmation before doing any work.

    Returns:
        bool: True on success, False on failure, cancellation, or when no
        mode flag was supplied. Invalid arguments exit through argparse.
    """
    parser = argparse.ArgumentParser(description='Migrate existing recordings for Inquire Mode')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be processed without actually processing')
    parser.add_argument('--process', action='store_true',
                        help='Actually process the recordings')
    parser.add_argument('--batch-size', type=int, default=10,
                        help='Number of recordings to process in each batch (default: 10)')

    args = parser.parse_args()

    if not args.dry_run and not args.process:
        print("❌ Please specify either --dry-run or --process")
        print("Use --help for more information")
        return False

    if args.batch_size < 1:
        # Prints usage and exits with status 2, like any other bad argument.
        parser.error("--batch-size must be a positive integer")

    print("🎯 Inquire Mode Migration Tool")
    print("=" * 40)

    try:
        if args.dry_run:
            success = migrate_existing_recordings(args.batch_size, dry_run=True)
        else:
            print("⚠️ This will process all existing recordings and create embeddings.")
            print("⚠️ This may take a while and use significant CPU/memory.")

            try:
                confirm = input("Continue? (y/N): ")
            except EOFError:
                # No interactive stdin (cron, pipe, detached container):
                # treat the missing answer as "no" instead of crashing.
                confirm = ""

            if confirm.strip().lower() != 'y':
                print("❌ Migration cancelled by user")
                return False

            success = migrate_existing_recordings(args.batch_size, dry_run=False)

        return success

    except KeyboardInterrupt:
        print("\n❌ Migration cancelled by user")
        return False
    except Exception as e:
        print(f"❌ Migration failed: {e}")
        traceback.print_exc()
        return False


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)