Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)

This commit is contained in:
InnovA AI
2026-03-16 21:47:37 +00:00
commit 42772a31ed
365 changed files with 103572 additions and 0 deletions

View File

@@ -0,0 +1,319 @@
#!/usr/bin/env python3
"""
Test script for ffprobe codec detection functionality.
This script tests the new codec-based detection system to ensure it correctly
identifies audio codecs, video files, and lossless formats.
"""
import os
import sys
import tempfile
import subprocess
from pathlib import Path
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.utils.ffprobe import (
get_codec_info,
is_video_file,
is_audio_file,
get_audio_codec,
needs_audio_conversion,
is_lossless_audio,
get_duration,
FFProbeError
)
def create_test_audio_file(codec, output_path, duration=1.0):
"""Create a test audio file with specific codec."""
codec_map = {
'mp3': ['ffmpeg', '-f', 'lavfi', '-i', f'sine=frequency=440:duration={duration}', '-acodec', 'libmp3lame', '-b:a', '128k', output_path],
'aac': ['ffmpeg', '-f', 'lavfi', '-i', f'sine=frequency=440:duration={duration}', '-acodec', 'aac', '-b:a', '128k', output_path],
'opus': ['ffmpeg', '-f', 'lavfi', '-i', f'sine=frequency=440:duration={duration}', '-acodec', 'libopus', '-b:a', '64k', output_path],
'flac': ['ffmpeg', '-f', 'lavfi', '-i', f'sine=frequency=440:duration={duration}', '-acodec', 'flac', output_path],
'pcm_s16le': ['ffmpeg', '-f', 'lavfi', '-i', f'sine=frequency=440:duration={duration}', '-acodec', 'pcm_s16le', '-ar', '44100', output_path],
'vorbis': ['ffmpeg', '-f', 'lavfi', '-i', f'sine=frequency=440:duration={duration}', '-acodec', 'libvorbis', '-b:a', '128k', output_path],
}
if codec not in codec_map:
raise ValueError(f"Unknown codec: {codec}")
subprocess.run(codec_map[codec], check=True, capture_output=True)
def create_test_video_file(output_path, duration=1.0):
"""Create a test video file with audio."""
subprocess.run([
'ffmpeg', '-f', 'lavfi', '-i', f'testsrc=duration={duration}:size=320x240:rate=1',
'-f', 'lavfi', '-i', f'sine=frequency=440:duration={duration}',
'-acodec', 'aac', '-vcodec', 'libx264', '-pix_fmt', 'yuv420p',
output_path
], check=True, capture_output=True)
def test_codec_detection():
"""Test basic codec detection."""
print("\n=== Testing Codec Detection ===\n")
with tempfile.TemporaryDirectory() as tmpdir:
test_files = {
'mp3': 'test.mp3',
'aac': 'test.m4a',
'opus': 'test.opus',
'flac': 'test.flac',
'pcm_s16le': 'test.wav',
'vorbis': 'test.ogg',
}
for codec, filename in test_files.items():
filepath = os.path.join(tmpdir, filename)
try:
print(f"Creating test file: {filename} with codec {codec}...")
create_test_audio_file(codec, filepath)
print(f" Probing {filename}...")
codec_info = get_codec_info(filepath)
detected_codec = codec_info['audio_codec']
print(f" ✓ Detected codec: {detected_codec}")
print(f" Has audio: {codec_info['has_audio']}")
print(f" Has video: {codec_info['has_video']}")
print(f" Format: {codec_info['format_name']}")
print(f" Duration: {codec_info['duration']:.2f}s" if codec_info['duration'] else " Duration: N/A")
if detected_codec != codec:
print(f" ⚠️ Warning: Expected {codec}, got {detected_codec}")
print()
except Exception as e:
print(f" ✗ Failed to test {codec}: {e}\n")
def test_video_detection():
"""Test video file detection."""
print("\n=== Testing Video Detection ===\n")
with tempfile.TemporaryDirectory() as tmpdir:
video_path = os.path.join(tmpdir, 'test_video.mp4')
audio_path = os.path.join(tmpdir, 'test_audio.mp3')
try:
print("Creating test video file...")
create_test_video_file(video_path)
print("Creating test audio file...")
create_test_audio_file('mp3', audio_path)
print(f"\nProbing video file...")
codec_info = get_codec_info(video_path)
print(f" Audio codec: {codec_info['audio_codec']}")
print(f" Video codec: {codec_info['video_codec']}")
print(f" Has audio: {codec_info['has_audio']}")
print(f" Has video: {codec_info['has_video']}")
is_video = is_video_file(video_path)
print(f" is_video_file(): {is_video}")
if not is_video:
print(" ✗ Video file not detected as video!")
else:
print(" ✓ Video file correctly detected")
print(f"\nProbing audio file...")
codec_info = get_codec_info(audio_path)
print(f" Audio codec: {codec_info['audio_codec']}")
print(f" Video codec: {codec_info['video_codec']}")
print(f" Has audio: {codec_info['has_audio']}")
print(f" Has video: {codec_info['has_video']}")
is_video = is_video_file(audio_path)
print(f" is_video_file(): {is_video}")
if is_video:
print(" ✗ Audio file incorrectly detected as video!")
else:
print(" ✓ Audio file correctly identified as audio-only")
print()
except Exception as e:
print(f"✗ Failed to test video detection: {e}\n")
def test_lossless_detection():
"""Test lossless audio detection."""
print("\n=== Testing Lossless Detection ===\n")
with tempfile.TemporaryDirectory() as tmpdir:
test_cases = {
'pcm_s16le': ('test.wav', True),
'flac': ('test.flac', True),
'mp3': ('test.mp3', False),
'aac': ('test.m4a', False),
'opus': ('test.opus', False),
}
for codec, (filename, expected_lossless) in test_cases.items():
filepath = os.path.join(tmpdir, filename)
try:
print(f"Creating {filename} with codec {codec}...")
create_test_audio_file(codec, filepath)
is_lossless = is_lossless_audio(filepath)
status = "" if is_lossless == expected_lossless else ""
print(f" {status} {codec}: is_lossless={is_lossless} (expected {expected_lossless})")
except Exception as e:
print(f" ✗ Failed to test {codec}: {e}")
print()
def test_conversion_check():
"""Test conversion requirement detection."""
print("\n=== Testing Conversion Check ===\n")
with tempfile.TemporaryDirectory() as tmpdir:
# Supported codecs for direct transcription
supported_codecs = ['pcm_s16le', 'mp3', 'flac', 'opus', 'aac']
test_cases = {
'mp3': ('test.mp3', False), # Supported, no conversion needed
'aac': ('test.m4a', False), # Supported, no conversion needed
'opus': ('test.opus', False), # Supported, no conversion needed
'vorbis': ('test.ogg', True), # Not in supported list, needs conversion
}
for codec, (filename, should_convert) in test_cases.items():
filepath = os.path.join(tmpdir, filename)
try:
print(f"Creating {filename} with codec {codec}...")
create_test_audio_file(codec, filepath)
needs_conversion, detected_codec = needs_audio_conversion(filepath, supported_codecs)
status = "" if needs_conversion == should_convert else ""
print(f" {status} {codec}: needs_conversion={needs_conversion} (expected {should_convert})")
print(f" Detected codec: {detected_codec}")
except Exception as e:
print(f" ✗ Failed to test {codec}: {e}")
print()
def test_misnamed_file():
"""Test detection of files with wrong extensions."""
print("\n=== Testing Misnamed File Detection ===\n")
with tempfile.TemporaryDirectory() as tmpdir:
# Create an MP3 file but name it .wav
wrong_name_path = os.path.join(tmpdir, 'actually_mp3.wav')
try:
print("Creating MP3 file with .wav extension...")
create_test_audio_file('mp3', wrong_name_path)
codec_info = get_codec_info(wrong_name_path)
detected_codec = codec_info['audio_codec']
print(f" Filename: actually_mp3.wav")
print(f" Detected codec: {detected_codec}")
if detected_codec == 'mp3':
print(" ✓ Correctly detected MP3 codec despite .wav extension")
else:
print(f" ✗ Incorrectly detected as {detected_codec}")
# Create a FLAC file but name it .mp3
wrong_name_path2 = os.path.join(tmpdir, 'actually_flac.mp3')
print("\nCreating FLAC file with .mp3 extension...")
create_test_audio_file('flac', wrong_name_path2)
codec_info = get_codec_info(wrong_name_path2)
detected_codec = codec_info['audio_codec']
print(f" Filename: actually_flac.mp3")
print(f" Detected codec: {detected_codec}")
if detected_codec == 'flac':
print(" ✓ Correctly detected FLAC codec despite .mp3 extension")
else:
print(f" ✗ Incorrectly detected as {detected_codec}")
print()
except Exception as e:
print(f"✗ Failed to test misnamed files: {e}\n")
def test_duration():
"""Test duration extraction."""
print("\n=== Testing Duration Extraction ===\n")
with tempfile.TemporaryDirectory() as tmpdir:
durations = [1.0, 2.5, 5.0]
for expected_duration in durations:
filepath = os.path.join(tmpdir, f'test_{expected_duration}s.mp3')
try:
print(f"Creating {expected_duration}s audio file...")
create_test_audio_file('mp3', filepath, duration=expected_duration)
detected_duration = get_duration(filepath)
# Allow 0.1s tolerance for encoding variations
if detected_duration and abs(detected_duration - expected_duration) < 0.1:
print(f" ✓ Duration: {detected_duration:.2f}s (expected {expected_duration}s)")
else:
print(f" ✗ Duration: {detected_duration:.2f}s (expected {expected_duration}s)")
except Exception as e:
print(f" ✗ Failed to test duration: {e}")
print()
def main():
"""Run all tests."""
print("=" * 60)
print("FFProbe Codec Detection Test Suite")
print("=" * 60)
# Check if ffmpeg/ffprobe are available
try:
subprocess.run(['ffprobe', '-version'], capture_output=True, check=True)
subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
except (FileNotFoundError, subprocess.CalledProcessError):
print("\n✗ Error: ffmpeg/ffprobe not found. Please install ffmpeg to run tests.\n")
return 1
try:
test_codec_detection()
test_video_detection()
test_lossless_detection()
test_conversion_check()
test_misnamed_file()
test_duration()
print("=" * 60)
print("All tests completed!")
print("=" * 60)
print()
return 0
except Exception as e:
print(f"\n✗ Test suite failed with error: {e}\n")
import traceback
traceback.print_exc()
return 1
if __name__ == '__main__':
sys.exit(main())