Initial release: DictIA v0.8.14-alpha (fork de Speakr, AGPL-3.0)
This commit is contained in:
435
tests/test_video_passthrough.py
Normal file
435
tests/test_video_passthrough.py
Normal file
@@ -0,0 +1,435 @@
|
||||
"""
|
||||
Test suite for the VIDEO_PASSTHROUGH_ASR feature.
|
||||
|
||||
Tests configuration, code path correctness, and interaction with VIDEO_RETENTION
|
||||
across all entry points (processing pipeline, upload handler, file monitor, incognito).
|
||||
Uses static analysis — no running server or real video files required.
|
||||
|
||||
Run with: python tests/test_video_passthrough.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
TEST_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
PROJECT_ROOT = os.path.dirname(TEST_DIR)
|
||||
sys.path.insert(0, PROJECT_ROOT)
|
||||
|
||||
|
||||
def read_file(rel_path):
|
||||
with open(os.path.join(PROJECT_ROOT, rel_path), 'r') as f:
|
||||
return f.read()
|
||||
|
||||
|
||||
# Cache file contents once — they don't change during the run
|
||||
PROCESSING = read_file('src/tasks/processing.py')
|
||||
RECORDINGS = read_file('src/api/recordings.py')
|
||||
FILE_MONITOR = read_file('src/file_monitor.py')
|
||||
APP_CONFIG = read_file('src/config/app_config.py')
|
||||
ENV_EXAMPLE = read_file('config/env.transcription.example')
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_function_body(source, func_name):
|
||||
"""Extract the body of a top-level function from source code."""
|
||||
pattern = rf'^def {func_name}\('
|
||||
lines = source.split('\n')
|
||||
start = None
|
||||
for i, line in enumerate(lines):
|
||||
if re.match(pattern, line):
|
||||
start = i
|
||||
break
|
||||
if start is None:
|
||||
return ''
|
||||
# Collect until next top-level def or class or EOF
|
||||
body_lines = [lines[start]]
|
||||
for line in lines[start + 1:]:
|
||||
if line and not line[0].isspace() and (line.startswith('def ') or line.startswith('class ')):
|
||||
break
|
||||
body_lines.append(line)
|
||||
return '\n'.join(body_lines)
|
||||
|
||||
|
||||
def split_at_incognito(source):
|
||||
"""Split processing.py into main and incognito sections."""
|
||||
marker = 'def transcribe_incognito('
|
||||
idx = source.find(marker)
|
||||
if idx == -1:
|
||||
return source, ''
|
||||
return source[:idx], source[idx:]
|
||||
|
||||
|
||||
PROCESSING_MAIN, PROCESSING_INCOGNITO = split_at_incognito(PROCESSING)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 1. Configuration
|
||||
# ===========================================================================
|
||||
|
||||
class TestPassthroughConfig(unittest.TestCase):
|
||||
"""VIDEO_PASSTHROUGH_ASR env var is defined and defaults to false."""
|
||||
|
||||
FILES_THAT_NEED_IT = [
|
||||
('src/config/app_config.py', APP_CONFIG),
|
||||
('src/tasks/processing.py', PROCESSING),
|
||||
('src/api/recordings.py', RECORDINGS),
|
||||
('src/file_monitor.py', FILE_MONITOR),
|
||||
]
|
||||
|
||||
def test_defined_in_all_files(self):
|
||||
for rel_path, content in self.FILES_THAT_NEED_IT:
|
||||
with self.subTest(file=rel_path):
|
||||
self.assertIn('VIDEO_PASSTHROUGH_ASR', content,
|
||||
f"VIDEO_PASSTHROUGH_ASR missing from {rel_path}")
|
||||
|
||||
def test_default_is_false_everywhere(self):
|
||||
for rel_path, content in self.FILES_THAT_NEED_IT:
|
||||
match = re.search(
|
||||
r"VIDEO_PASSTHROUGH_ASR\s*=\s*os\.environ\.get\('VIDEO_PASSTHROUGH_ASR',\s*'(\w+)'\)",
|
||||
content
|
||||
)
|
||||
if match:
|
||||
with self.subTest(file=rel_path):
|
||||
self.assertEqual(match.group(1), 'false',
|
||||
f"Default should be 'false' in {rel_path}")
|
||||
|
||||
def test_canonical_definition_in_app_config(self):
|
||||
self.assertIn(
|
||||
"VIDEO_PASSTHROUGH_ASR = os.environ.get('VIDEO_PASSTHROUGH_ASR', 'false').lower() == 'true'",
|
||||
APP_CONFIG
|
||||
)
|
||||
|
||||
def test_documented_in_env_example(self):
|
||||
self.assertIn('VIDEO_PASSTHROUGH_ASR', ENV_EXAMPLE)
|
||||
|
||||
def test_processing_imports_from_config(self):
|
||||
self.assertIn('VIDEO_PASSTHROUGH_ASR', PROCESSING)
|
||||
# Should import from app_config, not read os.environ directly
|
||||
self.assertIn('import', PROCESSING)
|
||||
# Verify it's in an import line from app_config
|
||||
import_lines = [l for l in PROCESSING.split('\n')
|
||||
if 'from src.config.app_config import' in l]
|
||||
found = any('VIDEO_PASSTHROUGH_ASR' in l for l in import_lines)
|
||||
self.assertTrue(found, "processing.py should import VIDEO_PASSTHROUGH_ASR from app_config")
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 2. Processing pipeline — main transcription path
|
||||
# ===========================================================================
|
||||
|
||||
class TestProcessingMainPath(unittest.TestCase):
|
||||
"""Test transcribe_with_connector() video passthrough code paths."""
|
||||
|
||||
def test_passthrough_branch_exists_before_retention(self):
|
||||
"""VIDEO_PASSTHROUGH_ASR is checked before VIDEO_RETENTION in the is_video block."""
|
||||
# Inside the `if is_video:` block, passthrough should be the first check
|
||||
video_block_start = PROCESSING_MAIN.find('if is_video:')
|
||||
self.assertNotEqual(video_block_start, -1)
|
||||
|
||||
after_video = PROCESSING_MAIN[video_block_start:]
|
||||
passthrough_pos = after_video.find('if VIDEO_PASSTHROUGH_ASR:')
|
||||
retention_pos = after_video.find('elif VIDEO_RETENTION:')
|
||||
|
||||
self.assertNotEqual(passthrough_pos, -1, "Missing VIDEO_PASSTHROUGH_ASR check in is_video block")
|
||||
self.assertNotEqual(retention_pos, -1, "Missing elif VIDEO_RETENTION check")
|
||||
self.assertLess(passthrough_pos, retention_pos,
|
||||
"VIDEO_PASSTHROUGH_ASR should be checked before VIDEO_RETENTION")
|
||||
|
||||
def test_passthrough_does_not_call_extract_audio(self):
|
||||
"""The passthrough branch must not call extract_audio_from_video."""
|
||||
video_block = PROCESSING_MAIN[PROCESSING_MAIN.find('if is_video:'):]
|
||||
# Find the passthrough branch (from `if VIDEO_PASSTHROUGH_ASR:` to `elif VIDEO_RETENTION:`)
|
||||
pt_start = video_block.find('if VIDEO_PASSTHROUGH_ASR:')
|
||||
pt_end = video_block.find('elif VIDEO_RETENTION:')
|
||||
passthrough_block = video_block[pt_start:pt_end]
|
||||
self.assertNotIn('extract_audio_from_video', passthrough_block,
|
||||
"Passthrough branch should NOT extract audio")
|
||||
|
||||
def test_passthrough_keeps_original_filepath(self):
|
||||
"""Passthrough sets actual_filepath = filepath (the original video)."""
|
||||
video_block = PROCESSING_MAIN[PROCESSING_MAIN.find('if is_video:'):]
|
||||
pt_start = video_block.find('if VIDEO_PASSTHROUGH_ASR:')
|
||||
pt_end = video_block.find('elif VIDEO_RETENTION:')
|
||||
passthrough_block = video_block[pt_start:pt_end]
|
||||
self.assertIn('actual_filepath = filepath', passthrough_block)
|
||||
|
||||
def test_passthrough_with_retention_sets_recording_path(self):
|
||||
"""When both passthrough and retention are on, recording.audio_path is set."""
|
||||
video_block = PROCESSING_MAIN[PROCESSING_MAIN.find('if is_video:'):]
|
||||
pt_start = video_block.find('if VIDEO_PASSTHROUGH_ASR:')
|
||||
pt_end = video_block.find('elif VIDEO_RETENTION:')
|
||||
passthrough_block = video_block[pt_start:pt_end]
|
||||
self.assertIn('if VIDEO_RETENTION:', passthrough_block,
|
||||
"Passthrough branch should conditionally handle retention")
|
||||
self.assertIn('recording.audio_path = filepath', passthrough_block)
|
||||
self.assertIn("mimetypes.guess_type(filepath)", passthrough_block)
|
||||
|
||||
def test_video_passthrough_active_flag_set(self):
|
||||
"""video_passthrough_active flag is computed from is_video and VIDEO_PASSTHROUGH_ASR."""
|
||||
self.assertIn('video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR',
|
||||
PROCESSING_MAIN)
|
||||
|
||||
def test_conversion_skipped_when_passthrough(self):
|
||||
"""convert_if_needed is inside an else block gated by video_passthrough_active."""
|
||||
self.assertIn('if video_passthrough_active:', PROCESSING_MAIN)
|
||||
# The conversion call should be in the else branch
|
||||
flag_pos = PROCESSING_MAIN.find('video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR')
|
||||
after_flag = PROCESSING_MAIN[flag_pos:]
|
||||
passthrough_if = after_flag.find('if video_passthrough_active:')
|
||||
else_pos = after_flag.find('else:', passthrough_if)
|
||||
convert_pos = after_flag.find('convert_if_needed(', else_pos)
|
||||
self.assertGreater(convert_pos, else_pos,
|
||||
"convert_if_needed should be in else branch after passthrough check")
|
||||
|
||||
def test_chunking_skipped_when_passthrough(self):
|
||||
"""Chunking evaluates to False when video_passthrough_active."""
|
||||
# Find the chunking decision area after the flag
|
||||
flag_pos = PROCESSING_MAIN.find('video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR')
|
||||
after_flag = PROCESSING_MAIN[flag_pos:]
|
||||
self.assertIn('if video_passthrough_active:\n should_chunk = False', after_flag)
|
||||
|
||||
def test_conversion_still_runs_for_non_passthrough(self):
|
||||
"""convert_if_needed still runs when passthrough is off or file is audio."""
|
||||
# The else branch of the passthrough check should contain convert_if_needed
|
||||
self.assertIn('conversion_result = convert_if_needed(', PROCESSING_MAIN)
|
||||
|
||||
def test_chunking_still_evaluated_for_non_passthrough(self):
|
||||
"""Chunking is still evaluated normally when passthrough is not active."""
|
||||
self.assertIn('chunking_service.needs_chunking(actual_filepath, False, connector_specs)',
|
||||
PROCESSING_MAIN)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 3. Processing pipeline — VIDEO_RETENTION paths still intact
|
||||
# ===========================================================================
|
||||
|
||||
class TestRetentionNotBroken(unittest.TestCase):
|
||||
"""Existing VIDEO_RETENTION behavior must be preserved."""
|
||||
|
||||
def test_retention_branch_still_extracts_audio(self):
|
||||
"""elif VIDEO_RETENTION branch still calls extract_audio_from_video."""
|
||||
video_block = PROCESSING_MAIN[PROCESSING_MAIN.find('if is_video:'):]
|
||||
ret_start = video_block.find('elif VIDEO_RETENTION:')
|
||||
# Find next else: at the same indent level
|
||||
after_ret = video_block[ret_start:]
|
||||
else_pos = after_ret.find('\n else:')
|
||||
retention_block = after_ret[:else_pos] if else_pos != -1 else after_ret[:500]
|
||||
self.assertIn('extract_audio_from_video(filepath, cleanup_original=False)',
|
||||
retention_block)
|
||||
|
||||
def test_default_branch_still_extracts_and_deletes(self):
|
||||
"""The final else branch extracts audio with default cleanup (deletes video)."""
|
||||
video_block = PROCESSING_MAIN[PROCESSING_MAIN.find('if is_video:'):]
|
||||
# The last else in the is_video block
|
||||
self.assertIn('extract_audio_from_video(filepath)', video_block)
|
||||
|
||||
def test_temp_audio_cleanup_still_present(self):
|
||||
"""Temp audio from retention is still cleaned up after transcription."""
|
||||
self.assertIn('is_video and VIDEO_RETENTION and audio_filepath', PROCESSING_MAIN)
|
||||
self.assertIn('Cleaned up temp audio from video retention', PROCESSING_MAIN)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 4. Incognito path
|
||||
# ===========================================================================
|
||||
|
||||
class TestIncognitoPassthrough(unittest.TestCase):
|
||||
"""Test passthrough in the incognito transcription path."""
|
||||
|
||||
def test_passthrough_flag_set_in_incognito(self):
|
||||
"""video_passthrough_active is computed in incognito path."""
|
||||
self.assertIn('video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR',
|
||||
PROCESSING_INCOGNITO)
|
||||
|
||||
def test_passthrough_skips_extraction_in_incognito(self):
|
||||
"""When passthrough is on, incognito skips extract_audio_from_video."""
|
||||
# The passthrough branch logs and does NOT extract
|
||||
self.assertIn('[Incognito] Video passthrough: sending original video to ASR',
|
||||
PROCESSING_INCOGNITO)
|
||||
|
||||
def test_passthrough_skips_conversion_in_incognito(self):
|
||||
"""When passthrough is on, incognito skips convert_if_needed."""
|
||||
self.assertIn('[Incognito] Video passthrough: skipping codec conversion',
|
||||
PROCESSING_INCOGNITO)
|
||||
|
||||
def test_passthrough_skips_chunking_in_incognito(self):
|
||||
"""When passthrough is on, incognito chunking is False."""
|
||||
body = PROCESSING_INCOGNITO
|
||||
self.assertIn('if video_passthrough_active:\n should_chunk = False', body)
|
||||
|
||||
def test_incognito_does_not_reference_video_retention(self):
|
||||
"""Incognito path should NOT reference VIDEO_RETENTION (no retention in incognito)."""
|
||||
self.assertNotIn('VIDEO_RETENTION', PROCESSING_INCOGNITO)
|
||||
|
||||
def test_incognito_still_extracts_without_passthrough(self):
|
||||
"""Without passthrough, incognito still extracts audio from video."""
|
||||
self.assertIn('extract_audio_from_video(filepath, cleanup_original=False)',
|
||||
PROCESSING_INCOGNITO)
|
||||
|
||||
def test_incognito_still_converts_without_passthrough(self):
|
||||
"""Without passthrough, incognito still runs convert_if_needed."""
|
||||
self.assertIn('convert_if_needed(', PROCESSING_INCOGNITO)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 5. Upload handler (recordings.py)
|
||||
# ===========================================================================
|
||||
|
||||
class TestUploadHandlerPassthrough(unittest.TestCase):
|
||||
"""Test recordings.py upload handler respects VIDEO_PASSTHROUGH_ASR."""
|
||||
|
||||
def test_skip_conversion_for_passthrough_video(self):
|
||||
"""Upload handler skips conversion when passthrough or retention + video."""
|
||||
self.assertIn('VIDEO_RETENTION or VIDEO_PASSTHROUGH_ASR) and has_video', RECORDINGS)
|
||||
|
||||
def test_extension_fallback_checks_passthrough(self):
|
||||
"""Extension-based video detection also fires for VIDEO_PASSTHROUGH_ASR."""
|
||||
self.assertIn('VIDEO_RETENTION or VIDEO_PASSTHROUGH_ASR', RECORDINGS)
|
||||
|
||||
def test_convert_if_needed_still_in_else(self):
|
||||
"""convert_if_needed still runs for audio files or when both flags are off."""
|
||||
self.assertIn('convert_if_needed(', RECORDINGS)
|
||||
|
||||
def test_passthrough_log_message(self):
|
||||
"""Upload handler logs which mode caused the skip."""
|
||||
self.assertIn("'VIDEO_PASSTHROUGH_ASR'", RECORDINGS)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 6. File monitor
|
||||
# ===========================================================================
|
||||
|
||||
class TestFileMonitorPassthrough(unittest.TestCase):
|
||||
"""Test file_monitor.py respects VIDEO_PASSTHROUGH_ASR."""
|
||||
|
||||
def test_passthrough_defined(self):
|
||||
self.assertIn('VIDEO_PASSTHROUGH_ASR', FILE_MONITOR)
|
||||
|
||||
def test_skip_conversion_for_passthrough_or_retention(self):
|
||||
"""File monitor skips conversion when passthrough or retention + video."""
|
||||
self.assertIn('VIDEO_PASSTHROUGH_ASR or VIDEO_RETENTION) and has_video', FILE_MONITOR)
|
||||
|
||||
def test_convert_if_needed_in_else_branch(self):
|
||||
"""convert_if_needed is in the else branch, not inside the skip block."""
|
||||
lines = FILE_MONITOR.split('\n')
|
||||
in_skip_block = False
|
||||
found_else = False
|
||||
for i, line in enumerate(lines):
|
||||
if 'VIDEO_PASSTHROUGH_ASR or VIDEO_RETENTION) and has_video' in line:
|
||||
in_skip_block = True
|
||||
elif in_skip_block and line.strip().startswith('else:'):
|
||||
in_skip_block = False
|
||||
found_else = True
|
||||
elif in_skip_block and 'convert_if_needed' in line:
|
||||
self.fail(f"convert_if_needed inside skip block at line {i + 1}")
|
||||
self.assertTrue(found_else, "Should have else branch after passthrough/retention skip")
|
||||
|
||||
def test_log_distinguishes_passthrough_from_retention(self):
|
||||
"""Log message indicates whether passthrough or retention caused the skip."""
|
||||
self.assertIn("'passthrough'", FILE_MONITOR)
|
||||
self.assertIn("'retention'", FILE_MONITOR)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 7. Audio files unaffected by passthrough
|
||||
# ===========================================================================
|
||||
|
||||
class TestAudioUnaffected(unittest.TestCase):
|
||||
"""VIDEO_PASSTHROUGH_ASR must only affect video files, never audio."""
|
||||
|
||||
def test_passthrough_flag_gated_on_is_video(self):
|
||||
"""video_passthrough_active is always `is_video and VIDEO_PASSTHROUGH_ASR`."""
|
||||
# Main path
|
||||
self.assertIn('video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR',
|
||||
PROCESSING_MAIN)
|
||||
# Incognito path
|
||||
self.assertIn('video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR',
|
||||
PROCESSING_INCOGNITO)
|
||||
|
||||
def test_upload_handler_gated_on_has_video(self):
|
||||
"""Upload handler skip is gated on `has_video`."""
|
||||
self.assertIn('and has_video', RECORDINGS)
|
||||
|
||||
def test_file_monitor_gated_on_has_video(self):
|
||||
"""File monitor skip is gated on `has_video`."""
|
||||
self.assertIn('and has_video', FILE_MONITOR)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 8. Documentation
|
||||
# ===========================================================================
|
||||
|
||||
class TestDocumentation(unittest.TestCase):
|
||||
"""VIDEO_PASSTHROUGH_ASR is documented in all relevant places."""
|
||||
|
||||
DOC_FILES = [
|
||||
'config/env.transcription.example',
|
||||
'docs/admin-guide/system-settings.md',
|
||||
'docs/features.md',
|
||||
'docs/getting-started/installation.md',
|
||||
]
|
||||
|
||||
def test_documented_in_all_relevant_files(self):
|
||||
for rel_path in self.DOC_FILES:
|
||||
content = read_file(rel_path)
|
||||
with self.subTest(file=rel_path):
|
||||
self.assertIn('VIDEO_PASSTHROUGH_ASR', content,
|
||||
f"VIDEO_PASSTHROUGH_ASR missing from {rel_path}")
|
||||
|
||||
def test_env_example_commented_out_by_default(self):
|
||||
"""The env example has the option commented out (opt-in)."""
|
||||
self.assertIn('# VIDEO_PASSTHROUGH_ASR=false', ENV_EXAMPLE)
|
||||
|
||||
def test_docs_warn_about_asr_compatibility(self):
|
||||
"""Docs warn that standard APIs will reject video input."""
|
||||
system_settings = read_file('docs/admin-guide/system-settings.md')
|
||||
installation = read_file('docs/getting-started/installation.md')
|
||||
self.assertIn('reject', system_settings.lower())
|
||||
self.assertIn('reject', installation.lower())
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# 9. Interaction matrix — structural verification
|
||||
# ===========================================================================
|
||||
|
||||
class TestInteractionMatrix(unittest.TestCase):
|
||||
"""
|
||||
Verify the 3-way branch structure in processing.py:
|
||||
if VIDEO_PASSTHROUGH_ASR → passthrough
|
||||
elif VIDEO_RETENTION → retention
|
||||
else → default extraction
|
||||
"""
|
||||
|
||||
def test_three_way_branch_in_main_path(self):
|
||||
"""Main path has if/elif/else for passthrough/retention/default."""
|
||||
video_block = PROCESSING_MAIN[PROCESSING_MAIN.find('if is_video:'):]
|
||||
# All three branches present in order
|
||||
pt_pos = video_block.find('if VIDEO_PASSTHROUGH_ASR:')
|
||||
ret_pos = video_block.find('elif VIDEO_RETENTION:')
|
||||
else_pos = video_block.find('\n else:', ret_pos)
|
||||
self.assertNotEqual(pt_pos, -1)
|
||||
self.assertNotEqual(ret_pos, -1)
|
||||
self.assertNotEqual(else_pos, -1)
|
||||
self.assertLess(pt_pos, ret_pos)
|
||||
self.assertLess(ret_pos, else_pos)
|
||||
|
||||
def test_incognito_two_way_branch(self):
|
||||
"""Incognito has if/else for passthrough/extract (no retention)."""
|
||||
video_block = PROCESSING_INCOGNITO[PROCESSING_INCOGNITO.find('if is_video:'):]
|
||||
pt_pos = video_block.find('if VIDEO_PASSTHROUGH_ASR:')
|
||||
else_pos = video_block.find('\n else:', pt_pos)
|
||||
self.assertNotEqual(pt_pos, -1)
|
||||
self.assertNotEqual(else_pos, -1)
|
||||
# No VIDEO_RETENTION in incognito
|
||||
incognito_video_block = video_block[:500]
|
||||
self.assertNotIn('VIDEO_RETENTION', incognito_video_block)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main(verbosity=2)
|
||||
Reference in New Issue
Block a user