# Files
# dictia-public/tests/test_video_passthrough.py
#
# 436 lines
# 20 KiB
# Python
#
"""
Test suite for the VIDEO_PASSTHROUGH_ASR feature.
Tests configuration, code path correctness, and interaction with VIDEO_RETENTION
across all entry points (processing pipeline, upload handler, file monitor, incognito).
Uses static analysis — no running server or real video files required.
Run with: python tests/test_video_passthrough.py
"""
import os
import re
import sys
import unittest
from pathlib import Path
# Directory containing this test module, its parent directory, and a
# sys.path entry for that parent (inserted at the front).
TEST_DIR = os.path.split(os.path.abspath(__file__))[0]
PROJECT_ROOT = os.path.split(TEST_DIR)[0]
sys.path.insert(0, PROJECT_ROOT)
def read_file(rel_path):
    """Return the text of a file located under ``PROJECT_ROOT``.

    Args:
        rel_path: Path of the file, relative to ``PROJECT_ROOT``.

    Returns:
        The complete file contents as a ``str``.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
        UnicodeDecodeError: If the file contents are not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    # NOTE(review): assumes the inspected sources and docs are UTF-8 — confirm.
    with open(os.path.join(PROJECT_ROOT, rel_path), 'r', encoding='utf-8') as f:
        return f.read()
# Load each inspected file exactly once at import time; the tests below only
# search these strings and never re-read the files.
(
    PROCESSING,
    RECORDINGS,
    FILE_MONITOR,
    APP_CONFIG,
    ENV_EXAMPLE,
) = (
    read_file(rel_path)
    for rel_path in (
        'src/tasks/processing.py',
        'src/api/recordings.py',
        'src/file_monitor.py',
        'src/config/app_config.py',
        'config/env.transcription.example',
    )
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def get_function_body(source, func_name):
    """Extract the text of a top-level function from Python source code.

    The function is located by a line that starts (at column 0) with
    ``def <func_name>(`` or ``async def <func_name>(``. The returned text
    runs from that line up to, but not including, the next line that starts
    at column 0 with ``def ``, ``async def `` or ``class ``, or to the end of
    ``source`` if there is no such line.

    Args:
        source: Python source code as a single string.
        func_name: Exact name of the function to extract.

    Returns:
        The function's header and body joined with newlines, or ``''`` when
        no matching top-level definition exists.
    """
    # Escape the name so it is always matched literally, never as regex
    # syntax; re.match() anchors at the start of each line.
    header = re.compile(rf'(?:async\s+)?def {re.escape(func_name)}\(')
    lines = source.split('\n')

    start = next((i for i, line in enumerate(lines) if header.match(line)), None)
    if start is None:
        return ''

    # A prefix match at column 0 already implies the line is non-empty and
    # unindented, so no separate whitespace check is needed.
    boundaries = ('def ', 'async def ', 'class ')
    body_lines = [lines[start]]
    for line in lines[start + 1:]:
        if line.startswith(boundaries):
            break
        body_lines.append(line)
    return '\n'.join(body_lines)
def split_at_incognito(source):
    """Cut ``source`` in two at the first ``def transcribe_incognito(``.

    Returns a ``(before, from_marker)`` tuple where ``from_marker`` begins
    with the marker text itself. If the marker does not occur, the result is
    ``(source, '')``.
    """
    before, marker, after = source.partition('def transcribe_incognito(')
    if not marker:
        return source, ''
    return before, marker + after
# Split processing.py once at import time: PROCESSING_MAIN is the text before
# `def transcribe_incognito(`, PROCESSING_INCOGNITO is the text from that
# marker onward (empty string if the marker is absent).
PROCESSING_MAIN, PROCESSING_INCOGNITO = split_at_incognito(PROCESSING)
# ===========================================================================
# 1. Configuration
# ===========================================================================
class TestPassthroughConfig(unittest.TestCase):
    """Checks how VIDEO_PASSTHROUGH_ASR is declared, defaulted and imported."""

    # (relative path, cached source text) pairs that must mention the flag.
    FILES_THAT_NEED_IT = [
        ('src/config/app_config.py', APP_CONFIG),
        ('src/tasks/processing.py', PROCESSING),
        ('src/api/recordings.py', RECORDINGS),
        ('src/file_monitor.py', FILE_MONITOR),
    ]

    def test_defined_in_all_files(self):
        """Each listed file contains the flag name."""
        flag = 'VIDEO_PASSTHROUGH_ASR'
        for rel_path, content in self.FILES_THAT_NEED_IT:
            with self.subTest(file=rel_path):
                self.assertIn(
                    flag,
                    content,
                    f"VIDEO_PASSTHROUGH_ASR missing from {rel_path}",
                )

    def test_default_is_false_everywhere(self):
        """Wherever the flag is read via os.environ.get, its default is 'false'."""
        env_read = re.compile(
            r"VIDEO_PASSTHROUGH_ASR\s*=\s*os\.environ\.get\('VIDEO_PASSTHROUGH_ASR',\s*'(\w+)'\)"
        )
        for rel_path, content in self.FILES_THAT_NEED_IT:
            hit = env_read.search(content)
            if hit is None:
                # Files without a direct environment read are not checked.
                continue
            with self.subTest(file=rel_path):
                self.assertEqual(
                    hit.group(1),
                    'false',
                    f"Default should be 'false' in {rel_path}",
                )

    def test_canonical_definition_in_app_config(self):
        """app_config.py contains the exact defining statement."""
        expected = (
            "VIDEO_PASSTHROUGH_ASR = os.environ.get('VIDEO_PASSTHROUGH_ASR', 'false').lower() == 'true'"
        )
        self.assertIn(expected, APP_CONFIG)

    def test_documented_in_env_example(self):
        """The example env file mentions the flag."""
        self.assertIn('VIDEO_PASSTHROUGH_ASR', ENV_EXAMPLE)

    def test_processing_imports_from_config(self):
        """processing.py names the flag on a `from src.config.app_config import` line."""
        self.assertIn('VIDEO_PASSTHROUGH_ASR', PROCESSING)
        self.assertIn('import', PROCESSING)
        # Only single-line imports are recognised: the flag has to appear on
        # the same line as the `from ... import` statement.
        imported = False
        for text in PROCESSING.split('\n'):
            if 'from src.config.app_config import' in text and 'VIDEO_PASSTHROUGH_ASR' in text:
                imported = True
                break
        self.assertTrue(
            imported,
            "processing.py should import VIDEO_PASSTHROUGH_ASR from app_config",
        )
# ===========================================================================
# 2. Processing pipeline — main transcription path
# ===========================================================================
class TestProcessingMainPath(unittest.TestCase):
    """Test transcribe_with_connector() video passthrough code paths.

    All assertions run against PROCESSING_MAIN. The private helpers locate
    the relevant source slices and fail loudly when a marker is missing, so
    negative assertions can never pass on an empty slice.
    """

    _FLAG_ASSIGNMENT = 'video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR'

    def _video_block(self):
        """Return PROCESSING_MAIN from the first `if is_video:` onward."""
        start = PROCESSING_MAIN.find('if is_video:')
        self.assertNotEqual(start, -1, "Missing `if is_video:` block in main path")
        return PROCESSING_MAIN[start:]

    def _passthrough_block(self):
        """Return the text between `if VIDEO_PASSTHROUGH_ASR:` and `elif VIDEO_RETENTION:`.

        Fails the calling test if either marker is absent or if they appear
        in the wrong order.
        """
        video_block = self._video_block()
        pt_start = video_block.find('if VIDEO_PASSTHROUGH_ASR:')
        pt_end = video_block.find('elif VIDEO_RETENTION:')
        self.assertNotEqual(pt_start, -1, "Missing VIDEO_PASSTHROUGH_ASR check in is_video block")
        self.assertNotEqual(pt_end, -1, "Missing elif VIDEO_RETENTION check")
        self.assertLess(pt_start, pt_end,
                        "VIDEO_PASSTHROUGH_ASR should be checked before VIDEO_RETENTION")
        return video_block[pt_start:pt_end]

    def _after_flag(self):
        """Return PROCESSING_MAIN from the video_passthrough_active assignment onward."""
        flag_pos = PROCESSING_MAIN.find(self._FLAG_ASSIGNMENT)
        self.assertNotEqual(flag_pos, -1, "Missing video_passthrough_active assignment")
        return PROCESSING_MAIN[flag_pos:]

    def test_passthrough_branch_exists_before_retention(self):
        """VIDEO_PASSTHROUGH_ASR is checked before VIDEO_RETENTION in the is_video block."""
        # The helper asserts that both markers exist and are correctly ordered.
        self._passthrough_block()

    def test_passthrough_does_not_call_extract_audio(self):
        """The passthrough branch must not call extract_audio_from_video."""
        self.assertNotIn('extract_audio_from_video', self._passthrough_block(),
                         "Passthrough branch should NOT extract audio")

    def test_passthrough_keeps_original_filepath(self):
        """Passthrough sets actual_filepath = filepath (the original video)."""
        self.assertIn('actual_filepath = filepath', self._passthrough_block())

    def test_passthrough_with_retention_sets_recording_path(self):
        """When both passthrough and retention are on, recording.audio_path is set."""
        passthrough_block = self._passthrough_block()
        self.assertIn('if VIDEO_RETENTION:', passthrough_block,
                      "Passthrough branch should conditionally handle retention")
        self.assertIn('recording.audio_path = filepath', passthrough_block)
        self.assertIn("mimetypes.guess_type(filepath)", passthrough_block)

    def test_video_passthrough_active_flag_set(self):
        """video_passthrough_active flag is computed from is_video and VIDEO_PASSTHROUGH_ASR."""
        self.assertIn(self._FLAG_ASSIGNMENT, PROCESSING_MAIN)

    def test_conversion_skipped_when_passthrough(self):
        """convert_if_needed appears after the else that follows the passthrough check."""
        self.assertIn('if video_passthrough_active:', PROCESSING_MAIN)
        after_flag = self._after_flag()
        passthrough_if = after_flag.find('if video_passthrough_active:')
        self.assertNotEqual(passthrough_if, -1,
                            "Missing `if video_passthrough_active:` after the flag assignment")
        else_pos = after_flag.find('else:', passthrough_if)
        self.assertNotEqual(else_pos, -1, "Missing else branch after passthrough check")
        convert_pos = after_flag.find('convert_if_needed(', else_pos)
        self.assertGreater(convert_pos, else_pos,
                           "convert_if_needed should be in else branch after passthrough check")

    def test_chunking_skipped_when_passthrough(self):
        """Chunking evaluates to False when video_passthrough_active."""
        # Match any indentation depth so the check does not break when the
        # surrounding code is re-indented.
        self.assertRegex(
            self._after_flag(),
            r'if video_passthrough_active:\n[ \t]+should_chunk = False',
        )

    def test_conversion_still_runs_for_non_passthrough(self):
        """convert_if_needed still runs when passthrough is off or file is audio."""
        self.assertIn('conversion_result = convert_if_needed(', PROCESSING_MAIN)

    def test_chunking_still_evaluated_for_non_passthrough(self):
        """Chunking is still evaluated normally when passthrough is not active."""
        self.assertIn('chunking_service.needs_chunking(actual_filepath, False, connector_specs)',
                      PROCESSING_MAIN)
# ===========================================================================
# 3. Processing pipeline — VIDEO_RETENTION paths still intact
# ===========================================================================
class TestRetentionNotBroken(unittest.TestCase):
    """Guards the pre-existing VIDEO_RETENTION code paths in the main section."""

    def test_retention_branch_still_extracts_audio(self):
        """The `elif VIDEO_RETENTION:` branch calls extract_audio_from_video with cleanup_original=False."""
        video_block = PROCESSING_MAIN[PROCESSING_MAIN.find('if is_video:'):]
        after_ret = video_block[video_block.find('elif VIDEO_RETENTION:'):]
        # The branch ends at the next matching else; without one, inspect
        # only the first 500 characters.
        else_pos = after_ret.find('\n else:')
        if else_pos == -1:
            retention_block = after_ret[:500]
        else:
            retention_block = after_ret[:else_pos]
        self.assertIn(
            'extract_audio_from_video(filepath, cleanup_original=False)',
            retention_block,
        )

    def test_default_branch_still_extracts_and_deletes(self):
        """The is_video block still contains the plain extract_audio_from_video(filepath) call."""
        start = PROCESSING_MAIN.find('if is_video:')
        self.assertIn('extract_audio_from_video(filepath)', PROCESSING_MAIN[start:])

    def test_temp_audio_cleanup_still_present(self):
        """The temp-audio cleanup condition and its log message are both present."""
        for needle in (
            'is_video and VIDEO_RETENTION and audio_filepath',
            'Cleaned up temp audio from video retention',
        ):
            self.assertIn(needle, PROCESSING_MAIN)
# ===========================================================================
# 4. Incognito path
# ===========================================================================
class TestIncognitoPassthrough(unittest.TestCase):
    """Test passthrough in the incognito transcription path.

    All assertions run against PROCESSING_INCOGNITO.
    """

    def test_passthrough_flag_set_in_incognito(self):
        """video_passthrough_active is computed in incognito path."""
        self.assertIn('video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR',
                      PROCESSING_INCOGNITO)

    def test_passthrough_skips_extraction_in_incognito(self):
        """The incognito path contains the passthrough log message about sending the original video."""
        self.assertIn('[Incognito] Video passthrough: sending original video to ASR',
                      PROCESSING_INCOGNITO)

    def test_passthrough_skips_conversion_in_incognito(self):
        """The incognito path contains the passthrough log message about skipping conversion."""
        self.assertIn('[Incognito] Video passthrough: skipping codec conversion',
                      PROCESSING_INCOGNITO)

    def test_passthrough_skips_chunking_in_incognito(self):
        """When passthrough is on, incognito chunking is False."""
        # Match any indentation depth so the check does not break when the
        # surrounding code is re-indented.
        self.assertRegex(
            PROCESSING_INCOGNITO,
            r'if video_passthrough_active:\n[ \t]+should_chunk = False',
        )

    def test_incognito_does_not_reference_video_retention(self):
        """Incognito path should NOT reference VIDEO_RETENTION (no retention in incognito)."""
        # An empty section means the incognito marker was not found; without
        # this guard the negative assertion below would pass vacuously.
        self.assertTrue(PROCESSING_INCOGNITO,
                        "Incognito section not found in processing.py")
        self.assertNotIn('VIDEO_RETENTION', PROCESSING_INCOGNITO)

    def test_incognito_still_extracts_without_passthrough(self):
        """Without passthrough, incognito still extracts audio from video."""
        self.assertIn('extract_audio_from_video(filepath, cleanup_original=False)',
                      PROCESSING_INCOGNITO)

    def test_incognito_still_converts_without_passthrough(self):
        """Without passthrough, incognito still runs convert_if_needed."""
        self.assertIn('convert_if_needed(', PROCESSING_INCOGNITO)
# ===========================================================================
# 5. Upload handler (recordings.py)
# ===========================================================================
class TestUploadHandlerPassthrough(unittest.TestCase):
    """Substring checks on recordings.py for VIDEO_PASSTHROUGH_ASR handling."""

    def test_skip_conversion_for_passthrough_video(self):
        """The combined retention/passthrough condition is gated on has_video."""
        condition = 'VIDEO_RETENTION or VIDEO_PASSTHROUGH_ASR) and has_video'
        self.assertIn(condition, RECORDINGS)

    def test_extension_fallback_checks_passthrough(self):
        """The `VIDEO_RETENTION or VIDEO_PASSTHROUGH_ASR` expression is present."""
        condition = 'VIDEO_RETENTION or VIDEO_PASSTHROUGH_ASR'
        self.assertIn(condition, RECORDINGS)

    def test_convert_if_needed_still_in_else(self):
        """recordings.py still calls convert_if_needed somewhere."""
        call = 'convert_if_needed('
        self.assertIn(call, RECORDINGS)

    def test_passthrough_log_message(self):
        """The single-quoted flag name appears as a string literal in recordings.py."""
        quoted_flag = "'VIDEO_PASSTHROUGH_ASR'"
        self.assertIn(quoted_flag, RECORDINGS)
# ===========================================================================
# 6. File monitor
# ===========================================================================
class TestFileMonitorPassthrough(unittest.TestCase):
    """Substring and line-scan checks on file_monitor.py."""

    def test_passthrough_defined(self):
        """file_monitor.py mentions the flag."""
        self.assertIn('VIDEO_PASSTHROUGH_ASR', FILE_MONITOR)

    def test_skip_conversion_for_passthrough_or_retention(self):
        """The combined passthrough/retention condition is gated on has_video."""
        condition = 'VIDEO_PASSTHROUGH_ASR or VIDEO_RETENTION) and has_video'
        self.assertIn(condition, FILE_MONITOR)

    def test_convert_if_needed_in_else_branch(self):
        """No convert_if_needed between the skip condition and the following else."""
        skip_marker = 'VIDEO_PASSTHROUGH_ASR or VIDEO_RETENTION) and has_video'
        inside_skip = False
        saw_else = False
        for lineno, text in enumerate(FILE_MONITOR.split('\n'), start=1):
            if skip_marker in text:
                inside_skip = True
                continue
            if not inside_skip:
                continue
            # Only lines after the skip condition and before its else are
            # inspected from here on.
            if text.strip().startswith('else:'):
                inside_skip = False
                saw_else = True
            elif 'convert_if_needed' in text:
                self.fail(f"convert_if_needed inside skip block at line {lineno}")
        self.assertTrue(saw_else, "Should have else branch after passthrough/retention skip")

    def test_log_distinguishes_passthrough_from_retention(self):
        """Both quoted mode names appear as string literals in file_monitor.py."""
        for quoted_mode in ("'passthrough'", "'retention'"):
            self.assertIn(quoted_mode, FILE_MONITOR)
# ===========================================================================
# 7. Audio files unaffected by passthrough
# ===========================================================================
class TestAudioUnaffected(unittest.TestCase):
    """Checks that every passthrough gate is tied to a video indicator."""

    def test_passthrough_flag_gated_on_is_video(self):
        """Both processing sections compute the flag as `is_video and VIDEO_PASSTHROUGH_ASR`."""
        assignment = 'video_passthrough_active = is_video and VIDEO_PASSTHROUGH_ASR'
        # Main section first, then the incognito section.
        for section in (PROCESSING_MAIN, PROCESSING_INCOGNITO):
            self.assertIn(assignment, section)

    def test_upload_handler_gated_on_has_video(self):
        """recordings.py contains an `and has_video` condition."""
        gate = 'and has_video'
        self.assertIn(gate, RECORDINGS)

    def test_file_monitor_gated_on_has_video(self):
        """file_monitor.py contains an `and has_video` condition."""
        gate = 'and has_video'
        self.assertIn(gate, FILE_MONITOR)
# ===========================================================================
# 8. Documentation
# ===========================================================================
class TestDocumentation(unittest.TestCase):
    """VIDEO_PASSTHROUGH_ASR is documented in all relevant places."""

    # Paths (relative to PROJECT_ROOT) of files that must mention the flag.
    DOC_FILES = [
        'config/env.transcription.example',
        'docs/admin-guide/system-settings.md',
        'docs/features.md',
        'docs/getting-started/installation.md',
    ]

    def test_documented_in_all_relevant_files(self):
        """Every file in DOC_FILES mentions the flag by name."""
        for rel_path in self.DOC_FILES:
            with self.subTest(file=rel_path):
                # Read inside the subTest so an unreadable file is reported
                # for that file only and the remaining files are still checked.
                content = read_file(rel_path)
                self.assertIn('VIDEO_PASSTHROUGH_ASR', content,
                              f"VIDEO_PASSTHROUGH_ASR missing from {rel_path}")

    def test_env_example_commented_out_by_default(self):
        """The env example has the option commented out (opt-in)."""
        self.assertIn('# VIDEO_PASSTHROUGH_ASR=false', ENV_EXAMPLE)

    def test_docs_warn_about_asr_compatibility(self):
        """Both guides contain the word 'reject' (case-insensitive)."""
        for rel_path in (
            'docs/admin-guide/system-settings.md',
            'docs/getting-started/installation.md',
        ):
            with self.subTest(file=rel_path):
                self.assertIn('reject', read_file(rel_path).lower())
# ===========================================================================
# 9. Interaction matrix — structural verification
# ===========================================================================
class TestInteractionMatrix(unittest.TestCase):
    """
    Structural checks on the branch layout in processing.py:
    if VIDEO_PASSTHROUGH_ASR → passthrough
    elif VIDEO_RETENTION → retention
    else → default extraction
    """

    def test_three_way_branch_in_main_path(self):
        """In the main section the if / elif / else markers all occur, in that order."""
        start = PROCESSING_MAIN.find('if is_video:')
        video_block = PROCESSING_MAIN[start:]
        pt_pos = video_block.find('if VIDEO_PASSTHROUGH_ASR:')
        ret_pos = video_block.find('elif VIDEO_RETENTION:')
        else_pos = video_block.find('\n else:', ret_pos)
        # Presence first, ordering second.
        for position in (pt_pos, ret_pos, else_pos):
            self.assertNotEqual(position, -1)
        self.assertLess(pt_pos, ret_pos)
        self.assertLess(ret_pos, else_pos)

    def test_incognito_two_way_branch(self):
        """In the incognito section the if / else markers occur and retention is not mentioned nearby."""
        start = PROCESSING_INCOGNITO.find('if is_video:')
        video_block = PROCESSING_INCOGNITO[start:]
        pt_pos = video_block.find('if VIDEO_PASSTHROUGH_ASR:')
        else_pos = video_block.find('\n else:', pt_pos)
        for position in (pt_pos, else_pos):
            self.assertNotEqual(position, -1)
        # Only the first 500 characters of the video block are inspected.
        self.assertNotIn('VIDEO_RETENTION', video_block[:500])
# Allow running this file directly as a script; verbosity=2 makes unittest
# print each test's name and result.
if __name__ == '__main__':
    unittest.main(verbosity=2)