""" Test suite to ensure database migrations are compatible with both SQLite and PostgreSQL. These tests scan the init_db.py file for patterns that would break on PostgreSQL, such as SQLite-only boolean defaults (0/1 instead of FALSE/TRUE) and unquoted reserved keywords. Run with: python tests/test_migration_compatibility.py """ import re import unittest import os class TestMigrationCompatibility(unittest.TestCase): """Tests to ensure init_db.py uses cross-database compatible SQL.""" @classmethod def setUpClass(cls): """Load init_db.py content once for all tests.""" # Find the project root test_dir = os.path.dirname(os.path.abspath(__file__)) project_root = os.path.dirname(test_dir) init_db_path = os.path.join(project_root, 'src', 'init_db.py') with open(init_db_path, 'r') as f: cls.content = f.read() def test_no_raw_boolean_defaults_in_alter_table(self): """ Ensure no raw ALTER TABLE statements use SQLite-only boolean defaults. The pattern 'BOOLEAN DEFAULT 0' or 'BOOLEAN DEFAULT 1' in raw SQL will fail on PostgreSQL, which requires 'DEFAULT FALSE' or 'DEFAULT TRUE'. Using add_column_if_not_exists() handles this conversion automatically. """ # Pattern to find raw SQL with text() that has BOOLEAN DEFAULT 0/1 # This matches: text('... BOOLEAN DEFAULT 0 ...') or text("...") pattern = r"conn\.execute\s*\(\s*text\s*\(['\"]([^'\"]*BOOLEAN\s+DEFAULT\s+[01][^'\"]*)['\"]" matches = re.findall(pattern, self.content, re.IGNORECASE) # Filter out false positives - we're looking for raw ALTER TABLE statements # not UPDATE statements or other SQL that legitimately uses 0/1 problematic = [] for match in matches: match_upper = match.upper() # Only flag if it's an ALTER TABLE with BOOLEAN DEFAULT 0/1 if 'ALTER TABLE' in match_upper and 'BOOLEAN' in match_upper: if 'DEFAULT 0' in match or 'DEFAULT 1' in match: problematic.append(match) self.assertEqual( len(problematic), 0, f"Found SQLite-only boolean defaults in raw ALTER TABLE statements. " f"Use add_column_if_not_exists() instead:\n" + "\n".join(f" - {m[:100]}..." if len(m) > 100 else f" - {m}" for m in problematic) ) def test_no_boolean_integer_comparisons_in_raw_sql(self): """ Ensure raw SQL doesn't compare boolean columns to integers (0/1). PostgreSQL strictly separates boolean and integer types: - 'column = 1' fails with 'operator does not exist: boolean = integer' - 'column = TRUE' works on both SQLite (3.23+) and PostgreSQL Known boolean columns in migrations: protect_from_deletion, email_verified, auto_share_on_apply, share_with_group_lead, is_inbox, is_highlighted, deletion_exempt, is_admin, can_share_publicly. """ boolean_columns = [ 'protect_from_deletion', 'email_verified', 'auto_share_on_apply', 'share_with_group_lead', 'is_inbox', 'is_highlighted', 'deletion_exempt', 'is_admin', 'can_share_publicly', 'auto_speaker_labelling', 'auto_summarization' ] # Find raw SQL in text() calls sql_pattern = r"text\s*\(\s*['\"\"]\"\"(.*?)['\"\"]\"\"?\s*\)" # Simpler: find lines with known boolean column = 0 or = 1 problematic = [] for col in boolean_columns: # Match: column = 0 or column = 1 (not = TRUE/FALSE) pattern = rf"{col}\s*=\s*[01]\b" matches = re.finditer(pattern, self.content, re.IGNORECASE) for match in matches: # Get surrounding context to check if it's in a text() SQL call start = max(0, match.start() - 200) context = self.content[start:match.end() + 50] if 'text(' in context and 'sqlite_master' not in context: problematic.append(f"{col}: ...{match.group()}...") self.assertEqual( len(problematic), 0, f"Found boolean columns compared to integers in raw SQL. " f"Use TRUE/FALSE instead of 1/0 for PostgreSQL compatibility:\n" + "\n".join(f" - {p}" for p in problematic) ) def test_reserved_keywords_quoted_in_index_creation(self): """ Ensure reserved keywords like 'user' are properly quoted in index creation. Raw SQL like 'CREATE INDEX ... ON user (column)' will fail on some databases because 'user' is a reserved keyword. It should be quoted as "user" or use the create_index_if_not_exists() utility. """ reserved_keywords = ['user', 'order', 'group', 'table', 'select', 'index'] problematic = [] for keyword in reserved_keywords: # Pattern to find unquoted reserved keyword after ON in index creation # Matches: CREATE INDEX ... ON user ( but not ON "user" or ON `user` pattern = rf"CREATE\s+(?:UNIQUE\s+)?INDEX[^;]*\s+ON\s+{keyword}\s*\(" matches = re.findall(pattern, self.content, re.IGNORECASE) for match in matches: # Skip if the keyword is already quoted if f'"{keyword}"' in match.lower() or f'`{keyword}`' in match.lower(): continue problematic.append((keyword, match[:80])) self.assertEqual( len(problematic), 0, f"Found unquoted reserved keywords in index creation. " f"Use create_index_if_not_exists() or quote the table name:\n" + "\n".join(f" - '{kw}' in: {sql}..." for kw, sql in problematic) ) def test_add_column_uses_utility(self): """ Ensure most ADD COLUMN operations use add_column_if_not_exists(). Direct ALTER TABLE ADD COLUMN statements should use the utility function to ensure cross-database compatibility with boolean defaults and quoting. """ # Count direct ALTER TABLE ADD COLUMN in text() calls direct_pattern = r"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*ALTER\s+TABLE[^'\"]*ADD\s+COLUMN" direct_matches = re.findall(direct_pattern, self.content, re.IGNORECASE) # Count uses of add_column_if_not_exists utility_pattern = r"add_column_if_not_exists\s*\(" utility_matches = re.findall(utility_pattern, self.content) # We expect most ADD COLUMN operations to use the utility # Allow some direct usage for special cases (e.g., table recreation) # but utility usage should significantly outnumber direct usage self.assertGreater( len(utility_matches), len(direct_matches), f"Found {len(direct_matches)} direct ALTER TABLE ADD COLUMN statements " f"vs {len(utility_matches)} add_column_if_not_exists() calls. " f"Consider using the utility function for cross-database compatibility." ) def test_incompatible_types_handled_by_utility(self): """ Ensure columns with PostgreSQL-incompatible types (DATETIME, BLOB) are added through add_column_if_not_exists() which auto-converts them, and NOT via raw ALTER TABLE statements that would bypass conversion. PostgreSQL type differences: - DATETIME -> TIMESTAMP - BLOB -> BYTEA """ incompatible_types = ['DATETIME', 'BLOB'] # Check for raw ALTER TABLE statements using incompatible types for sql_type in incompatible_types: pattern = rf"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*ALTER\s+TABLE[^'\"]*\b{sql_type}\b[^'\"]*['\"]" matches = re.findall(pattern, self.content, re.IGNORECASE) self.assertEqual( len(matches), 0, f"Found raw ALTER TABLE statements using '{sql_type}' which is incompatible with PostgreSQL. " f"Use add_column_if_not_exists() which auto-converts types:\n" + "\n".join(f" - {m[:100]}..." if len(m) > 100 else f" - {m}" for m in matches) ) # Verify that add_column_if_not_exists calls using these types exist # (confirming they go through the utility which handles conversion) for sql_type in incompatible_types: pattern = rf"add_column_if_not_exists\s*\([^)]*['\"]({sql_type})['\"]" matches = re.findall(pattern, self.content, re.IGNORECASE) # Just informational - these are fine because the utility converts them def test_no_double_quoted_string_defaults(self): """ Ensure no SQL DEFAULT values use double-quoted strings. In SQL, double quotes denote identifiers (column/table names), not string literals. SQLite tolerates this, but PostgreSQL will interpret DEFAULT "en" as a reference to a column named "en" and fail with 'column "en" does not exist'. String defaults must use single quotes: DEFAULT 'en' """ # Match DEFAULT followed by a double-quoted string value pattern = r'DEFAULT\s+"[^"]*"' lines = self.content.splitlines() problematic = [] for i, line in enumerate(lines, 1): if re.search(pattern, line, re.IGNORECASE): problematic.append(f" Line {i}: {line.strip()}") self.assertEqual( len(problematic), 0, f"Found double-quoted string defaults in init_db.py. " f"PostgreSQL interprets double quotes as column identifiers, not string literals. " f"Use single quotes instead (e.g., DEFAULT 'en' not DEFAULT \"en\"):\n" + "\n".join(problematic) ) def test_create_index_uses_utility_for_user_table(self): """ Ensure index creation on 'user' table uses create_index_if_not_exists(). The 'user' table name is a reserved keyword that requires special quoting. Using create_index_if_not_exists() handles this automatically. """ # Find all index creation on user table pattern = r"CREATE\s+(?:UNIQUE\s+)?INDEX[^;]*ON\s+[\"'`]?user[\"'`]?\s*\(" # Count raw index creation on user table in text() calls raw_pattern = r"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*CREATE\s+(?:UNIQUE\s+)?INDEX[^'\"]*ON\s+[\"'`]?user" raw_matches = re.findall(raw_pattern, self.content, re.IGNORECASE) # Count uses of create_index_if_not_exists for user table utility_pattern = r"create_index_if_not_exists\s*\([^)]*['\"]user['\"]" utility_matches = re.findall(utility_pattern, self.content, re.IGNORECASE) # All index creation on user table should use the utility # (excluding table recreation scenarios which have their own quoting) if len(raw_matches) > 0: # Check if these are in table recreation blocks (acceptable) table_recreation_pattern = r"CREATE\s+TABLE\s+user_new" has_table_recreation = re.search(table_recreation_pattern, self.content, re.IGNORECASE) if not has_table_recreation or len(raw_matches) > 1: self.fail( f"Found {len(raw_matches)} raw CREATE INDEX statements on 'user' table. " f"Use create_index_if_not_exists() for proper quoting of reserved keywords." ) if __name__ == '__main__': unittest.main()