Initial release: DictIA v0.8.14-alpha (fork of Speakr, AGPL-3.0).
New file added in this commit: tests/test_migration_compatibility.py (251 lines).
|
||||
"""
|
||||
Test suite to ensure database migrations are compatible with both SQLite and PostgreSQL.
|
||||
|
||||
These tests scan the init_db.py file for patterns that would break on PostgreSQL,
|
||||
such as SQLite-only boolean defaults (0/1 instead of FALSE/TRUE) and unquoted
|
||||
reserved keywords.
|
||||
|
||||
Run with: python tests/test_migration_compatibility.py
|
||||
"""
|
||||
|
||||
import re
|
||||
import unittest
|
||||
import os
|
||||
|
||||
|
||||
class TestMigrationCompatibility(unittest.TestCase):
    """Tests to ensure init_db.py uses cross-database compatible SQL.

    All tests operate on the raw text of src/init_db.py (loaded once in
    setUpClass) and look for SQL patterns that run on SQLite but fail on
    PostgreSQL: integer boolean defaults/comparisons, unquoted reserved
    keywords, double-quoted string literals, and SQLite-only column types.
    """

    @classmethod
    def setUpClass(cls):
        """Load init_db.py content once for all tests."""
        # The project root is assumed to be the parent of this tests/ dir,
        # with the target file at src/init_db.py — TODO confirm layout.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(test_dir)
        init_db_path = os.path.join(project_root, 'src', 'init_db.py')

        with open(init_db_path, 'r') as f:
            cls.content = f.read()

    def test_no_raw_boolean_defaults_in_alter_table(self):
        """
        Ensure no raw ALTER TABLE statements use SQLite-only boolean defaults.

        The pattern 'BOOLEAN DEFAULT 0' or 'BOOLEAN DEFAULT 1' in raw SQL
        will fail on PostgreSQL, which requires 'DEFAULT FALSE' or 'DEFAULT TRUE'.

        Using add_column_if_not_exists() handles this conversion automatically.
        """
        # Pattern to find raw SQL with text() that has BOOLEAN DEFAULT 0/1.
        # This matches: text('... BOOLEAN DEFAULT 0 ...') or text("...")
        pattern = r"conn\.execute\s*\(\s*text\s*\(['\"]([^'\"]*BOOLEAN\s+DEFAULT\s+[01][^'\"]*)['\"]"

        matches = re.findall(pattern, self.content, re.IGNORECASE)

        # Filter out false positives - we're looking for raw ALTER TABLE
        # statements, not UPDATE statements or other SQL that legitimately
        # uses 0/1.
        problematic = []
        for match in matches:
            match_upper = match.upper()
            # Only flag ALTER TABLE statements with a BOOLEAN DEFAULT 0/1.
            # BUG FIX: the DEFAULT check must run against the upper-cased
            # text — findall above is IGNORECASE, so lowercase SQL such as
            # 'boolean default 0' would previously slip through unflagged.
            if 'ALTER TABLE' in match_upper and 'BOOLEAN' in match_upper:
                if 'DEFAULT 0' in match_upper or 'DEFAULT 1' in match_upper:
                    problematic.append(match)

        self.assertEqual(
            len(problematic), 0,
            "Found SQLite-only boolean defaults in raw ALTER TABLE statements. "
            "Use add_column_if_not_exists() instead:\n" +
            "\n".join(f"  - {m[:100]}..." if len(m) > 100 else f"  - {m}" for m in problematic)
        )

    def test_no_boolean_integer_comparisons_in_raw_sql(self):
        """
        Ensure raw SQL doesn't compare boolean columns to integers (0/1).

        PostgreSQL strictly separates boolean and integer types:
        - 'column = 1' fails with 'operator does not exist: boolean = integer'
        - 'column = TRUE' works on both SQLite (3.23+) and PostgreSQL

        Known boolean columns in migrations: protect_from_deletion, email_verified,
        auto_share_on_apply, share_with_group_lead, is_inbox, is_highlighted,
        deletion_exempt, is_admin, can_share_publicly.
        """
        boolean_columns = [
            'protect_from_deletion', 'email_verified', 'auto_share_on_apply',
            'share_with_group_lead', 'is_inbox', 'is_highlighted',
            'deletion_exempt', 'is_admin', 'can_share_publicly',
            'auto_speaker_labelling', 'auto_summarization'
        ]

        # Strategy: scan for `<column> = 0` / `<column> = 1` occurrences and
        # then inspect the surrounding context to confirm the comparison sits
        # inside a text() SQL call.  (A previous, never-used regex for
        # extracting whole text() bodies was removed — it was unused and its
        # quoting was broken.)
        problematic = []
        for col in boolean_columns:
            # Match: column = 0 or column = 1 (not = TRUE/FALSE)
            pattern = rf"{col}\s*=\s*[01]\b"
            matches = re.finditer(pattern, self.content, re.IGNORECASE)
            for match in matches:
                # Get surrounding context to check if it's in a text() SQL call.
                # sqlite_master queries are SQLite-only by design, so skip them.
                start = max(0, match.start() - 200)
                context = self.content[start:match.end() + 50]
                if 'text(' in context and 'sqlite_master' not in context:
                    problematic.append(f"{col}: ...{match.group()}...")

        self.assertEqual(
            len(problematic), 0,
            "Found boolean columns compared to integers in raw SQL. "
            "Use TRUE/FALSE instead of 1/0 for PostgreSQL compatibility:\n" +
            "\n".join(f"  - {p}" for p in problematic)
        )

    def test_reserved_keywords_quoted_in_index_creation(self):
        """
        Ensure reserved keywords like 'user' are properly quoted in index creation.

        Raw SQL like 'CREATE INDEX ... ON user (column)' will fail on some databases
        because 'user' is a reserved keyword. It should be quoted as "user" or use
        the create_index_if_not_exists() utility.
        """
        reserved_keywords = ['user', 'order', 'group', 'table', 'select', 'index']

        problematic = []

        for keyword in reserved_keywords:
            # Pattern to find unquoted reserved keyword after ON in index creation.
            # Matches: CREATE INDEX ... ON user ( but not ON "user" or ON `user`
            pattern = rf"CREATE\s+(?:UNIQUE\s+)?INDEX[^;]*\s+ON\s+{keyword}\s*\("

            matches = re.findall(pattern, self.content, re.IGNORECASE)

            for match in matches:
                # Defensive double-check: skip if the keyword is quoted
                # (the regex should already exclude quoted forms).
                if f'"{keyword}"' in match.lower() or f'`{keyword}`' in match.lower():
                    continue
                problematic.append((keyword, match[:80]))

        self.assertEqual(
            len(problematic), 0,
            "Found unquoted reserved keywords in index creation. "
            "Use create_index_if_not_exists() or quote the table name:\n" +
            "\n".join(f"  - '{kw}' in: {sql}..." for kw, sql in problematic)
        )

    def test_add_column_uses_utility(self):
        """
        Ensure most ADD COLUMN operations use add_column_if_not_exists().

        Direct ALTER TABLE ADD COLUMN statements should use the utility function
        to ensure cross-database compatibility with boolean defaults and quoting.
        """
        # Count direct ALTER TABLE ADD COLUMN in text() calls
        direct_pattern = r"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*ALTER\s+TABLE[^'\"]*ADD\s+COLUMN"
        direct_matches = re.findall(direct_pattern, self.content, re.IGNORECASE)

        # Count uses of add_column_if_not_exists
        utility_pattern = r"add_column_if_not_exists\s*\("
        utility_matches = re.findall(utility_pattern, self.content)

        # We expect most ADD COLUMN operations to use the utility.
        # Allow some direct usage for special cases (e.g., table recreation)
        # but utility usage should significantly outnumber direct usage.
        self.assertGreater(
            len(utility_matches), len(direct_matches),
            f"Found {len(direct_matches)} direct ALTER TABLE ADD COLUMN statements "
            f"vs {len(utility_matches)} add_column_if_not_exists() calls. "
            f"Consider using the utility function for cross-database compatibility."
        )

    def test_incompatible_types_handled_by_utility(self):
        """
        Ensure columns with PostgreSQL-incompatible types (DATETIME, BLOB) are
        added through add_column_if_not_exists() which auto-converts them,
        and NOT via raw ALTER TABLE statements that would bypass conversion.

        PostgreSQL type differences:
        - DATETIME -> TIMESTAMP
        - BLOB -> BYTEA

        add_column_if_not_exists() calls that *use* these types are fine,
        because the utility performs the conversion itself.
        """
        incompatible_types = ['DATETIME', 'BLOB']

        # Check for raw ALTER TABLE statements using incompatible types
        for sql_type in incompatible_types:
            pattern = rf"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*ALTER\s+TABLE[^'\"]*\b{sql_type}\b[^'\"]*['\"]"
            matches = re.findall(pattern, self.content, re.IGNORECASE)

            self.assertEqual(
                len(matches), 0,
                f"Found raw ALTER TABLE statements using '{sql_type}' which is incompatible with PostgreSQL. "
                f"Use add_column_if_not_exists() which auto-converts types:\n" +
                "\n".join(f"  - {m[:100]}..." if len(m) > 100 else f"  - {m}" for m in matches)
            )

    def test_no_double_quoted_string_defaults(self):
        """
        Ensure no SQL DEFAULT values use double-quoted strings.

        In SQL, double quotes denote identifiers (column/table names), not string
        literals. SQLite tolerates this, but PostgreSQL will interpret DEFAULT "en"
        as a reference to a column named "en" and fail with 'column "en" does not exist'.

        String defaults must use single quotes: DEFAULT 'en'
        """
        # Match DEFAULT followed by a double-quoted string value
        pattern = r'DEFAULT\s+"[^"]*"'

        lines = self.content.splitlines()
        problematic = []
        for i, line in enumerate(lines, 1):
            if re.search(pattern, line, re.IGNORECASE):
                problematic.append(f"  Line {i}: {line.strip()}")

        self.assertEqual(
            len(problematic), 0,
            "Found double-quoted string defaults in init_db.py. "
            "PostgreSQL interprets double quotes as column identifiers, not string literals. "
            "Use single quotes instead (e.g., DEFAULT 'en' not DEFAULT \"en\"):\n" +
            "\n".join(problematic)
        )

    def test_create_index_uses_utility_for_user_table(self):
        """
        Ensure index creation on 'user' table uses create_index_if_not_exists().

        The 'user' table name is a reserved keyword that requires special quoting.
        Using create_index_if_not_exists() handles this automatically.
        """
        # Count raw index creation on user table in text() calls
        raw_pattern = r"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*CREATE\s+(?:UNIQUE\s+)?INDEX[^'\"]*ON\s+[\"'`]?user"
        raw_matches = re.findall(raw_pattern, self.content, re.IGNORECASE)

        # All index creation on user table should use the utility
        # (excluding table recreation scenarios which have their own quoting).
        if len(raw_matches) > 0:
            # Check if these are in table recreation blocks (acceptable)
            table_recreation_pattern = r"CREATE\s+TABLE\s+user_new"
            has_table_recreation = re.search(table_recreation_pattern, self.content, re.IGNORECASE)

            if not has_table_recreation or len(raw_matches) > 1:
                self.fail(
                    f"Found {len(raw_matches)} raw CREATE INDEX statements on 'user' table. "
                    f"Use create_index_if_not_exists() for proper quoting of reserved keywords."
                )
# Allow running this suite directly: python tests/test_migration_compatibility.py
if __name__ == "__main__":
    unittest.main()
Reference in New Issue
Block a user