Initial release: DictIA v0.8.14-alpha (fork of Speakr, AGPL-3.0).
New file added in this commit: tests/test_migration_compatibility.py (251 lines).
|
||||
"""
|
||||
Test suite to ensure database migrations are compatible with both SQLite and PostgreSQL.
|
||||
|
||||
These tests scan the init_db.py file for patterns that would break on PostgreSQL,
|
||||
such as SQLite-only boolean defaults (0/1 instead of FALSE/TRUE) and unquoted
|
||||
reserved keywords.
|
||||
|
||||
Run with: python tests/test_migration_compatibility.py
|
||||
"""
|
||||
|
||||
import re
|
||||
import unittest
|
||||
import os
|
||||
|
||||
|
||||
class TestMigrationCompatibility(unittest.TestCase):
    """Tests to ensure init_db.py uses cross-database compatible SQL.

    All tests operate on the raw text of src/init_db.py (loaded once in
    setUpClass) and look for SQL patterns that run on SQLite but fail on
    PostgreSQL: integer boolean defaults/comparisons, unquoted reserved
    keywords, double-quoted string literals, and SQLite-only column types.
    """

    @classmethod
    def setUpClass(cls):
        """Load init_db.py content once for all tests."""
        # The project root is assumed to be the parent of this tests/ dir,
        # with the target file at src/init_db.py — TODO confirm layout.
        test_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(test_dir)
        init_db_path = os.path.join(project_root, 'src', 'init_db.py')

        with open(init_db_path, 'r') as f:
            cls.content = f.read()

    def test_no_raw_boolean_defaults_in_alter_table(self):
        """
        Ensure no raw ALTER TABLE statements use SQLite-only boolean defaults.

        The pattern 'BOOLEAN DEFAULT 0' or 'BOOLEAN DEFAULT 1' in raw SQL
        will fail on PostgreSQL, which requires 'DEFAULT FALSE' or 'DEFAULT TRUE'.

        Using add_column_if_not_exists() handles this conversion automatically.
        """
        # Pattern to find raw SQL with text() that has BOOLEAN DEFAULT 0/1.
        # This matches: text('... BOOLEAN DEFAULT 0 ...') or text("...")
        pattern = r"conn\.execute\s*\(\s*text\s*\(['\"]([^'\"]*BOOLEAN\s+DEFAULT\s+[01][^'\"]*)['\"]"

        matches = re.findall(pattern, self.content, re.IGNORECASE)

        # Filter out false positives - we're looking for raw ALTER TABLE
        # statements, not UPDATE statements or other SQL that legitimately
        # uses 0/1.
        problematic = []
        for match in matches:
            match_upper = match.upper()
            # Only flag ALTER TABLE statements with a BOOLEAN DEFAULT 0/1.
            # BUG FIX: the DEFAULT check must run against the upper-cased
            # text — findall above is IGNORECASE, so lowercase SQL such as
            # 'boolean default 0' would previously slip through unflagged.
            if 'ALTER TABLE' in match_upper and 'BOOLEAN' in match_upper:
                if 'DEFAULT 0' in match_upper or 'DEFAULT 1' in match_upper:
                    problematic.append(match)

        self.assertEqual(
            len(problematic), 0,
            "Found SQLite-only boolean defaults in raw ALTER TABLE statements. "
            "Use add_column_if_not_exists() instead:\n" +
            "\n".join(f"  - {m[:100]}..." if len(m) > 100 else f"  - {m}" for m in problematic)
        )

    def test_no_boolean_integer_comparisons_in_raw_sql(self):
        """
        Ensure raw SQL doesn't compare boolean columns to integers (0/1).

        PostgreSQL strictly separates boolean and integer types:
        - 'column = 1' fails with 'operator does not exist: boolean = integer'
        - 'column = TRUE' works on both SQLite (3.23+) and PostgreSQL

        Known boolean columns in migrations: protect_from_deletion, email_verified,
        auto_share_on_apply, share_with_group_lead, is_inbox, is_highlighted,
        deletion_exempt, is_admin, can_share_publicly.
        """
        boolean_columns = [
            'protect_from_deletion', 'email_verified', 'auto_share_on_apply',
            'share_with_group_lead', 'is_inbox', 'is_highlighted',
            'deletion_exempt', 'is_admin', 'can_share_publicly',
            'auto_speaker_labelling', 'auto_summarization'
        ]

        # Strategy: scan for `<column> = 0` / `<column> = 1` occurrences and
        # then inspect the surrounding context to confirm the comparison sits
        # inside a text() SQL call.  (A previous, never-used regex for
        # extracting whole text() bodies was removed — it was unused and its
        # quoting was broken.)
        problematic = []
        for col in boolean_columns:
            # Match: column = 0 or column = 1 (not = TRUE/FALSE)
            pattern = rf"{col}\s*=\s*[01]\b"
            matches = re.finditer(pattern, self.content, re.IGNORECASE)
            for match in matches:
                # Get surrounding context to check if it's in a text() SQL call.
                # sqlite_master queries are SQLite-only by design, so skip them.
                start = max(0, match.start() - 200)
                context = self.content[start:match.end() + 50]
                if 'text(' in context and 'sqlite_master' not in context:
                    problematic.append(f"{col}: ...{match.group()}...")

        self.assertEqual(
            len(problematic), 0,
            "Found boolean columns compared to integers in raw SQL. "
            "Use TRUE/FALSE instead of 1/0 for PostgreSQL compatibility:\n" +
            "\n".join(f"  - {p}" for p in problematic)
        )

    def test_reserved_keywords_quoted_in_index_creation(self):
        """
        Ensure reserved keywords like 'user' are properly quoted in index creation.

        Raw SQL like 'CREATE INDEX ... ON user (column)' will fail on some databases
        because 'user' is a reserved keyword. It should be quoted as "user" or use
        the create_index_if_not_exists() utility.
        """
        reserved_keywords = ['user', 'order', 'group', 'table', 'select', 'index']

        problematic = []

        for keyword in reserved_keywords:
            # Pattern to find unquoted reserved keyword after ON in index creation.
            # Matches: CREATE INDEX ... ON user ( but not ON "user" or ON `user`
            pattern = rf"CREATE\s+(?:UNIQUE\s+)?INDEX[^;]*\s+ON\s+{keyword}\s*\("

            matches = re.findall(pattern, self.content, re.IGNORECASE)

            for match in matches:
                # Defensive double-check: skip if the keyword is quoted
                # (the regex should already exclude quoted forms).
                if f'"{keyword}"' in match.lower() or f'`{keyword}`' in match.lower():
                    continue
                problematic.append((keyword, match[:80]))

        self.assertEqual(
            len(problematic), 0,
            "Found unquoted reserved keywords in index creation. "
            "Use create_index_if_not_exists() or quote the table name:\n" +
            "\n".join(f"  - '{kw}' in: {sql}..." for kw, sql in problematic)
        )

    def test_add_column_uses_utility(self):
        """
        Ensure most ADD COLUMN operations use add_column_if_not_exists().

        Direct ALTER TABLE ADD COLUMN statements should use the utility function
        to ensure cross-database compatibility with boolean defaults and quoting.
        """
        # Count direct ALTER TABLE ADD COLUMN in text() calls
        direct_pattern = r"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*ALTER\s+TABLE[^'\"]*ADD\s+COLUMN"
        direct_matches = re.findall(direct_pattern, self.content, re.IGNORECASE)

        # Count uses of add_column_if_not_exists
        utility_pattern = r"add_column_if_not_exists\s*\("
        utility_matches = re.findall(utility_pattern, self.content)

        # We expect most ADD COLUMN operations to use the utility.
        # Allow some direct usage for special cases (e.g., table recreation)
        # but utility usage should significantly outnumber direct usage.
        self.assertGreater(
            len(utility_matches), len(direct_matches),
            f"Found {len(direct_matches)} direct ALTER TABLE ADD COLUMN statements "
            f"vs {len(utility_matches)} add_column_if_not_exists() calls. "
            f"Consider using the utility function for cross-database compatibility."
        )

    def test_incompatible_types_handled_by_utility(self):
        """
        Ensure columns with PostgreSQL-incompatible types (DATETIME, BLOB) are
        added through add_column_if_not_exists() which auto-converts them,
        and NOT via raw ALTER TABLE statements that would bypass conversion.

        PostgreSQL type differences:
        - DATETIME -> TIMESTAMP
        - BLOB -> BYTEA

        add_column_if_not_exists() calls that *use* these types are fine,
        because the utility performs the conversion itself.
        """
        incompatible_types = ['DATETIME', 'BLOB']

        # Check for raw ALTER TABLE statements using incompatible types
        for sql_type in incompatible_types:
            pattern = rf"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*ALTER\s+TABLE[^'\"]*\b{sql_type}\b[^'\"]*['\"]"
            matches = re.findall(pattern, self.content, re.IGNORECASE)

            self.assertEqual(
                len(matches), 0,
                f"Found raw ALTER TABLE statements using '{sql_type}' which is incompatible with PostgreSQL. "
                f"Use add_column_if_not_exists() which auto-converts types:\n" +
                "\n".join(f"  - {m[:100]}..." if len(m) > 100 else f"  - {m}" for m in matches)
            )

    def test_no_double_quoted_string_defaults(self):
        """
        Ensure no SQL DEFAULT values use double-quoted strings.

        In SQL, double quotes denote identifiers (column/table names), not string
        literals. SQLite tolerates this, but PostgreSQL will interpret DEFAULT "en"
        as a reference to a column named "en" and fail with 'column "en" does not exist'.

        String defaults must use single quotes: DEFAULT 'en'
        """
        # Match DEFAULT followed by a double-quoted string value
        pattern = r'DEFAULT\s+"[^"]*"'

        lines = self.content.splitlines()
        problematic = []
        for i, line in enumerate(lines, 1):
            if re.search(pattern, line, re.IGNORECASE):
                problematic.append(f"  Line {i}: {line.strip()}")

        self.assertEqual(
            len(problematic), 0,
            "Found double-quoted string defaults in init_db.py. "
            "PostgreSQL interprets double quotes as column identifiers, not string literals. "
            "Use single quotes instead (e.g., DEFAULT 'en' not DEFAULT \"en\"):\n" +
            "\n".join(problematic)
        )

    def test_create_index_uses_utility_for_user_table(self):
        """
        Ensure index creation on 'user' table uses create_index_if_not_exists().

        The 'user' table name is a reserved keyword that requires special quoting.
        Using create_index_if_not_exists() handles this automatically.
        """
        # Count raw index creation on user table in text() calls
        raw_pattern = r"conn\.execute\s*\(\s*text\s*\(['\"][^'\"]*CREATE\s+(?:UNIQUE\s+)?INDEX[^'\"]*ON\s+[\"'`]?user"
        raw_matches = re.findall(raw_pattern, self.content, re.IGNORECASE)

        # All index creation on user table should use the utility
        # (excluding table recreation scenarios which have their own quoting).
        if len(raw_matches) > 0:
            # Check if these are in table recreation blocks (acceptable)
            table_recreation_pattern = r"CREATE\s+TABLE\s+user_new"
            has_table_recreation = re.search(table_recreation_pattern, self.content, re.IGNORECASE)

            if not has_table_recreation or len(raw_matches) > 1:
                self.fail(
                    f"Found {len(raw_matches)} raw CREATE INDEX statements on 'user' table. "
                    f"Use create_index_if_not_exists() for proper quoting of reserved keywords."
                )
# Allow running this suite directly: python tests/test_migration_compatibility.py
if __name__ == "__main__":
    unittest.main()
Reference in New Issue
Block a user