"""Tests for the X-Robots-Tag no-crawl exemption (Task B-1.3).

Marketing, legal and billing-success pages must be indexable by search
engines and AI crawlers (Loi 25 transparency, GEO/SEO strategy), so they
are exempted from the no-crawl header. The after_request hook in
src/app.py keeps emitting `X-Robots-Tag: noindex, nofollow, ...` for
private routes (/api/*, /admin, /account, /share, /app, /auth/*,
recordings dashboard).

Pattern: no conftest.py; environment variables are set at module load
time and `src.app.app` is then imported directly. Mirrors
tests/test_blueprint_registration.py.
"""
import os
import sys

# Put the parent of this file's directory on sys.path so that `src` can be
# imported without a conftest.py.
_HERE = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, os.path.dirname(_HERE))

# Defaults must be in place *before* src.app is imported; setdefault leaves
# any value already present in the environment untouched.
for _name, _value in (
    ('SQLALCHEMY_DATABASE_URI', 'sqlite:///:memory:'),
    ('SECRET_KEY', 'test-secret-key-for-no-crawl-headers'),
):
    os.environ.setdefault(_name, _value)

from src.app import app, _is_public_indexable_endpoint  # noqa: E402


def _robots_header_for(path, **request_kwargs):
    """Return the raw X-Robots-Tag header of a GET on *path* ('' if absent).

    Extra keyword arguments are forwarded unchanged to the test client's
    ``get`` call.
    """
    response = app.test_client().get(path, **request_kwargs)
    return response.headers.get('X-Robots-Tag', '')


def test_marketing_root_has_no_noindex_header():
    """The marketing root '/' must not be served with a noindex header."""
    # An anonymous visitor receives the marketing landing page
    # (B-1.3 swap of recordings.index).
    robots_tag = _robots_header_for('/')
    assert 'noindex' not in robots_tag.lower(), (
        f"Expected no noindex on /, got X-Robots-Tag: {robots_tag!r}"
    )


def test_robots_txt_route_has_no_noindex_header():
    """/robots.txt is itself a crawler directive and must not be noindexed."""
    robots_tag = _robots_header_for('/robots.txt')
    assert 'noindex' not in robots_tag.lower(), (
        f"Expected no noindex on /robots.txt, got X-Robots-Tag: {robots_tag!r}"
    )


def test_static_assets_have_no_noindex_header():
    """Responses under /static/ must not carry a noindex header."""
    # No real file is required: the after_request hook keys off the resolved
    # endpoint, and Flask's static handler still resolves request.endpoint
    # to 'static' for a path that does not exist.
    robots_tag = _robots_header_for('/static/this-does-not-exist.css')
    assert 'noindex' not in robots_tag.lower(), (
        f"Expected no noindex on /static/*, got X-Robots-Tag: {robots_tag!r}"
    )


def test_private_route_still_has_noindex():
    """A private application route must keep its noindex header."""
    # /admin requires login, so an anonymous request is redirected. The
    # after_request hook also runs on that redirect response, which must
    # therefore carry the header too (defense-in-depth).
    robots_tag = _robots_header_for('/admin', follow_redirects=False)
    assert 'noindex' in robots_tag.lower(), (
        f"Expected noindex on /admin, got X-Robots-Tag: {robots_tag!r}"
    )


def test_login_route_still_has_noindex():
    """/login is a private utility surface and must stay noindexed."""
    robots_tag = _robots_header_for('/login', follow_redirects=False)
    assert 'noindex' in robots_tag.lower(), (
        f"Expected noindex on /login, got X-Robots-Tag: {robots_tag!r}"
    )


# --- Direct unit tests on the helper predicate ---

def test_helper_marketing_endpoint_is_indexable():
    """The marketing landing endpoint is reported as indexable."""
    assert _is_public_indexable_endpoint('marketing.landing') is True


def test_helper_legal_endpoint_is_indexable():
    """A legal-blueprint endpoint is reported as indexable."""
    assert _is_public_indexable_endpoint('legal.confidentialite') is True


def test_helper_billing_success_is_indexable():
    """billing.success is the one billing endpoint that is indexable."""
    assert _is_public_indexable_endpoint('billing.success') is True


def test_helper_billing_other_endpoints_not_indexable():
    """Every other step of the checkout flow stays private."""
    # Only billing.success is exempt from the no-crawl header.
    for endpoint in ('billing.checkout', 'billing.webhook'):
        assert _is_public_indexable_endpoint(endpoint) is False


def test_helper_static_endpoint_is_indexable():
    """The built-in 'static' endpoint is reported as indexable."""
    assert _is_public_indexable_endpoint('static') is True


def test_helper_robots_endpoint_is_indexable():
    """The 'robots_txt' endpoint is reported as indexable."""
    assert _is_public_indexable_endpoint('robots_txt') is True


def test_helper_api_endpoint_not_indexable():
    """API endpoints are never reported as indexable."""
    assert _is_public_indexable_endpoint('api.something') is False


def test_helper_recordings_index_not_indexable():
    """The dashboard served at '/' for authenticated users keeps noindex."""
    assert _is_public_indexable_endpoint('recordings.index') is False


def test_helper_none_endpoint_not_indexable():
    """A missing or empty endpoint name is treated as not indexable."""
    for endpoint in (None, ''):
        assert _is_public_indexable_endpoint(endpoint) is False