feat(marketing): exempt public blueprints from noindex + fix / route collision
- add_no_crawl_headers now skips marketing.*, legal.*, billing.success, static, and robots_txt endpoints via the _is_public_indexable_endpoint helper; all other routes keep the X-Robots-Tag noindex header.
- recordings.index drops @login_required and instead redirects anonymous users to marketing.landing, resolving the URL-map collision between recordings_bp and marketing_bp at "/".
- robots.txt rewritten: public marketing pages and /legal/* are allowed; /api/, /admin, /account, /share/, /app/, /checkout, /login, /signup, /webhooks/ are disallowed; Googlebot, Bingbot, ClaudeBot, GPTBot, PerplexityBot, Applebot are explicitly allowed.
- New tests/test_no_crawl_headers.py (14 tests) covers the exemption helper plus integration on /, /robots.txt, /static, /admin, /login.
- New tests/test_marketing_root_redirect.py (4 tests) verifies anonymous users at / never get a /login redirect.

Tests verified via AST + logic walkthrough; pytest is blocked on Windows by the pre-existing fcntl import in src/init_db.py (B-1.2 limitation).
This commit is contained in:
122
tests/test_no_crawl_headers.py
Normal file
122
tests/test_no_crawl_headers.py
Normal file
@@ -0,0 +1,122 @@
|
||||
"""Verify the no-crawl X-Robots-Tag is exempted for marketing/legal/billing
|
||||
blueprints (Task B-1.3).
|
||||
|
||||
Marketing pages must be indexable by search engines and AI crawlers
|
||||
(Loi 25 transparency, GEO/SEO strategy). The after_request hook in
|
||||
src/app.py keeps emitting `X-Robots-Tag: noindex, nofollow, ...` for
|
||||
private routes (/api/*, /admin, /account, /share, /app, /auth/*,
|
||||
recordings dashboard).
|
||||
|
||||
Pattern: no conftest.py, env vars set at module load time, then import
|
||||
src.app.app directly. Mirrors tests/test_blueprint_registration.py.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Put the repository root (two directory levels above this file) at the front
# of sys.path so that the `src` package resolves without a conftest.py.
_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, _REPO_ROOT)

# Provide fallback configuration before the application module is imported;
# setdefault leaves any value already present in the environment untouched.
for _env_name, _env_fallback in (
    ('SQLALCHEMY_DATABASE_URI', 'sqlite:///:memory:'),
    ('SECRET_KEY', 'test-secret-key-for-no-crawl-headers'),
):
    os.environ.setdefault(_env_name, _env_fallback)
|
||||
|
||||
from src.app import app, _is_public_indexable_endpoint # noqa: E402
|
||||
|
||||
|
||||
def test_marketing_root_has_no_noindex_header():
    """An anonymous GET on '/' must come back without a noindex directive."""
    # Anonymous visitors are served the marketing landing here (B-1.3 swap
    # of recordings.index), so the response has to stay crawlable.
    resp = app.test_client().get('/')
    robots_tag = resp.headers.get('X-Robots-Tag', '')
    noindex_present = 'noindex' in robots_tag.lower()
    assert not noindex_present, (
        f"Expected no noindex on /, got X-Robots-Tag: {robots_tag!r}"
    )
|
||||
|
||||
|
||||
def test_robots_txt_route_has_no_noindex_header():
    """/robots.txt is itself a crawler directive, so it must not be noindexed."""
    resp = app.test_client().get('/robots.txt')
    robots_tag = resp.headers.get('X-Robots-Tag', '')
    noindex_present = 'noindex' in robots_tag.lower()
    assert not noindex_present, (
        f"Expected no noindex on /robots.txt, got X-Robots-Tag: {robots_tag!r}"
    )
|
||||
|
||||
|
||||
def test_static_assets_have_no_noindex_header():
    """Responses under /static must not carry an X-Robots-Tag noindex."""
    # A real file is unnecessary: the after_request hook keys off the
    # resolved endpoint, and a known-missing path under /static is still
    # routed to Flask's 'static' endpoint.
    missing_asset = '/static/this-does-not-exist.css'
    resp = app.test_client().get(missing_asset)
    robots_tag = resp.headers.get('X-Robots-Tag', '')
    noindex_present = 'noindex' in robots_tag.lower()
    assert not noindex_present, (
        f"Expected no noindex on /static/*, got X-Robots-Tag: {robots_tag!r}"
    )
|
||||
|
||||
|
||||
def test_private_route_still_has_noindex():
    """Private application routes must keep emitting the noindex header."""
    # /admin is login-protected, so an anonymous request is answered with a
    # redirect. The after_request hook also runs on that redirect response,
    # which therefore has to carry noindex too (defense-in-depth).
    resp = app.test_client().get('/admin', follow_redirects=False)
    robots_tag = resp.headers.get('X-Robots-Tag', '')
    noindex_present = 'noindex' in robots_tag.lower()
    assert noindex_present, (
        f"Expected noindex on /admin, got X-Robots-Tag: {robots_tag!r}"
    )
|
||||
|
||||
|
||||
def test_login_route_still_has_noindex():
    """/login is a private utility surface and must stay noindexed."""
    resp = app.test_client().get('/login', follow_redirects=False)
    robots_tag = resp.headers.get('X-Robots-Tag', '')
    noindex_present = 'noindex' in robots_tag.lower()
    assert noindex_present, (
        f"Expected noindex on /login, got X-Robots-Tag: {robots_tag!r}"
    )
|
||||
|
||||
|
||||
# --- Direct unit tests on the helper predicate ---
|
||||
|
||||
|
||||
def test_helper_marketing_endpoint_is_indexable():
    """The helper reports the 'marketing.landing' endpoint as indexable."""
    verdict = _is_public_indexable_endpoint('marketing.landing')
    assert verdict is True
|
||||
|
||||
|
||||
def test_helper_legal_endpoint_is_indexable():
    """The helper reports the 'legal.confidentialite' endpoint as indexable."""
    verdict = _is_public_indexable_endpoint('legal.confidentialite')
    assert verdict is True
|
||||
|
||||
|
||||
def test_helper_billing_success_is_indexable():
    """The helper reports the 'billing.success' endpoint as indexable."""
    verdict = _is_public_indexable_endpoint('billing.success')
    assert verdict is True
|
||||
|
||||
|
||||
def test_helper_billing_other_endpoints_not_indexable():
    """Billing endpoints other than billing.success are not indexable."""
    # billing.success is the only exempt billing endpoint; the rest of the
    # checkout flow stays private.
    for private_endpoint in ('billing.checkout', 'billing.webhook'):
        assert _is_public_indexable_endpoint(private_endpoint) is False
|
||||
|
||||
|
||||
def test_helper_static_endpoint_is_indexable():
    """The helper reports the 'static' endpoint as indexable."""
    verdict = _is_public_indexable_endpoint('static')
    assert verdict is True
|
||||
|
||||
|
||||
def test_helper_robots_endpoint_is_indexable():
    """The helper reports the 'robots_txt' endpoint as indexable."""
    verdict = _is_public_indexable_endpoint('robots_txt')
    assert verdict is True
|
||||
|
||||
|
||||
def test_helper_api_endpoint_not_indexable():
    """The helper reports the 'api.something' endpoint as not indexable."""
    verdict = _is_public_indexable_endpoint('api.something')
    assert verdict is False
|
||||
|
||||
|
||||
def test_helper_recordings_index_not_indexable():
    """The dashboard endpoint behind '/' (authenticated branch) stays noindexed."""
    verdict = _is_public_indexable_endpoint('recordings.index')
    assert verdict is False
|
||||
|
||||
|
||||
def test_helper_none_endpoint_not_indexable():
    """A missing endpoint (None or an empty string) is never indexable."""
    for missing_endpoint in (None, ''):
        assert _is_public_indexable_endpoint(missing_endpoint) is False
|
||||
Reference in New Issue
Block a user