# ============================================================
# ClearlyComply — robots.txt
# Domain: https://clearlycomply.org
# Last updated: 2026-02-24
# ============================================================
#
# Format notes (RFC 9309):
#   * Every directive must sit on its own line; anything after
#     a '#' on a line is a comment and is ignored by crawlers.
#   * A crawler obeys ONLY the most specific User-agent group
#     that matches it; groups are never merged. A crawler with
#     its own group does not inherit rules from "User-agent: *".
#   * Within a group the longest matching path wins, and Allow
#     beats Disallow on a tie.
#   * This file is public and is NOT access control. Sensitive
#     paths listed here (/.env, /config/, /private/, ...) must
#     also be blocked or protected at the web server.

# ============================================================
# Default rules — all well-behaved crawlers
# ============================================================
# Anything not disallowed below is crawlable by default, so no
# blanket "Allow: /" is needed.
User-agent: *
#
# Block admin / backend paths (add your CMS paths here)
Disallow: /admin/
Disallow: /dashboard/
Disallow: /api/
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /.env
Disallow: /config/
Disallow: /private/
#
# Block duplicate / thin content (faceted search, session params).
# Each parameter is listed twice: "?name" matches it as the first
# query parameter, "&name" matches it in any later position.
Disallow: /*?sort=
Disallow: /*&sort=
Disallow: /*?filter=
Disallow: /*&filter=
Disallow: /*?session=
Disallow: /*&session=
Disallow: /*?ref=
Disallow: /*&ref=
Disallow: /*?utm_
Disallow: /*&utm_
#
# Allow critical CSS, JS, images for rendering
Allow: /css/
Allow: /js/
Allow: /images/
Allow: /*.css$
Allow: /*.js$
#
# Optional crawl delay for heavy crawlers. The value is in SECONDS.
# Crawl-delay is a non-standard extension: Googlebot ignores it,
# some other crawlers (e.g. Bingbot) honor it.
# Crawl-delay: 1

# ============================================================
# Googlebot
# ============================================================
# Intentionally no separate "User-agent: Googlebot" group.
# A Googlebot group containing only "Allow: /" would replace the
# default group for Googlebot and expose every path blocked above.
# Googlebot therefore follows the "User-agent: *" rules. If
# Googlebot-specific rules are ever needed, copy ALL Disallow /
# Allow lines from the default group into the new group.

# ============================================================
# Block AI training crawlers (optional — remove if preferred)
# ============================================================
User-agent: GPTBot
Disallow: /

# Note: ChatGPT-User fetches pages on behalf of a user request
# rather than for training; remove this group to allow that.
User-agent: ChatGPT-User
Disallow: /

User-agent: CCBot
Disallow: /

# Anthropic: ClaudeBot is the current crawler token; anthropic-ai
# and Claude-Web are legacy tokens kept for older crawlers.
User-agent: ClaudeBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

# ============================================================
# Sitemap declarations — index plus 4 section sitemaps
# ============================================================
# Sitemap lines are independent of User-agent groups and apply
# to all crawlers.
Sitemap: https://clearlycomply.org/sitemap.xml
Sitemap: https://clearlycomply.org/sitemap-pages.xml
Sitemap: https://clearlycomply.org/sitemap-services.xml
Sitemap: https://clearlycomply.org/sitemap-cities.xml
Sitemap: https://clearlycomply.org/sitemap-blog.xml