# Robots.txt for Torly.AI
# Last updated: 2026-06-25
#
# Layout
#   1. ONE shared group for "*" and every explicitly welcomed crawler.
#      It carries the site-wide path exclusions.
#   2. ONE group that fully blocks aggressive SEO/scraper bots.
#   3. Sitemap location.
#
# Parsing rules this file relies on (RFC 9309):
#   - One directive per line; "#" comments out the rest of the line.
#   - A crawler obeys ONLY the group that best matches its user-agent.
#     A bot with its own "Allow: /" group would ignore every rule under "*",
#     so the welcomed bots are listed in the SAME group as the path rules.
#   - Rules attach to the most recent User-agent line(s). Path rules must
#     therefore sit directly inside the group they are meant for.
#   - No blank lines inside a group: some legacy parsers treat a blank line
#     as the end of the record. "#" lines are used as spacers instead.

# ================================================
# SHARED GROUP - default rule plus explicitly welcomed crawlers
# ================================================
User-agent: *
#
# Search engine bots
User-agent: Googlebot
User-agent: Bingbot
User-agent: DuckDuckBot
User-agent: Slurp
#
# AI crawlers - GEO (Generative Engine Optimization).
# These crawlers power AI assistants and should index the content
# so that AI-generated answers about the site are accurate.
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: CCBot
User-agent: ClaudeBot
User-agent: Claude-Web
# User-agent matching is case-insensitive per RFC 9309; both spellings are
# kept only for parsers that compare case-sensitively.
User-agent: anthropic-ai
User-agent: Anthropic-AI
User-agent: PerplexityBot
User-agent: Google-Extended
User-agent: Cohere-AI
User-agent: FacebookBot
User-agent: Amazonbot
User-agent: Applebot-Extended
#
# ------------------------------------------------
# DISALLOWED PATHS (apply to every user-agent listed above)
# ------------------------------------------------
# API endpoints (not useful for search)
Disallow: /api/cron/
Disallow: /api/webhooks/
Disallow: /api/internal/
#
# Next.js internals
# NOTE(review): /_next/static/ serves the JS/CSS bundles. Blocking it can stop
# search engines from rendering pages - confirm this exclusion is intended.
Disallow: /_next/static/
Disallow: /_next/image
#
# Admin/private areas
Disallow: /admin/
Disallow: /dashboard/
Disallow: /private/
#
# Temporary/test pages
Disallow: /test/
Disallow: /tmp/
#
# WordPress internals (proxied but should not be indexed)
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-json/
Disallow: /wp-content/themes/
Disallow: /wp-content/plugins/
Disallow: /wp-includes/
Allow: /wp-content/uploads/
#
# WordPress pagination & taxonomy archives (thin content, 401 errors)
Disallow: /blog/page/
Disallow: /blog/category/*/page/
Disallow: /blog/tag/
Disallow: /blog/category/professional-services/
Disallow: /blog/category/visa-policy-updates/
#
# Legacy routes (redirected, but block crawling of old paths)
# NOTE(review): crawlers cannot follow a redirect on a path they may not
# fetch - confirm that hiding these redirects is intended.
Disallow: /embed/
Disallow: /faq/
Disallow: /get-started/
Disallow: /visa-assessment/
Disallow: /endorsing-bodies/
Disallow: /support/
Disallow: /cookie-policy/
Disallow: /privacy-policy/
#
# Phantom routes (probed by bots, do not exist)
Disallow: /users
Disallow: /Users/
Disallow: /documents
Disallow: /project/
#
# WordPress feed URLs
Disallow: /feed/
Disallow: /blog/*/feed/
#
# Everything else is crawlable. Kept LAST so that parsers which apply the
# first matching rule (instead of the longest match) still honor the
# Disallow lines above.
Allow: /

# ================================================
# AGGRESSIVE SEO TOOL BOTS - Block (no value, burn requests)
# ================================================
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: PetalBot
User-agent: BLEXBot
User-agent: Bytespider
User-agent: YandexBot
User-agent: Diffbot
User-agent: DataForSeoBot
User-agent: Screaming Frog SEO Spider
User-agent: SeznamBot
User-agent: BomboraBot
User-agent: Sogou
User-agent: seekport
User-agent: MegaIndex
User-agent: ZoominfoBot
Disallow: /

# Origin server (should never be indexed directly)
# Note: origin.torly.ai needs its own robots.txt blocking all crawlers

# ================================================
# SITEMAP
# ================================================
Sitemap: https://torly.ai/sitemap.xml