# robots.txt for Associations.com
# Last updated: 2026-03-08
#
# How to read this file:
#   * Every directive must sit on its own line; "#" starts a comment that
#     runs to the end of the line.
#   * A crawler obeys ONLY the one group whose User-agent token matches it
#     most specifically. It does NOT additionally apply the "*" group, so
#     every group below repeats the Disallow rules it needs.
#   * Within a group the longest matching path wins (RFC 9309). Rules are
#     still listed most-specific-first so that legacy first-match parsers
#     reach the same decision.
#   * No blank lines inside a group: some legacy parsers treat a blank line
#     as the end of the group.

# Sitemap location (applies to every crawler, independent of any group)
Sitemap: https://associations.com/sitemap.xml

# ── DEFAULT RULES + MAJOR SEARCH ENGINES ───────────────────────────────────
# Applies to every crawler not named in a later group, and explicitly to
# Googlebot, Bingbot and DuckDuckBot.
User-agent: *
User-agent: Googlebot
User-agent: Bingbot
User-agent: DuckDuckBot
# Block sensitive paths
Disallow: /admin/
Disallow: /api/
Disallow: /internal/
Disallow: /stripe-webhook/
Disallow: /payment-intent/
Disallow: /track-click/
Disallow: /hot-lead/
Disallow: /jobs/create/
Disallow: /vendor-dashboard/
Disallow: /account/
Disallow: /login/
Disallow: /register/
Disallow: /reset-password/
Disallow: /verify-email/
# Block query parameters that create duplicate content.
# "?name=" matches the parameter in first position, "&name=" in any later one.
Disallow: /*?utm_source=
Disallow: /*&utm_source=
Disallow: /*?utm_medium=
Disallow: /*&utm_medium=
Disallow: /*?utm_campaign=
Disallow: /*&utm_campaign=
Disallow: /*?ref=
Disallow: /*&ref=
Disallow: /*?sort=
Disallow: /*&sort=
Disallow: /*?filter=
Disallow: /*&filter=
Disallow: /*?page=
Disallow: /*&page=
# Allow search pages but limit parameter crawling
Disallow: /search?
Allow: /search
Allow: /
# Crawl rate optimization. Crawl-delay is non-standard: Google ignores it,
# other crawlers in this group may honor it.
Crawl-delay: 1

# ── SLOWER SEARCH ENGINES ──────────────────────────────────────────────────
# Baidu and Yandex: same rules as the default group, longer crawl delay.
User-agent: Baiduspider
User-agent: YandexBot
Disallow: /admin/
Disallow: /api/
Disallow: /internal/
Disallow: /stripe-webhook/
Disallow: /payment-intent/
Disallow: /track-click/
Disallow: /hot-lead/
Disallow: /jobs/create/
Disallow: /vendor-dashboard/
Disallow: /account/
Disallow: /login/
Disallow: /register/
Disallow: /reset-password/
Disallow: /verify-email/
Disallow: /*?utm_source=
Disallow: /*&utm_source=
Disallow: /*?utm_medium=
Disallow: /*&utm_medium=
Disallow: /*?utm_campaign=
Disallow: /*&utm_campaign=
Disallow: /*?ref=
Disallow: /*&ref=
Disallow: /*?sort=
Disallow: /*&sort=
Disallow: /*?filter=
Disallow: /*&filter=
Disallow: /*?page=
Disallow: /*&page=
Disallow: /search?
Allow: /search
Allow: /
Crawl-delay: 2

# ── SPECIFIC BOT INSTRUCTIONS ──────────────────────────────────────────────
# Googlebot-Image: media directories only, everything else is off limits.
User-agent: Googlebot-Image
Allow: /images/
Allow: /uploads/
Disallow: /

# Googlebot-News: editorial sections only, everything else is off limits.
User-agent: Googlebot-News
Allow: /blog/
Allow: /news/
Allow: /resources/
Disallow: /

# ── SOCIAL MEDIA CRAWLERS ──────────────────────────────────────────────────
# Link-preview fetchers. Sensitive paths are blocked, but the query-parameter
# rules are deliberately NOT applied here: shared links routinely carry
# utm_* / ref parameters and must still render a preview card.
User-agent: Twitterbot
User-agent: facebookexternalhit
User-agent: LinkedInBot
User-agent: WhatsApp
User-agent: Slackbot
User-agent: Discordbot
Disallow: /admin/
Disallow: /api/
Disallow: /internal/
Disallow: /stripe-webhook/
Disallow: /payment-intent/
Disallow: /track-click/
Disallow: /hot-lead/
Disallow: /jobs/create/
Disallow: /vendor-dashboard/
Disallow: /account/
Disallow: /login/
Disallow: /register/
Disallow: /reset-password/
Disallow: /verify-email/
Allow: /

# ── AI CRAWLERS ────────────────────────────────────────────────────────────
# Explicitly permitting reputable AI crawlers to index public content.
# This improves discoverability in AI-powered search and chat interfaces.
# AI crawlers that are welcome on public content.
# A crawler that matches a named group ignores the "*" group entirely, so the
# sensitive-path list of the default group is repeated here. Rules are listed
# most-specific-first, with no blank lines inside a group, for the benefit of
# legacy parsers.
#
# OpenAI (ChatGPT, GPT-4, SearchGPT)
User-agent: GPTBot
# OpenAI (fetches made on behalf of ChatGPT users)
User-agent: ChatGPT-User
# Anthropic (Claude)
User-agent: ClaudeBot
# Anthropic web crawler
User-agent: anthropic-ai
# Google Gemini (formerly Bard)
User-agent: Google-Extended
# Perplexity AI
User-agent: PerplexityBot
# You.com
User-agent: YouBot
# Meta AI (Llama)
User-agent: meta-externalagent
# Cohere
User-agent: cohere-ai
Disallow: /admin/
Disallow: /api/
Disallow: /internal/
Disallow: /stripe-webhook/
Disallow: /payment-intent/
Disallow: /track-click/
Disallow: /hot-lead/
Disallow: /jobs/create/
Disallow: /vendor-dashboard/
Disallow: /account/
Disallow: /login/
Disallow: /register/
Disallow: /reset-password/
Disallow: /verify-email/
Allow: /

# Apple (Siri, Spotlight): same access as the AI crawlers above, rate limited.
User-agent: Applebot
Disallow: /admin/
Disallow: /api/
Disallow: /internal/
Disallow: /stripe-webhook/
Disallow: /payment-intent/
Disallow: /track-click/
Disallow: /hot-lead/
Disallow: /jobs/create/
Disallow: /vendor-dashboard/
Disallow: /account/
Disallow: /login/
Disallow: /register/
Disallow: /reset-password/
Disallow: /verify-email/
Allow: /
# Crawl-delay is non-standard; it is honored only by crawlers that support it.
Crawl-delay: 1

# Common Research & Academic Crawlers
User-agent: ia_archiver
User-agent: Semanticscholar
Disallow: /admin/
Disallow: /api/
Disallow: /internal/
Disallow: /stripe-webhook/
Disallow: /payment-intent/
Disallow: /track-click/
Disallow: /hot-lead/
Disallow: /jobs/create/
Disallow: /vendor-dashboard/
Disallow: /account/
Disallow: /login/
Disallow: /register/
Disallow: /reset-password/
Disallow: /verify-email/
Allow: /

# ── BLOCK BAD BOTS ─────────────────────────────────────────────────────────
# SEO / backlink scrapers: no access to any part of the site.
# Consecutive User-agent lines share the single rule that follows them.
User-agent: AhrefsBot
User-agent: SemrushBot
User-agent: MJ12bot
User-agent: DotBot
User-agent: BLEXBot
User-agent: PetalBot
User-agent: DataForSeoBot
User-agent: serpstatbot
User-agent: SEOkicks
Disallow: /