# robots.txt for justinrwood.com
#
# This site reserves all rights against AI / ML training. The full
# human-readable policy is at https://justinrwood.com/usage/ and the
# machine-readable companions are:
#   - <meta name="robots" content="noai, noimageai"> on every HTML page
#   - X-Robots-Tag: noai, noimageai on image responses (.htaccess)
#   - https://justinrwood.com/.well-known/tdmrep.json  (W3C TDMRep)
#   - https://justinrwood.com/ai.txt                    (Spawning / ai.txt)
#
# Pursuant to Article 4(3) of EU Directive 2019/790, these works are
# expressly reserved by the rightsholder from text-and-data-mining and
# AI training. See /usage/ for the full reservation language.
#
# Generic search engines (Googlebot, Bingbot, DuckDuckBot, Applebot
# without the -Extended variant) are intentionally NOT blocked — they
# index the site for search but do not, per their public docs, use the
# content for generative-AI training.

# ---------------------------------------------------------------------------
# AI / generative-AI / training crawlers — explicitly disallowed.
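# Each agent is kept in its own group (one User-agent line per group) as
# a convention of this file; RFC 9309 would also allow several
# User-agent lines to share a single group.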
# Sourced from ai-robots-txt/ai.robots.txt (community list) plus the
# 2026 list from Cloudflare's bot inventory.
# ---------------------------------------------------------------------------

# One group per crawler; every group denies the entire site ("Disallow: /").
# Groups are independent of each other, so their order carries no meaning.

User-agent: GPTBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: OAI-SearchBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GoogleOther
Disallow: /

User-agent: GoogleOther-Image
Disallow: /

User-agent: GoogleOther-Video
Disallow: /

User-agent: PerplexityBot
Disallow: /

User-agent: Perplexity-User
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: Meta-ExternalFetcher
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: img2dataset
Disallow: /

User-agent: omgili
Disallow: /

User-agent: omgilibot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: cohere-training-data-crawler
Disallow: /

User-agent: AI2Bot
Disallow: /

User-agent: AI2Bot-Dolma
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: FriendlyCrawler
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: YouBot
Disallow: /

User-agent: PetalBot
Disallow: /

# RFC 9309 product tokens may contain only letters, "-" and "_", so a
# versioned name such as "iaskspider/2.0" is not a valid token. Parsers
# that reduce the crawler name to its token before comparing would never
# match the versioned form, so the bare token is listed as well. The
# versioned entry is kept for parsers that compare the literal string.
User-agent: iaskspider
Disallow: /

User-agent: iaskspider/2.0
Disallow: /

User-agent: ICC-Crawler
Disallow: /

User-agent: ISSCyberRiskCrawler
Disallow: /

# NOTE(review): this name contains a space, which RFC 9309 product tokens
# do not allow; token-extracting parsers may read it as "Kangaroo".
# Left unchanged here -- confirm the crawler's actual product token.
User-agent: Kangaroo Bot
Disallow: /

User-agent: Sentibot
Disallow: /

User-agent: Webzio-Extended
Disallow: /

User-agent: DuckAssistBot
Disallow: /

User-agent: MistralAI-User
Disallow: /

User-agent: NovaAct
Disallow: /

User-agent: PanguBot
Disallow: /

User-agent: Scrapy
Disallow: /

User-agent: SemrushBot-OCOB
Disallow: /

User-agent: TikTokSpider
Disallow: /

User-agent: AddSearchBot
Disallow: /

User-agent: BrightBot
Disallow: /

User-agent: Crawlspace
Disallow: /

User-agent: DataForSeoBot
Disallow: /

User-agent: Devin
Disallow: /

# ---------------------------------------------------------------------------
# Fallback group: applies to every crawler that has no dedicated group
# in this file, which includes the generic search engines (Googlebot,
# Bingbot, etc.). Nothing is off limits to them.
# ---------------------------------------------------------------------------
User-agent: *
# An empty Disallow value excludes no path, i.e. the whole site may be crawled.
Disallow:

# Location of the sitemap index; this line is independent of the groups above.
Sitemap: https://justinrwood.com/sitemap-index.xml