#
# robots.txt for FileFormat.Info (www.fileformat.info)
#
# Layout: one directive per line; groups are separated by a blank line and
# each group starts with its User-agent line(s).
#
Sitemap: http://www.fileformat.info/sitemap.xml

#
# Default rules for every crawler without a more specific group below.
# Keep this group free of blank lines: some parsers treat a blank line as
# the end of the record, which would orphan the rules that follow it.
#
User-agent: *
Disallow: /_
Disallow: /about/feed
Disallow: /about/javad
Disallow: /about/webal
#Disallow: /css
Disallow: /down
#Disallow: /images
#Disallow: /js
Disallow: /mirror/news
Disallow: /other/bookm
Disallow: /security
Disallow: /user
Disallow: /honeypot.txt
#
# https://yandex.ru/support/webmaster/robot-workings/clean-param.html
# (each line names a query parameter to ignore, then the path prefix it applies to)
#
Clean-param: hyperlink /info/charset/*/grid.htm
Clean-param: text /tool/hash.htm
Clean-param: sort /*
#
# some samples have relative links that cause crawlers to go nuts
#
Disallow: /format/unipage/sample/

#
# Useless busybody: http://www.nameprotect.com/botinfo.html
#
User-agent: NPBot
Disallow: /

#
# DigiMarc watermark checker
#
User-agent: digimarc
Disallow: /

User-agent: MarcSpider
Disallow: /

#
# yet another robo-snitch
#
User-agent: turnitinbot
Disallow: /

#
# shopping bots: nothing here to buy
#
User-agent: BecomeBot
Disallow: /

User-agent: updated
Disallow: /

#
# if you're not going to send links my way, don't crush me
# http://www.sitesell.com/sbider.html
#
User-agent: SBIder
Disallow: /info

#
# nothing health-related here
# http://www.kosmix.com/crawler.html
#
# (product token only: robots are matched by name, without version info)
#
User-agent: voyager
Disallow: /

#
# no tickets for sale
# http://www.stubhub.com/
#
User-agent: StubHub
Disallow: /

User-agent: gsa-crawler
Disallow: /

#
# no classifieds here
# http://www.sygol.net/
#
# (product token only: a URL is not valid inside a User-agent value)
#
User-agent: SygolBot
Disallow: /

#
# no viruses here
# securecomputing.com
#
User-agent: virus_detector
Disallow: /

#
# nothing healthy here
# healthline.com
#
User-agent: NimbleCrawler
Disallow: /

#
# http://www.shopwiki.com/wiki/Help:Bot
# nothing to buy here
#
User-agent: ShopWiki
Disallow: /

#
# http://www.warebay.com/bot.html
# nothing to buy here
#
User-agent: WBSearchBot
Disallow: /

#
# annoying but harmless SEO monitor
# https://ahrefs.com/robot/index.php
#
# Not blocked, only throttled via Crawl-delay.
#
User-agent: AhrefsBot
Crawl-delay: 120