# NB: Whenever changes are made to this file it should be validated e.g. at http://tool.motoricerca.info/robots-checker.phtml
# NB: Only use one hash character for commenting.
# NB: The only valid directives for robots.txt are User-agent and Disallow.
# NB: Every directive and every comment must be on its own line. A hash comments out the rest of the line.
# NB: Separate records with a blank line and do not put blank lines inside a record.

# Set a delay between page accesses to stop overloading the servers
#User-agent: *
#Crawl-delay: 10

# Yahoo allows us to set a delay in secs
#User-agent: YahooSeeker
#Crawl-delay: 30

# go away

# NPBot+(http://www.nameprotect.com/botinfo.html) - Plagiarism Checker
User-agent: NPBot
Disallow: /

# http://www.turnitin.com/ - Plagiarism Checker
User-agent: TurnitinBot
Disallow: /

# http://www.netseer.com/ - LA based startup spider
User-agent: Teemer
Disallow: /

# http://www.WISEnutbot.com - LookSmart Spider
User-agent: ZyBorg
Disallow: /

# LinkWalker - Marketing Co Spider
User-agent: LinkWalker
Disallow: /

# Zeus - Marketing Co Spider
User-agent: Zeus
Disallow: /

# Picsearch - indexing pictures from the web
User-agent: psbot
Disallow: /

# Googlebot-Image - indexing pictures from the web
User-agent: Googlebot-Image
Disallow: /

# Firefly is a music and film recommendation system
User-agent: Firefly
Disallow: /

# HTTrack is an easy-to-use offline browser utility.
# It allows you to download a World Wide website from the Internet to a local directory.
User-agent: HTTrack
Disallow: /

# Accelobot is a search engine for online marketing trends and emerging technologies.
User-agent: Accelobot
Disallow: /

# Hurricane Electric Internet Services, an ISP
# It looks like it may be a student research project at Stanford University.
User-agent: Twiceler
Disallow: /

# IRL-crawler is a Texas A&M research project
User-agent: IRLbot
Disallow: /

# MSRBot is a Microsoft web crawler used to collect data from the web for further study.
User-agent: MSRBot
Disallow: /

# MJ12bot spiders the Web for the purpose of building a distributed search engine
#User-Agent: MJ12bot
#Crawl-Delay: 10

# ia_archiver - Alexa -TEMP
#User-agent: ia_archiver
#Disallow: /

#User-agent: Slurp -TEMP
#Disallow: /

# Gigabot/2.0 -TEMP
#User-agent: Gigabot/2.0
#Disallow: /

# YahooSeeker
User-agent: YahooSeeker
Disallow: /

# No one should index Link Static Pages
#Disallow: /tiptop/links/LA/
#Disallow: /links/LA/uk/
User-agent: *
Disallow: /style/extra/
Disallow: /links/LA/at/
Disallow: /links/LA/au/
Disallow: /links/LA/be/
Disallow: /links/LA/ca/
Disallow: /links/LA/ch/
Disallow: /links/LA/cn/
Disallow: /links/LA/de/
Disallow: /links/LA/dk/
Disallow: /links/LA/es/
Disallow: /links/LA/fi/
Disallow: /links/LA/fr/
Disallow: /links/LA/ie/
Disallow: /links/LA/it/
Disallow: /links/LA/nl/
Disallow: /links/LA/no/
Disallow: /links/LA/nz/
Disallow: /links/LA/pl/
Disallow: /links/LA/pt/
Disallow: /links/LA/ru/
Disallow: /links/LA/se/
Disallow: /links/LA/us/
Disallow: /links/LA/z_us/
Disallow: /tiptop/style/extra/
Disallow: /tiptop/links/LA/at/
Disallow: /tiptop/links/LA/au/
Disallow: /tiptop/links/LA/be/
Disallow: /tiptop/links/LA/ca/
Disallow: /tiptop/links/LA/ch/
Disallow: /tiptop/links/LA/cn/
Disallow: /tiptop/links/LA/de/
Disallow: /tiptop/links/LA/dk/
Disallow: /tiptop/links/LA/es/
Disallow: /tiptop/links/LA/fi/
Disallow: /tiptop/links/LA/fr/
Disallow: /tiptop/links/LA/ie/
Disallow: /tiptop/links/LA/it/
Disallow: /tiptop/links/LA/nl/
Disallow: /tiptop/links/LA/no/
Disallow: /tiptop/links/LA/nz/
Disallow: /tiptop/links/LA/pl/
Disallow: /tiptop/links/LA/pt/
Disallow: /tiptop/links/LA/ru/
Disallow: /tiptop/links/LA/se/
Disallow: /tiptop/links/LA/us/
Disallow: /tiptop/links/LA/z_us/
Disallow: /links/MM/
Disallow: /tiptop/links/MM/
Disallow: /tiptop/links/theXjob/
Disallow: /links/theXjob/
Disallow: /theXjob/links/
Disallow: /tiger/
Disallow: /tiptop/tiger/
Disallow: /login.asp
Disallow: /sendtofriend.asp
Disallow: /popup_content.asp