forked from learnlatex/learnlatex.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
robots.txt
141 lines (141 loc) · 4.59 KB
/
robots.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
################################# ROBOTS.TXT ###################################
# #
# Alphabetically ordered whitelisting of legitimate web robots, which obey the #
# Robots Exclusion Standard (robots.txt). Each bot is shortly described in a #
# comment above the (list of) user-agent(s). Comment out or delete lines which #
# contain User-agents you do not wish to allow on your website. #
# Important: Blank lines are not allowed in the final robots.txt file! #
# Updates can be retrieved from: https://github.com/jonasjacek/robots.txt #
# #
# This document is licensed with a CC BY-NC-SA 4.0 license. #
# #
# Last update: 2020-12-17 #
# #
################################################################################
# so.com chinese search engine
User-agent: 360Spider
User-agent: 360Spider-Image
User-agent: 360Spider-Video
# google.com landing page quality checks
User-agent: AdsBot-Google
User-agent: AdsBot-Google-Mobile
# google.com app resource fetcher
User-agent: AdsBot-Google-Mobile-Apps
# bing ads bot
User-agent: adidxbot
# apple.com search engine
User-agent: Applebot
user-agent: AppleNewsBot
# baidu.com chinese search engine
User-agent: Baiduspider
User-agent: Baiduspider-image
User-agent: Baiduspider-news
User-agent: Baiduspider-video
# bing.com international search engine
User-agent: bingbot
User-agent: BingPreview
# bublup.com suggestion/search engine
User-agent: BublupBot
# commoncrawl.org open repository of web crawl data
User-agent: CCBot
# cliqz.com german in-product search engine
User-agent: Cliqzbot
# coccoc.com vietnamese search engine
User-agent: coccoc
User-agent: coccocbot-image
User-agent: coccocbot-web
# daum.net korean search engine
User-agent: Daumoa
# dazoo.fr french search engine
User-agent: Dazoobot
# deusu.de german search engine
User-agent: DeuSu
# duckduckgo.com international privacy search engine
User-agent: DuckDuckBot
User-agent: DuckDuckGo-Favicons-Bot
# eurip.com european search engine
User-agent: EuripBot
# exploratodo.com latin search engine
User-agent: Exploratodo
# facebook.com social network
User-agent: Facebot
# feedly.com feed fetcher
User-agent: Feedly
# findx.com european search engine
User-agent: Findxbot
# goo.ne.jp japanese search engine
User-agent: gooblog
# google.com international search engine
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-Mobile
User-agent: Googlebot-News
User-agent: Googlebot-Video
# so.com chinese search engine
User-agent: HaoSouSpider
# goo.ne.jp japanese search engine
User-agent: ichiro
# istella.it italian search engine
User-agent: istellabot
# jike.com / chinaso.com chinese search engine
User-agent: JikeSpider
# lycos.com & hotbot.com international search engine
User-agent: Lycos
# mail.ru russian search engine
User-agent: Mail.Ru
# google.com adsense bot
User-agent: Mediapartners-Google
# mojeek.com search engine
User-agent: MojeekBot
# bing.com international search engine
User-agent: msnbot
User-agent: msnbot-media
# orange.com international search engine
User-agent: OrangeBot
# pinterest.com social networtk
User-agent: Pinterest
# botje.nl dutch search engine
User-agent: Plukkie
# qwant.com french search engine
User-agent: Qwantify
# rambler.ru russian search engine
User-agent: Rambler
# seznam.cz czech search engine
User-agent: SeznamBot
# soso.com chinese search engine
User-agent: Sosospider
# yahoo.com international search engine
User-agent: Slurp
# sogou.com chinese search engine
User-agent: Sogou blog
User-agent: Sogou inst spider
User-agent: Sogou News Spider
User-agent: Sogou Orion spider
User-agent: Sogou spider2
User-agent: Sogou web spider
# sputnik.ru russian search engine
User-agent: SputnikBot
# ask.com international search engine
User-agent: Teoma
# twitter.com bot
User-agent: Twitterbot
# wotbox.com international search engine
User-agent: wotbox
# yacy.net p2p search software
User-agent: yacybot
# yandex.com russian search engine
User-agent: Yandex
User-agent: YandexMobileBot
# search.naver.com south korean search engine
user-agent: Yeti
# yioop.com international search engine
User-agent: YioopBot
# yooz.ir iranian search engine
User-agent: yoozBot
# youdao.com chinese search engine
User-agent: YoudaoBot
# crawling rule(s) for above bots
Disallow:
# disallow all other bots
User-agent: *
Disallow: /