-
Notifications
You must be signed in to change notification settings - Fork 4.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
56689a2
commit 8c18b48
Showing
12 changed files
with
513 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
local cache = require "kong.tools.database_cache" | ||
|
||
local _M = {} | ||
|
||
local INDEX = "bot_detection_index" | ||
|
||
--- Cache a value and record its key in the shared index entry.
-- The index (stored under `INDEX`) is the list `_M.reset()` walks to delete
-- every entry written through this function.
-- @param key   key as supplied by the caller (before `cache.bot_detection_key`)
-- @param value value to cache
function _M.set(key, value)
  local entry_key = cache.bot_detection_key(key)
  cache.set(entry_key, value)

  -- Start a fresh list when no index has been stored yet.
  local tracked = cache.get(INDEX) or {}
  tracked[#tracked + 1] = key
  cache.set(INDEX, tracked)
end
|
||
--- Fetch a value previously stored with `_M.set`.
-- @param key key as supplied by the caller (before `cache.bot_detection_key`)
-- @return whatever `cache.get` returns for the derived cache key
function _M.get(key)
  local entry_key = cache.bot_detection_key(key)
  return cache.get(entry_key)
end
|
||
--- Delete every entry recorded in the index, then the index itself.
-- Safe to call when nothing has been cached yet: the index entry is then
-- absent and there is nothing to iterate over (calling `ipairs` on a nil
-- value would raise an error).
function _M.reset()
  local index_keys = cache.get(INDEX)
  if type(index_keys) == "table" then
    for _, key in ipairs(index_keys) do
      cache.delete(cache.bot_detection_key(key))
    end
  end
  cache.delete(INDEX)
end
|
||
return _M |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
local BasePlugin = require "kong.plugins.base_plugin" | ||
local responses = require "kong.tools.responses" | ||
local rules = require "kong.plugins.bot-detection.rules" | ||
local bot_cache = require "kong.plugins.bot-detection.cache" | ||
local strip = require("kong.tools.utils").strip | ||
|
||
local ipairs = ipairs | ||
local get_headers = ngx.req.get_headers | ||
local re_match = ngx.re.match | ||
|
||
local BotDetectionHandler = BasePlugin:extend() | ||
|
||
BotDetectionHandler.PRIORITY = 2500 | ||
|
||
--- Read the request's User-Agent header.
-- @return the header value (nil when the header is absent), or
--         `nil, err` when the header lookup yields a table, which this
--         plugin rejects as more than one User-Agent header
local function get_user_agent()
  local header = get_headers()["user-agent"]
  if type(header) ~= "table" then
    return header
  end
  return nil, "Only one User-Agent header allowed"
end
|
||
--- Constructor: delegates to the parent constructor with the plugin name.
function BotDetectionHandler:new()
  local parent = BotDetectionHandler.super
  parent.new(self, "bot-detection")
end
|
||
-- Returns true when `user_agent` matches at least one regex in `patterns`.
-- A missing (nil) pattern list never matches.
local function matches_any(user_agent, patterns)
  if not patterns then
    return false
  end
  for _, pattern in ipairs(patterns) do
    if re_match(user_agent, pattern) then
      return true
    end
  end
  return false
end

--- Access-phase handler: allow or reject the request based on User-Agent.
-- Order of evaluation: cached verdict, `conf.whitelist`, `conf.blacklist`,
-- then the built-in `rules.bots` list. The first match decides, and the
-- verdict is cached so the regexes are not re-run for the same value.
-- @param conf plugin configuration (optional `whitelist` / `blacklist` lists)
function BotDetectionHandler:access(conf)
  BotDetectionHandler.super.access(self)

  local user_agent, err = get_user_agent()
  if err then
    return responses.send_HTTP_BAD_REQUEST(err)
  end

  -- Requests without a User-Agent header are not checked at all.
  if not user_agent then
    return
  end

  user_agent = strip(user_agent)

  -- Cache key, per API
  local cache_key = ngx.ctx.api.id..":"..user_agent

  -- Cached verdict: false = blocked, any truthy value = allowed,
  -- nil = nothing cached yet for this key.
  local verdict = bot_cache.get(cache_key)
  if verdict == false then
    return responses.send_HTTP_FORBIDDEN()
  elseif verdict then
    return
  end

  -- The whitelist wins over both the blacklist and the built-in rules.
  if matches_any(user_agent, conf.whitelist) then
    bot_cache.set(cache_key, true)
    return
  end

  if matches_any(user_agent, conf.blacklist)
     or matches_any(user_agent, rules.bots) then
    bot_cache.set(cache_key, false)
    return responses.send_HTTP_FORBIDDEN()
  end

  -- No rule matched: remember this User-Agent as allowed.
  bot_cache.set(cache_key, true)
end
|
||
return BotDetectionHandler |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
local events = require "kong.core.events" | ||
local bot_cache = require "kong.plugins.bot-detection.cache" | ||
|
||
--- Reset the bot-detection cache when the event concerns this plugin.
-- Only messages whose collection is "plugins" and whose entity name is
-- "bot-detection" trigger a reset; everything else is ignored.
-- @param message_t event message (reads `collection` and `entity.name`)
local function invalidate(message_t)
  if message_t.collection ~= "plugins" then
    return
  end
  if message_t.entity.name == "bot-detection" then
    bot_cache.reset()
  end
end
|
||
-- Event hooks exported by this module: entity updates go through
-- `invalidate`, which only reads its first argument (the event message).
return {
  [events.TYPES.ENTITY_UPDATED] = invalidate
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
-- List taken from https://github.com/ua-parser/uap-core/blob/master/regexes.yaml | ||
|
||
return { | ||
bots = { | ||
[[(Pingdom.com_bot_version_)(\d+)\.(\d+)]], -- Pingdom | ||
[[(facebookexternalhit)/(\d+)\.(\d+)]], -- Facebook | ||
[[Google.*/\+/web/snippet]], -- Google Plus | ||
[[(Twitterbot)/(\d+)\.(\d+)]], -- Twitter | ||
[[/((?:Ant-)?Nutch|[A-z]+[Bb]ot|[A-z]+[Ss]pider|Axtaris|fetchurl|Isara|ShopSalad|Tailsweep)[ \-](\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots Pattern '/name-0.0' | ||
[[(008|Altresium|Argus|BaiduMobaider|BoardReader|DNSGroup|DataparkSearch|EDI|Goodzer|Grub|INGRID|Infohelfer|LinkedInBot|LOOQ|Nutch|PathDefender|Peew|PostPost|Steeler|Twitterbot|VSE|WebCrunch|WebZIP|Y!J-BR[A-Z]|YahooSeeker|envolk|sproose|wminer)/(\d+)(?:\.(\d+)(?:\.(\d+))?)?]], --Bots Pattern 'name/0.0' | ||
[[(MSIE) (\d+)\.(\d+)([a-z]\d?)?;.* MSIECrawler]], --MSIECrawler | ||
[[(Google-HTTP-Java-Client|Apache-HttpClient|http%20client|Python-urllib|HttpMonitor|TLSProber|WinHTTP|JNLP)(?:[ /](\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]], -- Downloader ... | ||
[[(1470\.net crawler|50\.nu|8bo Crawler Bot|Aboundex|Accoona-[A-z]+-Agent|AdsBot-Google(?:-[a-z]+)?|altavista|AppEngine-Google|archive.*?\.org_bot|archiver|Ask Jeeves|[Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]+)*|bingbot|BingPreview|blitzbot|BlogBridge|BoardReader(?: [A-Za-z]+)*|boitho.com-dc|BotSeer|\b\w*favicon\w*\b|\bYeti(?:-[a-z]+)?|Catchpoint bot|[Cc]harlotte|Checklinks|clumboot|Comodo HTTP\(S\) Crawler|Comodo-Webinspector-Crawler|ConveraCrawler|CRAWL-E|CrawlConvera|Daumoa(?:-feedfetcher)?|Feed Seeker Bot|findlinks|Flamingo_SearchEngine|FollowSite Bot|furlbot|Genieo|gigabot|GomezAgent|gonzo1|(?:[a-zA-Z]+-)?Googlebot(?:-[a-zA-Z]+)?|Google SketchUp|grub-client|gsa-crawler|heritrix|HiddenMarket|holmes|HooWWWer|htdig|ia_archiver|ICC-Crawler|Icarus6j|ichiro(?:/mobile)?|IconSurf|IlTrovatore(?:-Setaccio)?|InfuzApp|Innovazion Crawler|InternetArchive|IP2[a-z]+Bot|jbot\b|KaloogaBot|Kraken|Kurzor|larbin|LEIA|LesnikBot|Linguee Bot|LinkAider|LinkedInBot|Lite Bot|Llaut|lycos|Mail\.RU_Bot|masidani_bot|Mediapartners-Google|Microsoft .*? Bot|mogimogi|mozDex|MJ12bot|msnbot(?:-media *)?|msrbot|netresearch|Netvibes|NewsGator[^/]*|^NING|Nutch[^/]*|Nymesis|ObjectsSearch|Orbiter|OOZBOT|PagePeeker|PagesInventory|PaxleFramework|Peeplo Screenshot Bot|PlantyNet_WebRobot|Pompos|Read%20Later|Reaper|RedCarpet|Retreiver|Riddler|Rival IQ|scooter|Scrapy|Scrubby|searchsight|seekbot|semanticdiscovery|Simpy|SimplePie|SEOstats|SimpleRSS|SiteCon|Slurp|snappy|Speedy Spider|Squrl Java|TheUsefulbot|ThumbShotsBot|Thumbshots\.ru|TwitterBot|URL2PNG|Vagabondo|VoilaBot|^vortex|Votay bot|^voyager|WASALive.Bot|Web-sniffer|WebThumb|WeSEE:[A-z]+|WhatWeb|WIRE|WordPress|Wotbox|www\.almaden\.ibm\.com|Xenu(?:.s)? Link Sleuth|Xerka [A-z]+Bot|yacy(?:bot)?|Yahoo[a-z]*Seeker|Yahoo! Slurp|Yandex\w+|YodaoBot(?:-[A-z]+)?|YottaaMonitor|Yowedo|^Zao|^Zao-Crawler|ZeBot_www\.ze\.bz|ZooShot|ZyBorg)(?:[ /]v?(\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]], -- Bots | ||
[[(?:\/[A-Za-z0-9\.]+)? *([A-Za-z0-9 \-_\!\[\]:]*(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]*))/(\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots General matcher 'name/0.0' | ||
[[(?:\/[A-Za-z0-9\.]+)? *([A-Za-z0-9 _\!\[\]:]*(?:[Aa]rchiver|[Ii]ndexer|[Ss]craper|[Bb]ot|[Ss]pider|[Cc]rawl[a-z]*)) (\d+)(?:\.(\d+)(?:\.(\d+))?)?]], -- Bots General matcher 'name 0.0' | ||
[[((?:[A-z0-9]+|[A-z\-]+ ?)?(?: the )?(?:[Ss][Pp][Ii][Dd][Ee][Rr]|[Ss]crape|[A-Za-z0-9-]*(?:[^C][^Uu])[Bb]ot|[Cc][Rr][Aa][Ww][Ll])[A-z0-9]*)(?:(?:[ /]| v)(\d+)(?:\.(\d+)(?:\.(\d+))?)?)?]] -- Bots containing spider|scrape|bot(but not CUBOT)|Crawl | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
local re_match = ngx.re.match | ||
|
||
--- Schema validator: every entry of `value` must be usable as a regex.
-- Each rule is tried against a throwaway subject string; a non-nil second
-- return value from `re_match` is treated as an invalid pattern.
-- @param value list of regex strings, or nil when the field is unset
-- @return true when all rules pass (or `value` is unset);
--         otherwise `false, message` naming the first failing rule
local check_regex = function(value)
  if not value then
    return true
  end

  for _, rule in ipairs(value) do
    local _, regex_err = re_match("just a string to test", rule)
    if regex_err then
      return false, "value '"..rule.."' is not a valid regex"
    end
  end

  return true
end
|
||
return { | ||
no_consumer = true, | ||
fields = { | ||
whitelist = { type = "array", func = check_regex }, | ||
blacklist = { type = "array", func = check_regex }, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
local helpers = require "spec.helpers" | ||
|
||
local HELLOWORLD = "HelloWorld" -- just a test value | ||
local FACEBOOK = "facebookexternalhit/1.1" -- matches a known bot in `rules.lua` | ||
|
||
describe("Plugin: bot-detection (access)", function() | ||
|
||
local client | ||
|
||
setup(function() | ||
helpers.prepare_prefix() | ||
|
||
local api1 = assert(helpers.dao.apis:insert { | ||
request_host = "bot.com", | ||
upstream_url = "http://mockbin.com" | ||
}) | ||
local api2 = assert(helpers.dao.apis:insert { | ||
request_host = "bot2.com", | ||
upstream_url = "http://mockbin.com" | ||
}) | ||
local api3 = assert(helpers.dao.apis:insert { | ||
request_host = "bot3.com", | ||
upstream_url = "http://mockbin.com" | ||
}) | ||
|
||
-- plugin 1 | ||
assert(helpers.dao.plugins:insert { | ||
api_id = api1.id, | ||
name = "bot-detection", | ||
config = {}, | ||
}) | ||
-- plugin 2 | ||
assert(helpers.dao.plugins:insert { | ||
api_id = api2.id, | ||
name = "bot-detection", | ||
config = { | ||
blacklist = HELLOWORLD | ||
}, | ||
}) | ||
-- plugin 3 | ||
assert(helpers.dao.plugins:insert { | ||
api_id = api3.id, | ||
name = "bot-detection", | ||
config = { | ||
whitelist = FACEBOOK | ||
}, | ||
}) | ||
|
||
assert(helpers.start_kong()) | ||
end) | ||
|
||
teardown(function() | ||
helpers.stop_kong() | ||
end) | ||
|
||
before_each(function() | ||
client = assert(helpers.proxy_client()) | ||
end) | ||
|
||
after_each(function() | ||
if client then client:close() end | ||
end) | ||
|
||
it("allows regular requests", function() | ||
local res = assert( client:send { | ||
method = "GET", | ||
path = "/request", | ||
headers = { host = "bot.com" } | ||
}) | ||
assert.response(res).has.status(200) | ||
|
||
local res = assert( client:send { | ||
method = "GET", | ||
path = "/request", | ||
headers = { | ||
host = "bot.com", | ||
["user-agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36" | ||
} | ||
}) | ||
assert.response(res).has.status(200) | ||
|
||
local res = assert( client:send { | ||
method = "GET", | ||
path = "/request", | ||
headers = { | ||
host = "bot.com", | ||
["user-agent"] = HELLOWORLD | ||
} | ||
}) | ||
assert.response(res).has.status(200) | ||
|
||
local res = assert( client:send { | ||
method = "GET", | ||
path = "/request", | ||
headers = { | ||
host = "bot.com", | ||
["user-agent"] = "curl/7.43.0" | ||
} | ||
}) | ||
assert.response(res).has.status(200) | ||
end) | ||
|
||
it("blocks bots", function() | ||
local res = assert( client:send { | ||
method = "GET", | ||
path = "/request", | ||
headers = { | ||
host = "bot.com", | ||
["user-agent"] = "Googlebot/2.1 (+http://www.google.com/bot.html)" | ||
}, | ||
}) | ||
assert.response(res).has.status(403) | ||
|
||
local res = assert( client:send { | ||
method = "GET", | ||
path = "/request", | ||
headers = { | ||
host = "bot.com", | ||
["user-agent"] = FACEBOOK, | ||
} | ||
}) | ||
assert.response(res).has.status(403) | ||
end) | ||
|
||
it("blocks blacklisted user-agents", function() | ||
local res = assert( client:send { | ||
method = "GET", | ||
path = "/request", | ||
headers = { | ||
host = "bot2.com", | ||
["user-agent"] = HELLOWORLD, | ||
} | ||
}) | ||
assert.response(res).has.status(403) | ||
end) | ||
|
||
it("allows whitelisted user-agents", function() | ||
local res = assert( client:send { | ||
method = "GET", | ||
path = "/request", | ||
headers = { | ||
host = "bot3.com", | ||
["user-agent"] = FACEBOOK | ||
} | ||
}) | ||
assert.response(res).has.status(200) | ||
end) | ||
|
||
end) |
Oops, something went wrong.