diff --git a/release/doc/constant-values.html b/release/doc/constant-values.html
index 3705f66..7cf88a4 100644
--- a/release/doc/constant-values.html
+++ b/release/doc/constant-values.html
@@ -86,7 +86,7 @@
net.pieroxy.*
public static final java.lang.String
VERSION |
-"2015.04.12.08.04.GMT" |
+"2015.04.20.21.55.GMT" |
diff --git a/release/doc/index-all.html b/release/doc/index-all.html
index 7cc3824..1cf6e56 100644
--- a/release/doc/index-all.html
+++ b/release/doc/index-all.html
@@ -499,7 +499,7 @@ U
UserAgentDetector - Class in net.pieroxy.ua.detection
-This is the documentation for the version 2015.04.12.08.04.GMT of the library.
+This is the documentation for the version 2015.04.20.21.55.GMT of the library.
UserAgentDetector() - Constructor for class net.pieroxy.ua.detection.UserAgentDetector
diff --git a/release/doc/net/pieroxy/ua/detection/BotFamily.html b/release/doc/net/pieroxy/ua/detection/BotFamily.html
index 0a6eb10..3e973c4 100644
--- a/release/doc/net/pieroxy/ua/detection/BotFamily.html
+++ b/release/doc/net/pieroxy/ua/detection/BotFamily.html
@@ -134,16 +134,21 @@ Enum Constant Summary
+HIDDEN_BOT
+A robot that hides as a regular browser.
+ |
+
+
ROBOT
A generic robot.
|
-
+
SPAMBOT
A spam bot.
|
-
+
UNKNOWN
Unknown type of bot
|
@@ -233,6 +238,16 @@ SPAMBOT
A spam bot.
+
+
+
+
diff --git a/release/doc/net/pieroxy/ua/detection/Brand.html b/release/doc/net/pieroxy/ua/detection/Brand.html
index 94cbf78..b1c2a48 100644
--- a/release/doc/net/pieroxy/ua/detection/Brand.html
+++ b/release/doc/net/pieroxy/ua/detection/Brand.html
@@ -142,29 +142,35 @@ Enum Constant Summary
AMAZON |
-APPLE |
+APACHE |
-ARCHOS |
+APPLE |
-ASK |
+ARCHOS |
-ASUS |
+ASK |
-AVANT |
+ASUS |
-BAIDU |
+AVANT |
-BANANAFISH |
+BAIDU |
+BANANAFISH |
+
+
BE |
+
+BOULANGER |
+
CHROMIUM |
@@ -415,48 +421,51 @@ Enum Constant Summary
SONY |
-SUN |
+SRWARE |
-TENCENT |
+SUN |
-TOSHIBA |
+TENCENT |
-UNIXLIKE |
+TOSHIBA |
-UNKNOWN |
+UNIXLIKE |
-UNKNOWN_ANDROID |
+UNKNOWN |
-UTSTARCOM |
+UNKNOWN_ANDROID |
-VIVALDI |
+UTSTARCOM |
-WEBIN |
+VIVALDI |
-WIKO |
+WEBIN |
-WINDOWS |
+WIKO |
-YACI |
+WINDOWS |
-YAHOO |
+YACI |
-YANDEX |
+YAHOO |
+YANDEX |
+
+
ZTE |
@@ -690,6 +699,15 @@ ILEGEND
public static final Brand ILEGEND
+
+
+
+
@@ -1158,6 +1176,15 @@ KYOCERA
public static final Brand KYOCERA
+
+
+
+
@@ -1311,6 +1338,15 @@ OPENSOURCE
public static final Brand OPENSOURCE
+
+
+
+
diff --git a/release/doc/net/pieroxy/ua/detection/UserAgentDetector.html b/release/doc/net/pieroxy/ua/detection/UserAgentDetector.html
index 3efa24f..f54adfc 100644
--- a/release/doc/net/pieroxy/ua/detection/UserAgentDetector.html
+++ b/release/doc/net/pieroxy/ua/detection/UserAgentDetector.html
@@ -101,7 +101,7 @@ Class UserAgentDetector
public class UserAgentDetector
extends java.lang.Object
implements IUserAgentDetector
-This is the documentation for the version 2015.04.12.08.04.GMT of the library.
+This is the documentation for the version 2015.04.20.21.55.GMT of the library.
diff --git a/release/doc/net/pieroxy/ua/detection/package-summary.html b/release/doc/net/pieroxy/ua/detection/package-summary.html
index 9430a63..f5708b1 100644
--- a/release/doc/net/pieroxy/ua/detection/package-summary.html
+++ b/release/doc/net/pieroxy/ua/detection/package-summary.html
@@ -135,7 +135,7 @@ Package net.pieroxy.ua.detection
UserAgentDetector |
- This is the documentation for the version 2015.04.12.08.04.GMT of the library.
+This is the documentation for the version 2015.04.20.21.55.GMT of the library.
|
diff --git a/release/user-agent-detector.jar b/release/user-agent-detector.jar
index b8db52a..1b796e2 100644
Binary files a/release/user-agent-detector.jar and b/release/user-agent-detector.jar differ
diff --git a/src/net/pieroxy/ua/detection/BotFamily.java b/src/net/pieroxy/ua/detection/BotFamily.java
index 0c1f6a9..18a6d77 100644
--- a/src/net/pieroxy/ua/detection/BotFamily.java
+++ b/src/net/pieroxy/ua/detection/BotFamily.java
@@ -13,6 +13,10 @@ public enum BotFamily { /**
*/
SPAMBOT("Spam bot",true ),
/**
+ * A robot that hides as a regular browser. These are considered to be nefarious (because they hide).
+ */
+ HIDDEN_BOT("Hidden bot",true ),
+ /**
* A robot used to crawl the web (Google's crawler, Bing's crawler, etc.)
*/
CRAWLER("Web Crawler",false ),
diff --git a/src/net/pieroxy/ua/detection/Brand.java b/src/net/pieroxy/ua/detection/Brand.java
index c9ae762..35b5f36 100644
--- a/src/net/pieroxy/ua/detection/Brand.java
+++ b/src/net/pieroxy/ua/detection/Brand.java
@@ -22,6 +22,7 @@ public enum Brand { HAOSOU("Haosou", "http://www.haosou.com"),
TENCENT("Tencent Holdings Limited", "http://www.tencent.com/en-us/index.shtml"),
CLOUDMOSA("CloudMosa Inc", "http://www.cloudmosa.com/contact"),
ILEGEND("iLegendSoft, Inc.", "http://www.ilegendsoft.com"),
+ BOULANGER("EssentielB", "http://www.essentielb.fr/"),
LINKEDIN("LinkedIn", "http://www.linkedin.com"),
BANANAFISH("Bananafish Software", "http://bananafishsoftware.com"),
WEBIN("Webin", "http://webinhq.com"),
@@ -74,6 +75,7 @@ public enum Brand { HAOSOU("Haosou", "http://www.haosou.com"),
DELL("Dell", "http://www.dell.com"),
COMPAQ("Compaq", "http://www.compaq.com"),
KYOCERA("Kyocera", "http://global.kyocera.com"),
+ SRWARE("SRWare", "http://www.srware.net/en"),
ALCATEL("Alcatel", "http://www.alcatelonetouch.com/global-en"),
FUJITSU("Fujitsu", "http://www.fujitsu.com"),
LOGICOM("Logicom", "http://www.logicom-europe.com"),
@@ -91,6 +93,7 @@ public enum Brand { HAOSOU("Haosou", "http://www.haosou.com"),
LUNASCAPE("Lunascape", "http://www.lunascape.tv"),
CHROMIUM("The Chromium Project", "http://www.chromium.org"),
OPENSOURCE("An Open Source Project"),
+ APACHE("Apache Software Foundation", "https://www.apache.org"),
VIVALDI("Vivaldi Technologies", "https://vivaldi.com"),
KDE("KDE", "http://www.kde.org"),
ACCESSCO("Access Co. Ltd.", "http://www.access-company.com"),
diff --git a/src/net/pieroxy/ua/detection/UserAgentDetector.java b/src/net/pieroxy/ua/detection/UserAgentDetector.java
index ca0e7b3..cab7872 100644
--- a/src/net/pieroxy/ua/detection/UserAgentDetector.java
+++ b/src/net/pieroxy/ua/detection/UserAgentDetector.java
@@ -100,12 +100,41 @@ static Bot getBot(UserAgentContext context) {
String ver;
String[]multi;
- if (context.ignore("ONDOWN3.2", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { // Looks like a bot to me.
+ if (context.getUA().equals("Mozilla/0.6 Beta (Windows)") || context.getUA().equals("Mozilla/0.91 Beta (Windows)")) {
+ context.consumeAllTokens();
+ return new Bot(Brand.UNKNOWN,BotFamily.HIDDEN_BOT,"","");
+ } else if (context.consume("ONDOWN3.2", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { // Looks like a bot to me.
return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"ONDOWN","3.2");
} else if (context.consume("Google Web Preview", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) {
context.consume("generic", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
context.consume("iPhone", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
return new Bot(Brand.GOOGLE, BotFamily.ROBOT,"Web Preview","");
+ } else if (context.consume("Contact: backend@getprismatic.com", MatchingType.EQUALS, MatchingRegion.PARENTHESIS) ||
+ (multi = context.getcNextTokens(new Matcher[] {new Matcher("Contact:", MatchingType.EQUALS),
+ new Matcher("feedback@getprismatic.com", MatchingType.EQUALS)
+ },
+ MatchingRegion.REGULAR)) != null) {
+ return new Bot(Brand.OTHER, BotFamily.ROBOT,"Get Prismatic Bot","", "http://getprismatic.com/");
+ }
+ else if ((ver=context.getcVersionAfterPattern("Diffbot/", MatchingType.BEGINS,MatchingRegion.BOTH))!=null ||
+ (ver=context.getcVersionAfterPattern("diffbot/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null ||
+ context.contains("+http://www.diffbot.com", MatchingType.BEGINS,MatchingRegion.PARENTHESIS)) {
+ return new Bot(Brand.OTHER, BotFamily.ROBOT,"Diffbot ", ver==null?"":ver, consumeUrlAndMozilla(context, "http://"));
+ } else if ((ver=context.getcVersionAfterPattern("oBot/", MatchingType.BEGINS,MatchingRegion.BOTH))!=null) {
+ return new Bot(Brand.IBM, BotFamily.ROBOT,"oBot ", ver, consumeUrlAndMozilla(context, "http://"));
+ } else if ((ver=context.getcVersionAfterPattern("yoozBot-", MatchingType.BEGINS,MatchingRegion.PARENTHESIS))!=null) {
+ context.consume("[0-9a-zA-Z\\.]+@[0-9a-zA-Z\\.]+", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
+ return new Bot(Brand.OTHER, BotFamily.CRAWLER,"Yooz Bot ", ver, consumeUrlAndMozilla(context, "http://"));
+ } else if ((ver=context.getcVersionAfterPattern("GWPImages/", MatchingType.BEGINS,MatchingRegion.PARENTHESIS))!=null) {
+ return new Bot(Brand.OTHER, BotFamily.ROBOT,"GWPImages ", ver, consumeUrlAndMozilla(context, "http://"));
+ } else if ((ver=context.getcVersionAfterPattern("BLEXBot/", MatchingType.BEGINS,MatchingRegion.PARENTHESIS))!=null) {
+ return new Bot(Brand.OTHER, BotFamily.ROBOT,"BLEX Bot ", ver, consumeUrlAndMozilla(context, "http://"));
+ } else if ((ver=context.getcVersionAfterPattern("LSSRocketCrawler/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null) {
+ context.consume("LightspeedSystems", MatchingType.EQUALS, MatchingRegion.REGULAR);
+ return new Bot(Brand.OTHER, BotFamily.ROBOT,"LSSRocketCrawler ", ver);
+ } else if ((ver=context.getcVersionAfterPattern("OrangeBot/", MatchingType.BEGINS,MatchingRegion.PARENTHESIS))!=null) {
+ context.consume("[0-9a-zA-Z\\.]+@[0-9a-zA-Z\\.]+", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
+ return new Bot(Brand.ORANGE, BotFamily.CRAWLER,"Orange Bot ", ver, consumeUrlAndMozilla(context, "http://"));
} else if ((ver=context.getcVersionAfterPattern("del.icio.us-thumbnails/", MatchingType.BEGINS,MatchingRegion.BOTH))!=null) {
return new Bot(Brand.DELICIOUS, BotFamily.ROBOT,"Thumbnails crawler ", ver);
} else if ((ver=context.getcVersionAfterPattern("EvoHtmlToPdf/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null) {
@@ -116,6 +145,11 @@ static Bot getBot(UserAgentContext context) {
}
context.consume("Unknown", MatchingType.EQUALS,MatchingRegion.PARENTHESIS);
return new Bot(Brand.OPENSOURCE,BotFamily.ROBOT,"PhantomJS", ver);
+ } else if (context.consume("theoldreader.com", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) {
+ context.consume("feed-id=", MatchingType.BEGINS, MatchingRegion.PARENTHESIS);
+ context.consume("[0-9]+ subscribers", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
+ consumeUrlAndMozilla(context, "http://");
+ return new Bot(Brand.GOOGLE,BotFamily.FEED_CRAWLER,"RSS Feed Fetcher","","http://theoldreader.com/");
} else if (context.consume("Feedfetcher-Google;", MatchingType.EQUALS, MatchingRegion.REGULAR)) {
context.consume("feed-id=", MatchingType.BEGINS, MatchingRegion.PARENTHESIS);
context.consume("[0-9]+ subscribers", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
@@ -480,8 +514,7 @@ else if (context.consume("spbot/",MatchingType.BEGINS, MatchingRegion.PARENTHESI
}
return new Bot(Brand.OTHER, BotFamily.ROBOT, "360 Spider", "");
} else if ((ver=context.getcVersionAfterPattern("FlipboardProxy/",MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) {
- context.consume("+http://flipboard.com/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS);
- return new Bot(Brand.OTHER, BotFamily.ROBOT, "Flipboard Proxy", ver);
+ return new Bot(Brand.OTHER, BotFamily.ROBOT, "Flipboard Proxy", ver, consumeUrlAndMozilla(context,"http://"));
} else if (context.consume("Exabot/",MatchingType.BEGINS, MatchingRegion.BOTH) || context.consume("Exabot-Images/",MatchingType.BEGINS, MatchingRegion.BOTH) || context.consume("Exabot-Test/",MatchingType.BEGINS, MatchingRegion.BOTH)) {
context.consume("BiggerBetter", MatchingType.BEGINS, MatchingRegion.PARENTHESIS);
return new Bot(Brand.EXALEAD, BotFamily.CRAWLER, "Exalead crawler", "", consumeUrlAndMozilla(context,"http://"));
@@ -1300,28 +1333,32 @@ static Browser tryOpera(UserAgentContext context) {
}
static float tryParseVersionNumber(String s) {
- StringBuilder sb = new StringBuilder(20);
- int status = 0;
- for (int i=0 ; i0)
+ ver = ver.substring(ver.indexOf(" ruby ")+6);
+ String rver = ver;
+
+ res.browser.family = BrowserFamily.LIBRARY;
+ res.browser.vendor = Brand.OPENSOURCE;
+ res.browser.description = "Ruby";
+ res.operatingSystem = new OS(Brand.UNKNOWN,OSFamily.UNKNOWN,"","");
+
+ if ((ver=context.getcVersionAfterPattern("Mechanize/",MatchingType.BEGINS, MatchingRegion.REGULAR)) != null) {
+ context.consume("http://",MatchingType.BEGINS, MatchingRegion.PARENTHESIS);
+ res.browser.description = "Mechanize (Ruby)";
+ } else if ((ver=context.getcVersionAfterPattern("HTTPClient/",MatchingType.BEGINS, MatchingRegion.REGULAR)) != null) {
+ context.consume("[0-9]{4}-[0-9]{2}-[0-9]{2}",MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
+ res.browser.description = "HTTPClient (Ruby"+(rver!=null?" "+rver:"")+")";
+ } else if (context.consume("Atig::Http/",MatchingType.BEGINS, MatchingRegion.REGULAR)) {
+ if (context.consume("arm-linux.*", MatchingType.REGEXP, MatchingRegion.PARENTHESIS)) {
+ res.device.architecture = "arm";
+ res.operatingSystem.family = OSFamily.LINUX;
+ res.operatingSystem.description = "Linux";
+ } else if (context.consume("i386-linux.*", MatchingType.REGEXP, MatchingRegion.PARENTHESIS)) {
+ res.device.architecture = "i386";
+ res.operatingSystem.family = OSFamily.LINUX;
+ res.operatingSystem.description = "Linux";
+ } else if (context.consume("i686-linux.*", MatchingType.REGEXP, MatchingRegion.PARENTHESIS)) {
+ res.device.architecture = "i686";
+ res.operatingSystem.family = OSFamily.LINUX;
+ res.operatingSystem.description = "Linux";
+ } else if (context.consume("x86_64-linux.*", MatchingType.REGEXP, MatchingRegion.PARENTHESIS)) {
+ res.device.architecture = "x86_64";
+ res.operatingSystem.family = OSFamily.LINUX;
+ res.operatingSystem.description = "Linux";
+ }
+ res.browser.description = "Atig (Ruby)";
+
+ context.consume("http.rb", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
+ context.consume("net-irc", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
+ }
+ if (ver != null) res.browser.setFullVersionOneShot(ver);
+ else if (rver != null) res.browser.setFullVersionOneShot(rver);
+
+ return res;
+ } else if ((ver=context.getcVersionAfterPattern("Commons-HttpClient/",MatchingType.BEGINS, MatchingRegion.REGULAR)) != null ||
+ (ver=context.getcVersionAfterPattern("Apache-HttpClient/",MatchingType.BEGINS, MatchingRegion.REGULAR)) != null) {
+ context.consume("Jakarta",MatchingType.EQUALS, MatchingRegion.REGULAR);
+ context.consume("java ",MatchingType.BEGINS, MatchingRegion.PARENTHESIS);
+ res.browser.family = BrowserFamily.LIBRARY;
+ res.browser.vendor = Brand.APACHE;
+ res.browser.description = "Commons HttpClient";
+ res.browser.setFullVersionOneShot(ver);
+ res.operatingSystem = new OS(Brand.UNKNOWN,OSFamily.UNKNOWN,"","");
return res;
} else if ((ver=context.getcVersionAfterPattern("Wget/",MatchingType.BEGINS, MatchingRegion.REGULAR)) != null) {
res.browser.family = BrowserFamily.LIBRARY;
@@ -3855,6 +3955,7 @@ static void consumeRandomGarbage(UserAgentContext context, UserAgentDetectionRes
while (context.ignore("\\[xSP_2:[0-9a-f]+_[0-9]+\\]", MatchingType.REGEXP, MatchingRegion.PARENTHESIS));
context.ignore("[0-9]+", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
}
+ context.ignore("APCPMS=", MatchingType.BEGINS, MatchingRegion.PARENTHESIS); // ?
context.ignore("BO[0-9]?IE[89](_v[0-9]+)?", MatchingType.REGEXP, MatchingRegion.PARENTHESIS); // Bing optimized bullshit
context.ignore("msn OptimizedIE8", MatchingType.REGEXP, MatchingRegion.PARENTHESIS); // Bing optimized bullshit
context.ignore("Tucows", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); // Dunno
diff --git a/test-data/database.gz b/test-data/database.gz
index 2154467..a68fb6c 100644
Binary files a/test-data/database.gz and b/test-data/database.gz differ