From 771dfb2dfec729ad04d396ef54b5f56d5018bfdd Mon Sep 17 00:00:00 2001 From: Lucio Martinez Date: Sat, 22 Nov 2025 15:44:08 +0100 Subject: [PATCH] feat: add more bot platforms (#542) Co-authored-by: naorpeled --- src/constants.js | 61 ++++++---- src/parser-browsers.js | 125 ++++++++++++++++++++ src/parser-platforms.js | 99 ++++++++++++++++ test/acceptance/useragentstrings.yml | 168 +++++++++++++++++++++++++++ 4 files changed, 432 insertions(+), 21 deletions(-) diff --git a/src/constants.js b/src/constants.js index b0da115..e16e23d 100644 --- a/src/constants.js +++ b/src/constants.js @@ -1,12 +1,16 @@ // NOTE: this list must be up-to-date with browsers listed in // test/acceptance/useragentstrings.yml export const BROWSER_ALIASES_MAP = { + AmazonBot: 'amazonbot', 'Amazon Silk': 'amazon_silk', 'Android Browser': 'android', + BaiduSpider: 'baiduspider', Bada: 'bada', + BingCrawler: 'bingcrawler', BlackBerry: 'blackberry', Chrome: 'chrome', Chromium: 'chromium', + DuckDuckBot: 'duckduckbot', Electron: 'electron', Epiphany: 'epiphany', Firefox: 'firefox', @@ -15,8 +19,10 @@ export const BROWSER_ALIASES_MAP = { 'Google Search': 'google_search', Googlebot: 'googlebot', 'Internet Explorer': 'ie', + InternetArchiveCrawler: 'internetarchivecrawler', 'K-Meleon': 'k_meleon', Maxthon: 'maxthon', + MetaWebCrawler: 'metawebcrawler', 'Microsoft Edge': 'edge', 'MZ Browser': 'mz', 'NAVER Whale Browser': 'naver', @@ -24,10 +30,12 @@ export const BROWSER_ALIASES_MAP = { 'Opera Coast': 'opera_coast', 'Pale Moon': 'pale_moon', PhantomJS: 'phantomjs', + PingdomBot: 'pingdombot', Puffin: 'puffin', - QupZilla: 'qupzilla', QQ: 'qq', QQLite: 'qqlite', + QupZilla: 'qupzilla', + Roku: 'roku', Safari: 'safari', Sailfish: 'sailfish', 'Samsung Internet for Android': 'samsung_internet', @@ -39,38 +47,47 @@ export const BROWSER_ALIASES_MAP = { Vivaldi: 'vivaldi', 'WebOS Browser': 'webos', WeChat: 'wechat', + YahooSlurp: 'yahooslurp', 'Yandex Browser': 'yandex', - Roku: 'roku', + YandexBot: 'yandexbot', }; export const BROWSER_MAP = { + amazonbot: 'AmazonBot', amazon_silk: 'Amazon Silk', android: 'Android Browser', + baiduspider: 'BaiduSpider', bada: 'Bada', + bingcrawler: 'BingCrawler', blackberry: 'BlackBerry', chrome: 'Chrome', chromium: 'Chromium', + duckduckbot: 'DuckDuckBot', + edge: 'Microsoft Edge', electron: 'Electron', epiphany: 'Epiphany', firefox: 'Firefox', focus: 'Focus', generic: 'Generic', - googlebot: 'Googlebot', google_search: 'Google Search', + googlebot: 'Googlebot', ie: 'Internet Explorer', + internetarchivecrawler: 'InternetArchiveCrawler', k_meleon: 'K-Meleon', maxthon: 'Maxthon', - edge: 'Microsoft Edge', + metawebcrawler: 'MetaWebCrawler', mz: 'MZ Browser', naver: 'NAVER Whale Browser', opera: 'Opera', opera_coast: 'Opera Coast', pale_moon: 'Pale Moon', phantomjs: 'PhantomJS', + pingdombot: 'PingdomBot', puffin: 'Puffin', - qupzilla: 'QupZilla', qq: 'QQ Browser', qqlite: 'QQ Browser Lite', + qupzilla: 'QupZilla', + roku: 'Roku', safari: 'Safari', sailfish: 'Sailfish', samsung_internet: 'Samsung Internet for Android', @@ -82,39 +99,41 @@ export const BROWSER_MAP = { vivaldi: 'Vivaldi', webos: 'WebOS Browser', wechat: 'WeChat', + yahooslurp: 'YahooSlurp', yandex: 'Yandex Browser', + yandexbot: 'YandexBot', }; export const PLATFORMS_MAP = { - tablet: 'tablet', - mobile: 'mobile', - desktop: 'desktop', - tv: 'tv', bot: 'bot', + desktop: 'desktop', + mobile: 'mobile', + tablet: 'tablet', + tv: 'tv', }; export const OS_MAP = { - WindowsPhone: 'Windows Phone', - Windows: 'Windows', - MacOS: 'macOS', - iOS: 'iOS', Android: 'Android', - WebOS: 'WebOS', - BlackBerry: 'BlackBerry', Bada: 'Bada', - Tizen: 'Tizen', - Linux: 'Linux', + BlackBerry: 'BlackBerry', ChromeOS: 'Chrome OS', + HarmonyOS: 'HarmonyOS', + iOS: 'iOS', + Linux: 'Linux', + MacOS: 'macOS', PlayStation4: 'PlayStation 4', Roku: 'Roku', - HarmonyOS: 'HarmonyOS', + Tizen: 'Tizen', + WebOS: 'WebOS', + Windows: 'Windows', + WindowsPhone: 'Windows Phone', }; export const ENGINE_MAP = { - EdgeHTML: 'EdgeHTML', Blink: 'Blink', - Trident: 'Trident', - Presto: 'Presto', + EdgeHTML: 'EdgeHTML', Gecko: 'Gecko', + Presto: 'Presto', + Trident: 'Trident', WebKit: 'WebKit', }; diff --git a/src/parser-browsers.js b/src/parser-browsers.js index 997d724..55d4267 100644 --- a/src/parser-browsers.js +++ b/src/parser-browsers.js @@ -45,6 +45,131 @@ const browsersList = [ }, }, + /* AmazonBot */ + { + test: [/amazonbot/i], + describe(ua) { + const browser = { + name: 'AmazonBot', + }; + const version = Utils.getFirstMatch(/amazonbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* BingCrawler */ + { + test: [/bingbot/i], + describe(ua) { + const browser = { + name: 'BingCrawler', + }; + const version = Utils.getFirstMatch(/bingbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* BaiduSpider */ + { + test: [/baiduspider/i], + describe(ua) { + const browser = { + name: 'BaiduSpider', + }; + const version = Utils.getFirstMatch(/baiduspider\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* DuckDuckBot */ + { + test: [/duckduckbot/i], + describe(ua) { + const browser = { + name: 'DuckDuckBot', + }; + const version = Utils.getFirstMatch(/duckduckbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* InternetArchiveCrawler */ + { + test: [/ia_archiver/i], + describe(ua) { + const browser = { + name: 'InternetArchiveCrawler', + }; + const version = Utils.getFirstMatch(/ia_archiver\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* MetaWebCrawler */ + { + test: [/facebookexternalhit/i, /facebookcatalog/i], + describe() { + return { + name: 'MetaWebCrawler', + }; + }, + }, + + /* YahooSlurp */ + { + test: [/yahoo!?[\s/]*slurp/i], + describe() { + return { + name: 'YahooSlurp', + }; + }, + }, + + /* YandexBot */ + { + test: [/yandexbot/i, /yandexmobilebot/i], + describe() { + return { + name: 'YandexBot', + }; + }, + }, + + /* PingdomBot */ + { + test: [/pingdom/i], + describe() { + return { + name: 'PingdomBot', + }; + }, + }, + /* Opera < 13.0 */ { test: [/opera/i], diff --git a/src/parser-platforms.js b/src/parser-platforms.js index 15eedac..36e1fb5 100644 --- a/src/parser-platforms.js +++ b/src/parser-platforms.js @@ -18,6 +18,105 @@ export default [ }, }, + /* AmazonBot */ + { + test: [/amazonbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Amazon', + }; + }, + }, + + /* Baidu */ + { + test: [/baiduspider/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Baidu', + }; + }, + }, + + /* Bingbot */ + { + test: [/bingbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Bing', + }; + }, + }, + + /* DuckDuckBot */ + { + test: [/duckduckbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'DuckDuckGo', + }; + }, + }, + + /* Internet Archive Crawler */ + { + test: [/ia_archiver/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Internet Archive', + }; + }, + }, + + /* Meta Web Crawler */ + { + test: [/facebookexternalhit/i, /facebookcatalog/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Meta', + }; + }, + }, + + /* Yahoo! Slurp */ + { + test: [/yahoo/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Yahoo', + }; + }, + }, + + /* Yandex */ + { + test: [/yandexbot/i, /yandexmobilebot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Yandex', + }; + }, + }, + + /* Pingdom */ + { + test: [/pingdom/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Pingdom', + }; + }, + }, + /* Huawei */ { test: [/huawei/i], diff --git a/test/acceptance/useragentstrings.yml b/test/acceptance/useragentstrings.yml index 76d08fb..008dc94 100644 --- a/test/acceptance/useragentstrings.yml +++ b/test/acceptance/useragentstrings.yml @@ -2549,6 +2549,174 @@ vendor: "Google" engine: name: "Blink" + AmazonBot: + - + ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)" + spec: + browser: + name: "AmazonBot" + version: "0.1" + os: + name: "macOS" + version: "10.10.1" + versionName: "Yosemite" + platform: + type: "bot" + vendor: "Amazon" + engine: + name: "WebKit" + version: "600.2.5" + BingCrawler: + - + ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/" + spec: + browser: + name: "BingCrawler" + version: "2.0" + os: {} + platform: + type: "bot" + vendor: "Bing" + engine: + name: "Blink" + - + ua: "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) 80.0.345.0 Safari/537.36" + spec: + browser: + name: "BingCrawler" + version: "2.0" + os: {} + platform: + type: "bot" + vendor: "Bing" + engine: {} + - + ua: "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.345.0 Mobile Safari/537.36 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)" + spec: + browser: + name: "BingCrawler" + version: "2.0" + os: + name: "Android" + version: "6.0.1" + versionName: "Marshmallow" + platform: + type: "bot" + vendor: "Bing" + engine: + name: "Blink" + BaiduSpider: + - + ua: "Baiduspider" + spec: + browser: + name: "BaiduSpider" + os: {} + platform: + type: "bot" + vendor: "Baidu" + engine: {} + DuckDuckBot: + - + ua: "DuckDuckBot/1.1; (+http://duckduckgo.com/duckduckbot.html)" + spec: + browser: + name: "DuckDuckBot" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "DuckDuckGo" + engine: {} + InternetArchiveCrawler: + - + ua: "ia_archiver" + spec: + browser: + name: "InternetArchiveCrawler" + os: {} + platform: + type: "bot" + vendor: "Internet Archive" + engine: {} + MetaWebCrawler: + - + ua: "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" + spec: + browser: + name: "MetaWebCrawler" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + - + ua: "facebookexternalhit/1.1" + spec: + browser: + name: "MetaWebCrawler" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + - + ua: "facebookcatalog/1.0" + spec: + browser: + name: "MetaWebCrawler" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + YahooSlurp: + - + ua: "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)" + spec: + browser: + name: "YahooSlurp" + os: {} + platform: + type: "bot" + vendor: "Yahoo" + engine: {} + YandexBot: + - + ua: "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)" + spec: + browser: + name: "YandexBot" + os: {} + platform: + type: "bot" + vendor: "Yandex" + engine: {} + - + ua: "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)" + spec: + browser: + name: "YandexBot" + os: + name: "iOS" + version: "8.1" + platform: + type: "bot" + vendor: "Yandex" + engine: + name: "WebKit" + version: "600.1.4" + PingdomBot: + - + ua: "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)" + spec: + browser: + name: "PingdomBot" + os: {} + platform: + type: "bot" + vendor: "Pingdom" + engine: {} WeChat: - ua: "Mozilla/5.0 (iPad; U; CPU OS 9 like Mac OS X; en-us; iPad4,4) AppleWebKit/534.46 (KHTML, like Gecko) MicroMessenger/6.5.2.501 U3/1 Safari/7543.48.3"