From e417ea29d7bdf047f8329d2e4fa5aaa519e6223a Mon Sep 17 00:00:00 2001 From: Lucio Martinez Date: Tue, 28 Nov 2023 10:57:13 +0100 Subject: [PATCH 1/5] Add bot platforms --- src/parser-platforms.js | 88 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/src/parser-platforms.js b/src/parser-platforms.js index 15eedac..ddf5b17 100644 --- a/src/parser-platforms.js +++ b/src/parser-platforms.js @@ -18,6 +18,94 @@ export default [ }, }, + /* Alexa */ + { + test: [/ia_archiver/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Amazon', + }; + }, + }, + + /* Baidu */ + { + test: [/baiduspider/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Baidu', + }; + }, + }, + + /* Bingbot */ + { + test: [/bingbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Bing', + }; + }, + }, + + /* DuckDuckBot */ + { + test: [/duckduckbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'DuckDuckGo', + }; + }, + }, + + /* Facebook */ + { + test: [/facebookexternalhit/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Facebook', + }; + }, + }, + + /* Yahoo! 
Slurp */ + { + test: [/yahoo/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Yahoo', + }; + }, + }, + + /* Yandex */ + { + test: [/yandexbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Yandex', + }; + }, + }, + + /* Pingdom */ + { + test: [/pingdom/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Pingdom', + }; + }, + }, + /* Huawei */ { test: [/huawei/i], From d197a446c221a209d79130598482d4d7239b44d5 Mon Sep 17 00:00:00 2001 From: Lucio Martinez Date: Wed, 21 Aug 2024 13:56:02 +0200 Subject: [PATCH 2/5] Add Facebook and IA crawler --- src/parser-platforms.js | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/parser-platforms.js b/src/parser-platforms.js index ddf5b17..5327317 100644 --- a/src/parser-platforms.js +++ b/src/parser-platforms.js @@ -18,9 +18,9 @@ export default [ }, }, - /* Alexa */ + /* AmazonBot */ { - test: [/ia_archiver/i], + test: [/Amazonbot/i], describe() { return { type: PLATFORMS_MAP.bot, @@ -62,13 +62,24 @@ export default [ }, }, - /* Facebook */ + /* Internet Archive Crawler */ { - test: [/facebookexternalhit/i], + test: [/ia_archiver/i], describe() { return { type: PLATFORMS_MAP.bot, - vendor: 'Facebook', + vendor: 'Internet Archive', + }; + }, + }, + + /* Meta Web Crawler */ + { + test: [/facebookexternalhit/i, /facebookcatalog/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Meta', }; }, }, From d668404cc1867992056a8ba900e32a199bef99b5 Mon Sep 17 00:00:00 2001 From: Lucio Martinez Date: Wed, 21 Aug 2024 14:04:17 +0200 Subject: [PATCH 3/5] Add mobile layout of Yandex bot --- src/parser-platforms.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser-platforms.js b/src/parser-platforms.js index 5327317..61d839d 100644 --- a/src/parser-platforms.js +++ b/src/parser-platforms.js @@ -97,7 +97,7 @@ export default [ /* Yandex */ { - test: [/yandexbot/i], + test: [/yandexbot/i, /yandexmobilebot/i], 
describe() { return { type: PLATFORMS_MAP.bot, From 6647d3bf611ab6f12ef36330851820520da7ed6e Mon Sep 17 00:00:00 2001 From: Lucio Martinez Date: Wed, 21 Aug 2024 14:10:01 +0200 Subject: [PATCH 4/5] Add tests --- test/acceptance/useragentstrings.yml | 159 +++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) diff --git a/test/acceptance/useragentstrings.yml b/test/acceptance/useragentstrings.yml index 9fedcb4..6555eb1 100644 --- a/test/acceptance/useragentstrings.yml +++ b/test/acceptance/useragentstrings.yml @@ -2535,6 +2535,165 @@ vendor: "Google" engine: name: "Blink" + AmazonBot: + - + ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)" + spec: + browser: + name: "AmazonBot" + version: "0.1" + os: + name: "macOS" + version: "10.10.1" + versionName: "Yosemite" + platform: + type: "bot" + vendor: "Amazon" + engine: {} + BingCrawler: + - + ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) Chrome/" + spec: + browser: + name: "BingCrawler" + version: "2.0" + os: {} + platform: + type: "bot" + vendor: "Bing" + engine: {} + - + ua: "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm) 80.0.345.0 Safari/537.36" + spec: + browser: + name: "BingCrawler" + version: "2.0" + os: {} + platform: + type: "bot" + vendor: "Bing" + engine: {} + - + ua: "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.345.0 Mobile Safari/537.36 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)" + spec: + browser: + name: "BingCrawler" + version: "2.0" + os: + name: "Android" + version: "6.0.1" + platform: + type: "bot" + vendor: "Bing" + engine: {} + BaiduSpider: + - + ua: "Baiduspider" + spec: + browser: + name: "BaiduSpider" + os: {} + platform: + type: "bot" + vendor: "Baidu" + engine: {} + 
DuckDuckBot: + - + ua: "DuckDuckBot/1.1; (+http://duckduckgo.com/duckduckbot.html)" + spec: + browser: + name: "DuckDuckBot" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "DuckDuckGo" + engine: {} + InternetArchiveCrawler: + - + ua: "ia_archiver" + spec: + browser: + name: "InternetArchiveCrawler" + os: {} + platform: + type: "bot" + vendor: "Internet Archive" + engine: {} + MetaWebCrawler: + - + ua: "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" + spec: + browser: + name: "MetaWebCrawler" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + - + ua: "facebookexternalhit/1.1" + spec: + browser: + name: "MetaWebCrawler" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + - + ua: "facebookcatalog/1.0" + spec: + browser: + name: "MetaWebCrawler" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + YahooSlurp: + - + ua: "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)" + spec: + browser: + name: "YahooSlurp" + os: {} + platform: + type: "bot" + vendor: "Yahoo" + engine: {} + YandexBot: + - + ua: "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)" + spec: + browser: + name: "YandexBot" + os: {} + platform: + type: "bot" + vendor: "Yandex" + engine: {} + - + ua: "Mozilla/5.0 (iPhone; CPU iPhone OS 8_1 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B411 Safari/600.1.4 (compatible; YandexMobileBot/3.0; +http://yandex.com/bots)" + spec: + browser: + name: "YandexBot" + os: {} + platform: + type: "bot" + vendor: "Yandex" + engine: {} + PingdomBot: + - + ua: "Pingdom.com_bot_version_1.4_(http://www.pingdom.com/)" + spec: + browser: + name: "PingdomBot" + os: {} + platform: + type: "bot" + vendor: "Pingdom" + engine: {} WeChat: - ua: "Mozilla/5.0 (iPad; U; CPU OS 9 like Mac OS X; en-us; iPad4,4) AppleWebKit/534.46 (KHTML, like Gecko) MicroMessenger/6.5.2.501 U3/1 Safari/7543.48.3" From
1e86c2e76dc1c5d50a30deb077d01ca5d9107d95 Mon Sep 17 00:00:00 2001 From: Lucio Martinez Date: Wed, 21 Aug 2024 17:20:40 +0200 Subject: [PATCH 5/5] Fix typo --- test/acceptance/useragentstrings.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/acceptance/useragentstrings.yml b/test/acceptance/useragentstrings.yml index 6555eb1..3d06341 100644 --- a/test/acceptance/useragentstrings.yml +++ b/test/acceptance/useragentstrings.yml @@ -2537,7 +2537,7 @@ name: "Blink" AmazonBot: - - ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML\, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)" + ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)" spec: browser: name: "AmazonBot"