diff --git a/src/constants.js b/src/constants.js index e16e23d..9f3d205 100644 --- a/src/constants.js +++ b/src/constants.js @@ -8,27 +8,39 @@ export const BROWSER_ALIASES_MAP = { Bada: 'bada', BingCrawler: 'bingcrawler', BlackBerry: 'blackberry', + 'ChatGPT-User': 'chatgpt_user', Chrome: 'chrome', + ClaudeBot: 'claudebot', Chromium: 'chromium', + Diffbot: 'diffbot', DuckDuckBot: 'duckduckbot', Electron: 'electron', Epiphany: 'epiphany', + FacebookExternalHit: 'facebookexternalhit', Firefox: 'firefox', Focus: 'focus', Generic: 'generic', 'Google Search': 'google_search', Googlebot: 'googlebot', + GPTBot: 'gptbot', 'Internet Explorer': 'ie', InternetArchiveCrawler: 'internetarchivecrawler', 'K-Meleon': 'k_meleon', Maxthon: 'maxthon', - MetaWebCrawler: 'metawebcrawler', + 'Meta-ExternalAds': 'meta_externalads', + 'Meta-ExternalAgent': 'meta_externalagent', + 'Meta-ExternalFetcher': 'meta_externalfetcher', + 'Meta-WebIndexer': 'meta_webindexer', 'Microsoft Edge': 'edge', 'MZ Browser': 'mz', 'NAVER Whale Browser': 'naver', + 'OAI-SearchBot': 'oai_searchbot', + Omgilibot: 'omgilibot', Opera: 'opera', 'Opera Coast': 'opera_coast', 'Pale Moon': 'pale_moon', + PerplexityBot: 'perplexitybot', + 'Perplexity-User': 'perplexity_user', PhantomJS: 'phantomjs', PingdomBot: 'pingdombot', Puffin: 'puffin', @@ -50,6 +62,7 @@ export const BROWSER_ALIASES_MAP = { YahooSlurp: 'yahooslurp', 'Yandex Browser': 'yandex', YandexBot: 'yandexbot', + YouBot: 'youbot', }; export const BROWSER_MAP = { @@ -60,27 +73,39 @@ export const BROWSER_MAP = { bada: 'Bada', bingcrawler: 'BingCrawler', blackberry: 'BlackBerry', + chatgpt_user: 'ChatGPT-User', chrome: 'Chrome', + claudebot: 'ClaudeBot', chromium: 'Chromium', + diffbot: 'Diffbot', duckduckbot: 'DuckDuckBot', edge: 'Microsoft Edge', electron: 'Electron', epiphany: 'Epiphany', + facebookexternalhit: 'FacebookExternalHit', firefox: 'Firefox', focus: 'Focus', generic: 'Generic', google_search: 'Google Search', googlebot: 'Googlebot', + gptbot: 'GPTBot', ie: 'Internet Explorer', internetarchivecrawler: 'InternetArchiveCrawler', k_meleon: 'K-Meleon', maxthon: 'Maxthon', - metawebcrawler: 'MetaWebCrawler', + meta_externalads: 'Meta-ExternalAds', + meta_externalagent: 'Meta-ExternalAgent', + meta_externalfetcher: 'Meta-ExternalFetcher', + meta_webindexer: 'Meta-WebIndexer', mz: 'MZ Browser', naver: 'NAVER Whale Browser', + oai_searchbot: 'OAI-SearchBot', + omgilibot: 'Omgilibot', opera: 'Opera', opera_coast: 'Opera Coast', pale_moon: 'Pale Moon', + perplexitybot: 'PerplexityBot', + perplexity_user: 'Perplexity-User', phantomjs: 'PhantomJS', pingdombot: 'PingdomBot', puffin: 'Puffin', @@ -102,6 +127,7 @@ export const BROWSER_MAP = { yahooslurp: 'YahooSlurp', yandex: 'Yandex Browser', yandexbot: 'YandexBot', + youbot: 'YouBot', }; export const PLATFORMS_MAP = { diff --git a/src/parser-browsers.js b/src/parser-browsers.js index 55d4267..621bbb9 100644 --- a/src/parser-browsers.js +++ b/src/parser-browsers.js @@ -28,6 +28,227 @@ import Utils from './utils.js'; const commonVersionIdentifier = /version\/(\d+(\.?_?\d+)+)/i; const browsersList = [ + /* GPTBot */ + { + test: [/gptbot/i], + describe(ua) { + const browser = { + name: 'GPTBot', + }; + const version = Utils.getFirstMatch(/gptbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* ChatGPT-User */ + { + test: [/chatgpt-user/i], + describe(ua) { + const browser = { + name: 'ChatGPT-User', + }; + const version = Utils.getFirstMatch(/chatgpt-user\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* OAI-SearchBot */ + { + test: [/oai-searchbot/i], + describe(ua) { + const browser = { + name: 'OAI-SearchBot', + }; + const version = Utils.getFirstMatch(/oai-searchbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* ClaudeBot */ + { + test: [/claudebot/i, /claude-web/i, /claude-user/i, /claude-searchbot/i], + describe(ua) { + const browser = { + name: 'ClaudeBot', + }; + const version = Utils.getFirstMatch(/(?:claudebot|claude-web|claude-user|claude-searchbot)\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Omgilibot */ + { + test: [/omgilibot/i, /webzio-extended/i], + describe(ua) { + const browser = { + name: 'Omgilibot', + }; + const version = Utils.getFirstMatch(/(?:omgilibot|webzio-extended)\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Diffbot */ + { + test: [/diffbot/i], + describe(ua) { + const browser = { + name: 'Diffbot', + }; + const version = Utils.getFirstMatch(/diffbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* PerplexityBot */ + { + test: [/perplexitybot/i], + describe(ua) { + const browser = { + name: 'PerplexityBot', + }; + const version = Utils.getFirstMatch(/perplexitybot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Perplexity-User */ + { + test: [/perplexity-user/i], + describe(ua) { + const browser = { + name: 'Perplexity-User', + }; + const version = Utils.getFirstMatch(/perplexity-user\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* YouBot */ + { + test: [/youbot/i], + describe(ua) { + const browser = { + name: 'YouBot', + }; + const version = Utils.getFirstMatch(/youbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Meta-WebIndexer */ + { + test: [/meta-webindexer/i], + describe(ua) { + const browser = { + name: 'Meta-WebIndexer', + }; + const version = Utils.getFirstMatch(/meta-webindexer\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Meta-ExternalAds */ + { + test: [/meta-externalads/i], + describe(ua) { + const browser = { + name: 'Meta-ExternalAds', + }; + const version = Utils.getFirstMatch(/meta-externalads\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Meta-ExternalAgent */ + { + test: [/meta-externalagent/i], + describe(ua) { + const browser = { + name: 'Meta-ExternalAgent', + }; + const version = Utils.getFirstMatch(/meta-externalagent\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + + /* Meta-ExternalFetcher */ + { + test: [/meta-externalfetcher/i], + describe(ua) { + const browser = { + name: 'Meta-ExternalFetcher', + }; + const version = Utils.getFirstMatch(/meta-externalfetcher\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + /* Googlebot */ { test: [/googlebot/i], @@ -130,12 +351,12 @@ const browsersList = [ }, }, - /* MetaWebCrawler */ + /* FacebookExternalHit */ { test: [/facebookexternalhit/i, /facebookcatalog/i], describe() { return { - name: 'MetaWebCrawler', + name: 'FacebookExternalHit', }; }, }, diff --git a/src/parser-platforms.js b/src/parser-platforms.js index 36e1fb5..74889a5 100644 --- a/src/parser-platforms.js +++ b/src/parser-platforms.js @@ -29,6 +29,39 @@ export default [ }, }, + /* GPTBot */ + { + test: [/gptbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'OpenAI', + }; + }, + }, + + /* ChatGPT-User */ + { + test: [/chatgpt-user/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'OpenAI', + }; + }, + }, + + /* OAI-SearchBot */ + { + test: [/oai-searchbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'OpenAI', + }; + }, + }, + /* Baidu */ { test: [/baiduspider/i], @@ -62,6 +95,72 @@ export default [ }, }, + /* ClaudeBot */ + { + test: [/claudebot/i, /claude-web/i, /claude-user/i, /claude-searchbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Anthropic', + }; + }, + }, + + /* Omgilibot */ + { + test: [/omgilibot/i, /webzio-extended/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Webz.io', + }; + }, + }, + + /* Diffbot */ + { + test: [/diffbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Diffbot', + }; + }, + }, + + /* PerplexityBot */ + { + test: [/perplexitybot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Perplexity AI', + }; + }, + }, + + /* Perplexity-User */ + { + test: [/perplexity-user/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Perplexity AI', + }; + }, + }, + + /* YouBot */ + { + test: [/youbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'You.com', + }; + }, + }, + /* Internet Archive Crawler */ { test: [/ia_archiver/i], @@ -73,6 +172,50 @@ export default [ }, }, + /* Meta-WebIndexer */ + { + test: [/meta-webindexer/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Meta', + }; + }, + }, + + /* Meta-ExternalAds */ + { + test: [/meta-externalads/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Meta', + }; + }, + }, + + /* Meta-ExternalAgent */ + { + test: [/meta-externalagent/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Meta', + }; + }, + }, + + /* Meta-ExternalFetcher */ + { + test: [/meta-externalfetcher/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Meta', + }; + }, + }, + /* Meta Web Crawler */ { test: [/facebookexternalhit/i, /facebookcatalog/i], diff --git a/test/acceptance/useragentstrings.yml b/test/acceptance/useragentstrings.yml index 008dc94..0f41035 100644 --- a/test/acceptance/useragentstrings.yml +++ b/test/acceptance/useragentstrings.yml @@ -2549,6 +2549,211 @@ vendor: "Google" engine: name: "Blink" + GPTBot: + - + ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.1; +https://openai.com/gptbot)" + spec: + browser: + name: "GPTBot" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "OpenAI" + engine: + name: "Blink" + ChatGPT-User: + - + ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ChatGPT-User/1.0; +https://openai.com/bot)" + spec: + browser: + name: "ChatGPT-User" + version: "1.0" + os: {} + platform: + type: "bot" + vendor: "OpenAI" + engine: + name: "Blink" + OAI-SearchBot: + - + ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot)" + spec: + browser: + name: "OAI-SearchBot" + version: "1.0" + os: {} + platform: + type: "bot" + vendor: "OpenAI" + engine: + name: "Blink" + ClaudeBot: + - + ua: "Mozilla/5.0 (compatible; ClaudeBot/1.0; +https://www.anthropic.com/claudebot)" + spec: + browser: + name: "ClaudeBot" + version: "1.0" + os: {} + platform: + type: "bot" + vendor: "Anthropic" + engine: {} + Omgilibot: + - + ua: "Mozilla/5.0 (compatible; Omgilibot/1.0; +https://www.omgili.com)" + spec: + browser: + name: "Omgilibot" + version: "1.0" + os: {} + platform: + type: "bot" + vendor: "Webz.io" + engine: {} + Meta-WebIndexer: + - + ua: "meta-webindexer/1.1" + spec: + browser: + name: "Meta-WebIndexer" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + - + ua: "meta-webindexer/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)" + spec: + browser: + name: "Meta-WebIndexer" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + Meta-ExternalAds: + - + ua: "meta-externalads/1.1" + spec: + browser: + name: "Meta-ExternalAds" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + - + ua: "meta-externalads/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)" + spec: + browser: + name: "Meta-ExternalAds" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + Meta-ExternalAgent: + - + ua: "meta-externalagent/1.1" + spec: + browser: + name: "Meta-ExternalAgent" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + - + ua: "meta-externalagent/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)" + spec: + browser: + name: "Meta-ExternalAgent" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + Meta-ExternalFetcher: + - + ua: "meta-externalfetcher/1.1" + spec: + browser: + name: "Meta-ExternalFetcher" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + - + ua: "meta-externalfetcher/1.1 (+https://developers.facebook.com/docs/sharing/webmasters/crawler)" + spec: + browser: + name: "Meta-ExternalFetcher" + version: "1.1" + os: {} + platform: + type: "bot" + vendor: "Meta" + engine: {} + Diffbot: + - + ua: "Mozilla/5.0 (compatible; Diffbot/3.0; +http://www.diffbot.com)" + spec: + browser: + name: "Diffbot" + version: "3.0" + os: {} + platform: + type: "bot" + vendor: "Diffbot" + engine: {} + PerplexityBot: + - + ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; PerplexityBot/1.0; +https://perplexity.ai/perplexitybot)" + spec: + browser: + name: "PerplexityBot" + version: "1.0" + os: {} + platform: + type: "bot" + vendor: "Perplexity AI" + engine: + name: "Blink" + Perplexity-User: + - + ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Perplexity-User/1.0; +https://perplexity.ai/perplexity-user)" + spec: + browser: + name: "Perplexity-User" + version: "1.0" + os: {} + platform: + type: "bot" + vendor: "Perplexity AI" + engine: + name: "Blink" + YouBot: + - + ua: "Mozilla/5.0 (compatible; YouBot/1.0; +https://you.com/bot)" + spec: + browser: + name: "YouBot" + version: "1.0" + os: {} + platform: + type: "bot" + vendor: "You.com" + engine: {} AmazonBot: - ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)" @@ -2639,12 +2844,12 @@ type: "bot" vendor: "Internet Archive" engine: {} - MetaWebCrawler: + FacebookExternalHit: - ua: "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)" spec: browser: - name: "MetaWebCrawler" + name: "FacebookExternalHit" os: {} platform: type: "bot" @@ -2654,7 +2859,7 @@ ua: "facebookexternalhit/1.1" spec: browser: - name: "MetaWebCrawler" + name: "FacebookExternalHit" os: {} platform: type: "bot" @@ -2664,7 +2869,7 @@ ua: "facebookcatalog/1.0" spec: browser: - name: "MetaWebCrawler" + name: "FacebookExternalHit" os: {} platform: type: "bot"