diff --git a/src/constants.js b/src/constants.js index ce7d538..2b123b3 100644 --- a/src/constants.js +++ b/src/constants.js @@ -1,7 +1,6 @@ // NOTE: this list must be up-to-date with browsers listed in // test/acceptance/useragentstrings.yml export const BROWSER_ALIASES_MAP = { - 'Applebot-Extended': 'applebot_extended', AmazonBot: 'amazonbot', 'Amazon Silk': 'amazon_silk', 'Android Browser': 'android', @@ -17,12 +16,10 @@ export const BROWSER_ALIASES_MAP = { DuckDuckBot: 'duckduckbot', Electron: 'electron', Epiphany: 'epiphany', - FacebookBot: 'facebookbot', Firefox: 'firefox', Focus: 'focus', Generic: 'generic', 'Google Search': 'google_search', - 'Google-Extended': 'google_extended', Googlebot: 'googlebot', GPTBot: 'gptbot', 'Internet Explorer': 'ie', @@ -30,6 +27,7 @@ export const BROWSER_ALIASES_MAP = { 'K-Meleon': 'k_meleon', Maxthon: 'maxthon', MetaWebCrawler: 'metawebcrawler', + 'Meta-WebIndexer': 'meta_webindexer', 'Microsoft Edge': 'edge', 'MZ Browser': 'mz', 'NAVER Whale Browser': 'naver', @@ -64,7 +62,6 @@ export const BROWSER_ALIASES_MAP = { }; export const BROWSER_MAP = { - applebot_extended: 'Applebot-Extended', amazonbot: 'AmazonBot', amazon_silk: 'Amazon Silk', android: 'Android Browser', @@ -81,11 +78,9 @@ export const BROWSER_MAP = { edge: 'Microsoft Edge', electron: 'Electron', epiphany: 'Epiphany', - facebookbot: 'FacebookBot', firefox: 'Firefox', focus: 'Focus', generic: 'Generic', - google_extended: 'Google-Extended', google_search: 'Google Search', googlebot: 'Googlebot', gptbot: 'GPTBot', @@ -94,6 +89,7 @@ export const BROWSER_MAP = { k_meleon: 'K-Meleon', maxthon: 'Maxthon', metawebcrawler: 'MetaWebCrawler', + meta_webindexer: 'Meta-WebIndexer', mz: 'MZ Browser', naver: 'NAVER Whale Browser', opera: 'Opera', diff --git a/src/parser-browsers.js b/src/parser-browsers.js index dceceb8..f667dbc 100644 --- a/src/parser-browsers.js +++ b/src/parser-browsers.js @@ -79,40 +79,6 @@ const browsersList = [ }, }, - /* Google-Extended */ - { - test: [/google-extended/i], - describe(ua) { - const browser = { - name: 'Google-Extended', - }; - const version = Utils.getFirstMatch(/google-extended\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); - - if (version) { - browser.version = version; - } - - return browser; - }, - }, - - /* Applebot-Extended */ - { - test: [/applebot-extended/i], - describe(ua) { - const browser = { - name: 'Applebot-Extended', - }; - const version = Utils.getFirstMatch(/applebot-extended\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); - - if (version) { - browser.version = version; - } - - return browser; - }, - }, - /* ClaudeBot */ { test: [/claudebot/i, /claude-web/i], @@ -147,23 +113,6 @@ const browsersList = [ }, }, - /* FacebookBot */ - { - test: [/facebookbot/i], - describe(ua) { - const browser = { - name: 'FacebookBot', - }; - const version = Utils.getFirstMatch(/facebookbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); - - if (version) { - browser.version = version; - } - - return browser; - }, - }, - /* Diffbot */ { test: [/diffbot/i], @@ -215,6 +164,23 @@ const browsersList = [ }, }, + /* Meta-WebIndexer */ + { + test: [/meta-webindexer/i], + describe(ua) { + const browser = { + name: 'Meta-WebIndexer', + }; + const version = Utils.getFirstMatch(/meta-webindexer\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua); + + if (version) { + browser.version = version; + } + + return browser; + }, + }, + /* Googlebot */ { test: [/googlebot/i], diff --git a/src/parser-platforms.js b/src/parser-platforms.js index 36e1fb5..5258424 100644 --- a/src/parser-platforms.js +++ b/src/parser-platforms.js @@ -29,6 +29,39 @@ export default [ }, }, + /* GPTBot */ + { + test: [/gptbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'OpenAI', + }; + }, + }, + + /* ChatGPT-User */ + { + test: [/chatgpt-user/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'OpenAI', + }; + }, + }, + + /* OAI-SearchBot */ + { + test: [/oai-searchbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'OpenAI', + }; + }, + }, + /* Baidu */ { test: [/baiduspider/i], @@ -62,6 +95,61 @@ export default [ }, }, + /* ClaudeBot */ + { + test: [/claudebot/i, /claude-web/i, /claude-user/i, /claude-searchbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Anthropic', + }; + }, + }, + + /* Omgilibot */ + { + test: [/omgilibot/i, /webzio-extended/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Omgili', + }; + }, + }, + + /* Diffbot */ + { + test: [/diffbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Diffbot', + }; + }, + }, + + /* PerplexityBot */ + { + test: [/perplexitybot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Perplexity AI', + }; + }, + }, + + /* YouBot */ + { + test: [/youbot/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'You.com', + }; + }, + }, + /* Internet Archive Crawler */ { test: [/ia_archiver/i], @@ -73,6 +161,17 @@ export default [ }, }, + /* Meta-WebIndexer */ + { + test: [/meta-webindexer/i], + describe() { + return { + type: PLATFORMS_MAP.bot, + vendor: 'Meta', + }; + }, + }, + /* Meta Web Crawler */ { test: [/facebookexternalhit/i, /facebookcatalog/i], diff --git a/test/acceptance/useragentstrings.yml b/test/acceptance/useragentstrings.yml index c1ada06..2f473bf 100644 --- a/test/acceptance/useragentstrings.yml +++ b/test/acceptance/useragentstrings.yml @@ -2560,7 +2560,8 @@ platform: type: "bot" vendor: "OpenAI" - engine: {} + engine: + name: "Blink" ChatGPT-User: - ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ChatGPT-User/1.0; +https://openai.com/bot)" @@ -2572,7 +2573,8 @@ platform: type: "bot" vendor: "OpenAI" - engine: {} + engine: + name: "Blink" OAI-SearchBot: - ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot)" @@ -2584,34 +2586,8 @@ platform: type: "bot" vendor: "OpenAI" - engine: {} - Google-Extended: - - - ua: "Mozilla/5.0 (compatible; Google-Extended; +https://developers.google.com/search/help/google-extended)" - spec: - browser: - name: "Google-Extended" - os: {} - platform: - type: "bot" - vendor: "Google" - engine: {} - Applebot-Extended: - - - ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15 (compatible; Applebot-Extended/1.0; +https://support.apple.com/applebot)" - spec: - browser: - name: "Applebot-Extended" - version: "1.0" - os: - name: "macOS" - version: "10.15.7" - versionName: "Catalina" - platform: - type: "bot" - vendor: "Apple" engine: - name: "WebKit" + name: "Blink" ClaudeBot: - ua: "Mozilla/5.0 (compatible; ClaudeBot/1.0; +https://www.anthropic.com/claudebot)" @@ -2636,13 +2612,13 @@ type: "bot" vendor: "Omgili" engine: {} - FacebookBot: + Meta-WebIndexer: - - ua: "Mozilla/5.0 (compatible; FacebookBot/1.0; +https://developers.facebook.com/docs/sharing/best-practices#crawl)" + ua: "Mozilla/5.0 (compatible; meta-webindexer/1.1; +http://developer.facebook.com/docs/sharing/webmasters/crawler)" spec: browser: - name: "FacebookBot" - version: "1.0" + name: "Meta-WebIndexer" + version: "1.1" os: {} platform: type: "bot"