1
0
mirror of https://github.com/lancedikson/bowser synced 2025-12-05 06:02:14 +00:00

Add support for AI crawl bots

This commit is contained in:
naorpeled 2025-11-22 16:56:19 +02:00
parent 771dfb2dfe
commit fdd5a5e8e6
3 changed files with 344 additions and 0 deletions

View File

@ -1,6 +1,7 @@
// NOTE: this list must be up-to-date with browsers listed in // NOTE: this list must be up-to-date with browsers listed in
// test/acceptance/useragentstrings.yml // test/acceptance/useragentstrings.yml
export const BROWSER_ALIASES_MAP = { export const BROWSER_ALIASES_MAP = {
'Applebot-Extended': 'applebot_extended',
AmazonBot: 'amazonbot', AmazonBot: 'amazonbot',
'Amazon Silk': 'amazon_silk', 'Amazon Silk': 'amazon_silk',
'Android Browser': 'android', 'Android Browser': 'android',
@ -9,15 +10,21 @@ export const BROWSER_ALIASES_MAP = {
BingCrawler: 'bingcrawler', BingCrawler: 'bingcrawler',
BlackBerry: 'blackberry', BlackBerry: 'blackberry',
Chrome: 'chrome', Chrome: 'chrome',
'ChatGPT-User': 'chatgpt_user',
ClaudeBot: 'claudebot',
Chromium: 'chromium', Chromium: 'chromium',
Diffbot: 'diffbot',
DuckDuckBot: 'duckduckbot', DuckDuckBot: 'duckduckbot',
Electron: 'electron', Electron: 'electron',
Epiphany: 'epiphany', Epiphany: 'epiphany',
FacebookBot: 'facebookbot',
Firefox: 'firefox', Firefox: 'firefox',
Focus: 'focus', Focus: 'focus',
Generic: 'generic', Generic: 'generic',
'Google Search': 'google_search', 'Google Search': 'google_search',
'Google-Extended': 'google_extended',
Googlebot: 'googlebot', Googlebot: 'googlebot',
GPTBot: 'gptbot',
'Internet Explorer': 'ie', 'Internet Explorer': 'ie',
InternetArchiveCrawler: 'internetarchivecrawler', InternetArchiveCrawler: 'internetarchivecrawler',
'K-Meleon': 'k_meleon', 'K-Meleon': 'k_meleon',
@ -26,11 +33,14 @@ export const BROWSER_ALIASES_MAP = {
'Microsoft Edge': 'edge', 'Microsoft Edge': 'edge',
'MZ Browser': 'mz', 'MZ Browser': 'mz',
'NAVER Whale Browser': 'naver', 'NAVER Whale Browser': 'naver',
'OAI-SearchBot': 'oai_searchbot',
Omgilibot: 'omgilibot',
Opera: 'opera', Opera: 'opera',
'Opera Coast': 'opera_coast', 'Opera Coast': 'opera_coast',
'Pale Moon': 'pale_moon', 'Pale Moon': 'pale_moon',
PhantomJS: 'phantomjs', PhantomJS: 'phantomjs',
PingdomBot: 'pingdombot', PingdomBot: 'pingdombot',
PerplexityBot: 'perplexitybot',
Puffin: 'puffin', Puffin: 'puffin',
QQ: 'qq', QQ: 'qq',
QQLite: 'qqlite', QQLite: 'qqlite',
@ -50,9 +60,11 @@ export const BROWSER_ALIASES_MAP = {
YahooSlurp: 'yahooslurp', YahooSlurp: 'yahooslurp',
'Yandex Browser': 'yandex', 'Yandex Browser': 'yandex',
YandexBot: 'yandexbot', YandexBot: 'yandexbot',
YouBot: 'youbot',
}; };
export const BROWSER_MAP = { export const BROWSER_MAP = {
applebot_extended: 'Applebot-Extended',
amazonbot: 'AmazonBot', amazonbot: 'AmazonBot',
amazon_silk: 'Amazon Silk', amazon_silk: 'Amazon Silk',
android: 'Android Browser', android: 'Android Browser',
@ -61,16 +73,22 @@ export const BROWSER_MAP = {
bingcrawler: 'BingCrawler', bingcrawler: 'BingCrawler',
blackberry: 'BlackBerry', blackberry: 'BlackBerry',
chrome: 'Chrome', chrome: 'Chrome',
chatgpt_user: 'ChatGPT-User',
claudebot: 'ClaudeBot',
chromium: 'Chromium', chromium: 'Chromium',
diffbot: 'Diffbot',
duckduckbot: 'DuckDuckBot', duckduckbot: 'DuckDuckBot',
edge: 'Microsoft Edge', edge: 'Microsoft Edge',
electron: 'Electron', electron: 'Electron',
epiphany: 'Epiphany', epiphany: 'Epiphany',
facebookbot: 'FacebookBot',
firefox: 'Firefox', firefox: 'Firefox',
focus: 'Focus', focus: 'Focus',
generic: 'Generic', generic: 'Generic',
google_extended: 'Google-Extended',
google_search: 'Google Search', google_search: 'Google Search',
googlebot: 'Googlebot', googlebot: 'Googlebot',
gptbot: 'GPTBot',
ie: 'Internet Explorer', ie: 'Internet Explorer',
internetarchivecrawler: 'InternetArchiveCrawler', internetarchivecrawler: 'InternetArchiveCrawler',
k_meleon: 'K-Meleon', k_meleon: 'K-Meleon',
@ -83,6 +101,9 @@ export const BROWSER_MAP = {
pale_moon: 'Pale Moon', pale_moon: 'Pale Moon',
phantomjs: 'PhantomJS', phantomjs: 'PhantomJS',
pingdombot: 'PingdomBot', pingdombot: 'PingdomBot',
perplexitybot: 'PerplexityBot',
oai_searchbot: 'OAI-SearchBot',
omgilibot: 'Omgilibot',
puffin: 'Puffin', puffin: 'Puffin',
qq: 'QQ Browser', qq: 'QQ Browser',
qqlite: 'QQ Browser Lite', qqlite: 'QQ Browser Lite',
@ -102,6 +123,7 @@ export const BROWSER_MAP = {
yahooslurp: 'YahooSlurp', yahooslurp: 'YahooSlurp',
yandex: 'Yandex Browser', yandex: 'Yandex Browser',
yandexbot: 'YandexBot', yandexbot: 'YandexBot',
youbot: 'YouBot',
}; };
export const PLATFORMS_MAP = { export const PLATFORMS_MAP = {

View File

@ -28,6 +28,193 @@ import Utils from './utils.js';
const commonVersionIdentifier = /version\/(\d+(\.?_?\d+)+)/i; const commonVersionIdentifier = /version\/(\d+(\.?_?\d+)+)/i;
const browsersList = [ const browsersList = [
/* GPTBot */
{
test: [/gptbot/i],
describe(ua) {
const browser = {
name: 'GPTBot',
};
const version = Utils.getFirstMatch(/gptbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* ChatGPT-User */
{
test: [/chatgpt-user/i],
describe(ua) {
const browser = {
name: 'ChatGPT-User',
};
const version = Utils.getFirstMatch(/chatgpt-user\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* OAI-SearchBot */
{
test: [/oai-searchbot/i],
describe(ua) {
const browser = {
name: 'OAI-SearchBot',
};
const version = Utils.getFirstMatch(/oai-searchbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Google-Extended */
{
test: [/google-extended/i],
describe(ua) {
const browser = {
name: 'Google-Extended',
};
const version = Utils.getFirstMatch(/google-extended\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Applebot-Extended */
{
test: [/applebot-extended/i],
describe(ua) {
const browser = {
name: 'Applebot-Extended',
};
const version = Utils.getFirstMatch(/applebot-extended\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* ClaudeBot */
{
test: [/claudebot/i, /claude-web/i],
describe(ua) {
const browser = {
name: 'ClaudeBot',
};
const version = Utils.getFirstMatch(/(?:claudebot|claude-web)\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Omgilibot */
{
test: [/omgilibot/i],
describe(ua) {
const browser = {
name: 'Omgilibot',
};
const version = Utils.getFirstMatch(/omgilibot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* FacebookBot */
{
test: [/facebookbot/i],
describe(ua) {
const browser = {
name: 'FacebookBot',
};
const version = Utils.getFirstMatch(/facebookbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Diffbot */
{
test: [/diffbot/i],
describe(ua) {
const browser = {
name: 'Diffbot',
};
const version = Utils.getFirstMatch(/diffbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* PerplexityBot */
{
test: [/perplexitybot/i],
describe(ua) {
const browser = {
name: 'PerplexityBot',
};
const version = Utils.getFirstMatch(/perplexitybot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* YouBot */
{
test: [/youbot/i],
describe(ua) {
const browser = {
name: 'YouBot',
};
const version = Utils.getFirstMatch(/youbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Googlebot */ /* Googlebot */
{ {
test: [/googlebot/i], test: [/googlebot/i],

View File

@ -2549,6 +2549,141 @@
vendor: "Google" vendor: "Google"
engine: engine:
name: "Blink" name: "Blink"
GPTBot:
-
ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.1; +https://openai.com/gptbot)"
spec:
browser:
name: "GPTBot"
version: "1.1"
os: {}
platform:
type: "bot"
vendor: "OpenAI"
engine: {}
ChatGPT-User:
-
ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ChatGPT-User/1.0; +https://openai.com/bot)"
spec:
browser:
name: "ChatGPT-User"
version: "1.0"
os: {}
platform:
type: "bot"
vendor: "OpenAI"
engine: {}
OAI-SearchBot:
-
ua: "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot)"
spec:
browser:
name: "OAI-SearchBot"
version: "1.0"
os: {}
platform:
type: "bot"
vendor: "OpenAI"
engine: {}
Google-Extended:
-
ua: "Mozilla/5.0 (compatible; Google-Extended; +https://developers.google.com/search/help/google-extended)"
spec:
browser:
name: "Google-Extended"
os: {}
platform:
type: "bot"
vendor: "Google"
engine: {}
Applebot-Extended:
-
ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15 (compatible; Applebot-Extended/1.0; +https://support.apple.com/applebot)"
spec:
browser:
name: "Applebot-Extended"
version: "1.0"
os:
name: "macOS"
version: "10.15.7"
versionName: "Catalina"
platform:
type: "bot"
vendor: "Apple"
engine:
name: "WebKit"
ClaudeBot:
-
ua: "Mozilla/5.0 (compatible; ClaudeBot/1.0; +https://www.anthropic.com/claudebot)"
spec:
browser:
name: "ClaudeBot"
version: "1.0"
os: {}
platform:
type: "bot"
vendor: "Anthropic"
engine: {}
Omgilibot:
-
ua: "Mozilla/5.0 (compatible; Omgilibot/1.0; +https://www.omgili.com)"
spec:
browser:
name: "Omgilibot"
version: "1.0"
os: {}
platform:
type: "bot"
vendor: "Omgili"
engine: {}
FacebookBot:
-
ua: "Mozilla/5.0 (compatible; FacebookBot/1.0; +https://developers.facebook.com/docs/sharing/best-practices#crawl)"
spec:
browser:
name: "FacebookBot"
version: "1.0"
os: {}
platform:
type: "bot"
vendor: "Meta"
engine: {}
Diffbot:
-
ua: "Mozilla/5.0 (compatible; Diffbot/3.0; +http://www.diffbot.com)"
spec:
browser:
name: "Diffbot"
version: "3.0"
os: {}
platform:
type: "bot"
vendor: "Diffbot"
engine: {}
PerplexityBot:
-
ua: "Mozilla/5.0 (compatible; PerplexityBot/1.0; +https://www.perplexity.ai/bot)"
spec:
browser:
name: "PerplexityBot"
version: "1.0"
os: {}
platform:
type: "bot"
vendor: "Perplexity AI"
engine: {}
YouBot:
-
ua: "Mozilla/5.0 (compatible; YouBot/1.0; +https://you.com/bot)"
spec:
browser:
name: "YouBot"
version: "1.0"
os: {}
platform:
type: "bot"
vendor: "You.com"
engine: {}
AmazonBot: AmazonBot:
- -
ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)" ua: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Amazonbot/0.1; +https://developer.amazon.com/support/amazonbot)"