1
0
mirror of https://github.com/lancedikson/bowser synced 2026-03-02 03:40:27 +00:00

feat: add support for AI crawl bots (#577)

This commit is contained in:
Naor Peled
2025-11-22 18:05:08 +02:00
committed by GitHub
parent 771dfb2dfe
commit f7d2c0693c
4 changed files with 603 additions and 8 deletions

View File

@@ -8,27 +8,39 @@ export const BROWSER_ALIASES_MAP = {
Bada: 'bada',
BingCrawler: 'bingcrawler',
BlackBerry: 'blackberry',
'ChatGPT-User': 'chatgpt_user',
Chrome: 'chrome',
ClaudeBot: 'claudebot',
Chromium: 'chromium',
Diffbot: 'diffbot',
DuckDuckBot: 'duckduckbot',
Electron: 'electron',
Epiphany: 'epiphany',
FacebookExternalHit: 'facebookexternalhit',
Firefox: 'firefox',
Focus: 'focus',
Generic: 'generic',
'Google Search': 'google_search',
Googlebot: 'googlebot',
GPTBot: 'gptbot',
'Internet Explorer': 'ie',
InternetArchiveCrawler: 'internetarchivecrawler',
'K-Meleon': 'k_meleon',
Maxthon: 'maxthon',
MetaWebCrawler: 'metawebcrawler',
'Meta-ExternalAds': 'meta_externalads',
'Meta-ExternalAgent': 'meta_externalagent',
'Meta-ExternalFetcher': 'meta_externalfetcher',
'Meta-WebIndexer': 'meta_webindexer',
'Microsoft Edge': 'edge',
'MZ Browser': 'mz',
'NAVER Whale Browser': 'naver',
'OAI-SearchBot': 'oai_searchbot',
Omgilibot: 'omgilibot',
Opera: 'opera',
'Opera Coast': 'opera_coast',
'Pale Moon': 'pale_moon',
PerplexityBot: 'perplexitybot',
'Perplexity-User': 'perplexity_user',
PhantomJS: 'phantomjs',
PingdomBot: 'pingdombot',
Puffin: 'puffin',
@@ -50,6 +62,7 @@ export const BROWSER_ALIASES_MAP = {
YahooSlurp: 'yahooslurp',
'Yandex Browser': 'yandex',
YandexBot: 'yandexbot',
YouBot: 'youbot',
};
export const BROWSER_MAP = {
@@ -60,27 +73,39 @@ export const BROWSER_MAP = {
bada: 'Bada',
bingcrawler: 'BingCrawler',
blackberry: 'BlackBerry',
chatgpt_user: 'ChatGPT-User',
chrome: 'Chrome',
claudebot: 'ClaudeBot',
chromium: 'Chromium',
diffbot: 'Diffbot',
duckduckbot: 'DuckDuckBot',
edge: 'Microsoft Edge',
electron: 'Electron',
epiphany: 'Epiphany',
facebookexternalhit: 'FacebookExternalHit',
firefox: 'Firefox',
focus: 'Focus',
generic: 'Generic',
google_search: 'Google Search',
googlebot: 'Googlebot',
gptbot: 'GPTBot',
ie: 'Internet Explorer',
internetarchivecrawler: 'InternetArchiveCrawler',
k_meleon: 'K-Meleon',
maxthon: 'Maxthon',
metawebcrawler: 'MetaWebCrawler',
meta_externalads: 'Meta-ExternalAds',
meta_externalagent: 'Meta-ExternalAgent',
meta_externalfetcher: 'Meta-ExternalFetcher',
meta_webindexer: 'Meta-WebIndexer',
mz: 'MZ Browser',
naver: 'NAVER Whale Browser',
oai_searchbot: 'OAI-SearchBot',
omgilibot: 'Omgilibot',
opera: 'Opera',
opera_coast: 'Opera Coast',
pale_moon: 'Pale Moon',
perplexitybot: 'PerplexityBot',
perplexity_user: 'Perplexity-User',
phantomjs: 'PhantomJS',
pingdombot: 'PingdomBot',
puffin: 'Puffin',
@@ -102,6 +127,7 @@ export const BROWSER_MAP = {
yahooslurp: 'YahooSlurp',
yandex: 'Yandex Browser',
yandexbot: 'YandexBot',
youbot: 'YouBot',
};
export const PLATFORMS_MAP = {

View File

@@ -28,6 +28,227 @@ import Utils from './utils.js';
// Case-insensitive pattern for a generic `version/<number>` token. The first
// capture group holds the version: digit runs optionally separated by `.`
// and/or `_` (at least two digits in total are required for a match).
const commonVersionIdentifier = /version\/(\d+(\.?_?\d+)+)/i;
const browsersList = [
/* GPTBot */
{
test: [/gptbot/i],
describe(ua) {
const browser = {
name: 'GPTBot',
};
const version = Utils.getFirstMatch(/gptbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* ChatGPT-User */
{
test: [/chatgpt-user/i],
describe(ua) {
const browser = {
name: 'ChatGPT-User',
};
const version = Utils.getFirstMatch(/chatgpt-user\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* OAI-SearchBot */
{
test: [/oai-searchbot/i],
describe(ua) {
const browser = {
name: 'OAI-SearchBot',
};
const version = Utils.getFirstMatch(/oai-searchbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* ClaudeBot */
{
test: [/claudebot/i, /claude-web/i, /claude-user/i, /claude-searchbot/i],
describe(ua) {
const browser = {
name: 'ClaudeBot',
};
const version = Utils.getFirstMatch(/(?:claudebot|claude-web|claude-user|claude-searchbot)\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Omgilibot */
{
test: [/omgilibot/i, /webzio-extended/i],
describe(ua) {
const browser = {
name: 'Omgilibot',
};
const version = Utils.getFirstMatch(/(?:omgilibot|webzio-extended)\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Diffbot */
{
test: [/diffbot/i],
describe(ua) {
const browser = {
name: 'Diffbot',
};
const version = Utils.getFirstMatch(/diffbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* PerplexityBot */
{
test: [/perplexitybot/i],
describe(ua) {
const browser = {
name: 'PerplexityBot',
};
const version = Utils.getFirstMatch(/perplexitybot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Perplexity-User */
{
test: [/perplexity-user/i],
describe(ua) {
const browser = {
name: 'Perplexity-User',
};
const version = Utils.getFirstMatch(/perplexity-user\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* YouBot */
{
test: [/youbot/i],
describe(ua) {
const browser = {
name: 'YouBot',
};
const version = Utils.getFirstMatch(/youbot\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Meta-WebIndexer */
{
test: [/meta-webindexer/i],
describe(ua) {
const browser = {
name: 'Meta-WebIndexer',
};
const version = Utils.getFirstMatch(/meta-webindexer\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Meta-ExternalAds */
{
test: [/meta-externalads/i],
describe(ua) {
const browser = {
name: 'Meta-ExternalAds',
};
const version = Utils.getFirstMatch(/meta-externalads\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Meta-ExternalAgent */
{
test: [/meta-externalagent/i],
describe(ua) {
const browser = {
name: 'Meta-ExternalAgent',
};
const version = Utils.getFirstMatch(/meta-externalagent\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Meta-ExternalFetcher */
{
test: [/meta-externalfetcher/i],
describe(ua) {
const browser = {
name: 'Meta-ExternalFetcher',
};
const version = Utils.getFirstMatch(/meta-externalfetcher\/(\d+(\.\d+)+)/i, ua) || Utils.getFirstMatch(commonVersionIdentifier, ua);
if (version) {
browser.version = version;
}
return browser;
},
},
/* Googlebot */
{
test: [/googlebot/i],
@@ -130,12 +351,12 @@ const browsersList = [
},
},
/* MetaWebCrawler */
/* FacebookExternalHit */
{
test: [/facebookexternalhit/i, /facebookcatalog/i],
describe() {
return {
name: 'MetaWebCrawler',
name: 'FacebookExternalHit',
};
},
},

View File

@@ -29,6 +29,39 @@ export default [
},
},
/* GPTBot */
{
test: [/gptbot/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'OpenAI',
};
},
},
/* ChatGPT-User */
{
test: [/chatgpt-user/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'OpenAI',
};
},
},
/* OAI-SearchBot */
{
test: [/oai-searchbot/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'OpenAI',
};
},
},
/* Baidu */
{
test: [/baiduspider/i],
@@ -62,6 +95,72 @@ export default [
},
},
/* ClaudeBot */
{
test: [/claudebot/i, /claude-web/i, /claude-user/i, /claude-searchbot/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Anthropic',
};
},
},
/* Omgilibot */
{
test: [/omgilibot/i, /webzio-extended/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Webz.io',
};
},
},
/* Diffbot */
{
test: [/diffbot/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Diffbot',
};
},
},
/* PerplexityBot */
{
test: [/perplexitybot/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Perplexity AI',
};
},
},
/* Perplexity-User */
{
test: [/perplexity-user/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Perplexity AI',
};
},
},
/* YouBot */
{
test: [/youbot/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'You.com',
};
},
},
/* Internet Archive Crawler */
{
test: [/ia_archiver/i],
@@ -73,6 +172,50 @@ export default [
},
},
/* Meta-WebIndexer */
{
test: [/meta-webindexer/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Meta',
};
},
},
/* Meta-ExternalAds */
{
test: [/meta-externalads/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Meta',
};
},
},
/* Meta-ExternalAgent */
{
test: [/meta-externalagent/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Meta',
};
},
},
/* Meta-ExternalFetcher */
{
test: [/meta-externalfetcher/i],
describe() {
return {
type: PLATFORMS_MAP.bot,
vendor: 'Meta',
};
},
},
/* Meta Web Crawler */
{
test: [/facebookexternalhit/i, /facebookcatalog/i],