Automattic\Jetpack\Device_Detection
User_Agent_Info::is_bot_user_agent │ public static │ WPSCache 1.0
Is the given user-agent a known bot? If you want an is_bot check for the current request's UA, use is_bot() instead of passing a user-agent to this method.
Метод класса: User_Agent_Info{}
Хуков нет.
Возвращает
true|false.
Использование
$result = User_Agent_Info::is_bot_user_agent( $ua );
- $ua(строка)
- A user-agent string.
По умолчанию: null
Код User_Agent_Info::is_bot_user_agent() WPSCache 3.0.3
/**
 * Is the given user-agent a known bot?
 *
 * The check is a case-insensitive substring search: the user-agent is reported
 * as a bot as soon as it contains any one of the needles listed below.
 *
 * @param string|null $ua A user-agent string. Anything `empty()` considers empty
 *                        (null, '', '0', ...) is reported as "not a bot".
 *
 * @return bool True when $ua contains a known bot signature, false otherwise.
 */
public static function is_bot_user_agent( $ua = null ) {
	// Nothing to inspect: an absent/empty user-agent is never classified as a bot.
	if ( empty( $ua ) ) {
		return false;
	}

	// Needles are written in lowercase; matching below is case-insensitive.
	// Some sourced via
	// https://github.com/ua-parser/uap-core/blob/432e95f6767cc8bab4c20c255784cd6f7e93bc15/regexes.yaml#L151
	$bot_agents = array(
		// Microsoft/Bing https://www.bing.com/webmasters/help/which-crawlers-does-bing-use-8c184ec0
		'bingbot', // Bing/Copilot
		'adidxbot', // Bing Ads
		'bingpreview', // Generates page snapshots for Bing
		'bingvideopreview', // Generates previews of videos for Bing
		'microsoft',
		// Google https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers
		'adsbot-google',
		'appengine-google',
		'feedfetcher-google',
		'mediapartners-google',
		'storebot-google', // https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#google-storebot
		'google sketchup',
		'google-cloudvertexbot', // https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#google-cloudvertexbot
		'google-extended', // Gemini https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#google-extended
		'google-inspectiontool', // https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers
		'google-safety;', // https://www.google.com/bot.html
		'googlebot-mobile',
		'googlebot', // and googlebot-[image,video,news,] https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#googlebot
		'googleother', // and googleother-[video,image] https://developers.google.com/search/docs/crawling-indexing/google-common-crawlers#googleother
		// OpenAI https://platform.openai.com/docs/bots
		'gptbot', // Crawler
		'chatgpt-user', // ChatGPT on behalf of user
		'oai-searchbot', // ChatGPT search features
		// Anthropic
		'claudebot', // chat citation fetch https://support.anthropic.com/en/articles/8896518
		'claude-web', // web-focused crawl https://darkvisitors.com/agents/claude-web
		'anthropic-ai', // bulk model training https://darkvisitors.com/agents/anthropic-ai
		// Perplexity
		'perplexitybot', // index builder https://docs.perplexity.ai/guides/bots
		'perplexity-user', // human-triggered visit https://docs.perplexity.ai/guides/bots
		// Meta https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/
		'facebookbot', // AI data scraper https://darkvisitors.com/agents/facebookbot
		'facebookexternalhit', // shares https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/#identify
		'facebookcatalog', // shares https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/#identify
		'meta-webindexer', // Meta AI search indexer https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/#meta-webindexer
		'meta-externalads', // web crawler improving ads https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/#meta-externalads
		'meta-externalagent', // training AI models https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/#identify-2
		'meta-externalfetcher', // user-initiated fetches, may skip robots.txt https://developers.facebook.com/docs/sharing/webmasters/web-crawlers/#identify-3
		// Semrush https://www.semrush.com/bot/
		'semrushbot',
		'siteauditbot',
		// Other bots (alphabetized list)
		'123metaspider-bot',
		'1470.net crawler',
		'50.nu',
		'8bo crawler bot',
		'aboundex',
		'ahrefsbot',
		'ai2bot', // AI2 crawler for LLM training https://allenai.org/crawler
		'alexa',
		'altavista',
		'amazonbot', // https://developer.amazon.com/amazonbot
		'applebot', // https://support.apple.com/en-ca/HT204683
		'arcgis hub indexer',
		'archive.org_bot', // http://archive.org/details/archive.org_bot
		'archiver',
		'ask jeeves',
		'attentio',
		'baiduspider',
		'blexbot',
		'blitzbot',
		'blogbridge',
		'bloglovin',
		'bne.es_bot', // https://www.bne.es/es/colecciones/archivo-web-espanola/aviso-webmasters
		'boardreader blog indexer',
		'boardreader favicon fetcher',
		'boitho.com-dc',
		'botseer',
		'bubing',
		'bytespider', // ByteDance (owner of TikTok) to train LLMs for Doubao https://darkvisitors.com/agents/bytespider
		'catchpoint',
		'ccbot', // CommonCrawl non-profit https://commoncrawl.org/ccbot
		'charlotte',
		'checklinks',
		'chtml generic',
		'cityreview robot',
		'cloudflare-alwaysonline',
		'clumboot',
		'coccocbot', // Coc Coc https://darkvisitors.com/agents/coccocbot-web
		'cohere-ai', // Cohere AI https://darkvisitors.com/agents/cohere-ai
		'comodo http',
		'comodo-webinspector-crawler',
		'converacrawler',
		'cookieinformationscanner', // Internal ref p1699315886066389-slack-C0438NHCLSY
		'crawl-e',
		'crawlconvera',
		'crawldaddy',
		'crawler',
		'crawlfire',
		'csimplespider',
		'dataforseobot', // https://www.dataforseo.com/dataforseo-bot
		'daumoa',
		'diffbot', // https://docs.diffbot.com/docs/how-to-use-custom-user-agents-with-extract-apis & https://darkvisitors.com/agents/diffbot
		'domaintunocrawler',
		'dotbot', // https://darkvisitors.com/agents/dotbot
		'duckassistbot', // DuckDuckGo AI Assistant https://darkvisitors.com/agents/duckassistbot
		'elisabot',
		'ezlynxbot', // https://www.ezoic.com/bot
		'fastmobilecrawl',
		'feed seeker bot',
		'feedbin',
		'feedburner',
		'finderbots',
		'findlinks',
		'firefly',
		'flamingo_searchengine',
		'followsite bot',
		'froogle',
		'furlbot',
		'genieo',
		'germcrawler',
		'gigabot',
		'gomezagent',
		'gonzo1',
		'grapeshotcrawler',
		'grokkit-crawler',
		'grub-client',
		'gsa-crawler',
		'heritrix',
		'hiddenmarket',
		'holmes',
		'hoowwwer',
		'htdig',
		'httrack',
		'ia_archiver',
		'icarus6j',
		'icc-crawler',
		'ichiro',
		'iconsurf',
		'iescholar',
		'iltrovatore',
		'index crawler',
		'infoseek',
		'infuzapp',
		'innovazion crawler',
		'internetarchive',
		'irlbot',
		'jbot',
		'job roboter',
		'jumpbot',
		'kaloogabot',
		'kiwistatus spider',
		'kraken',
		'kurzor',
		'larbin',
		'leia',
		'lesnikbot',
		'lijit crawler',
		'linguee bot',
		'linkaider',
		'linkcheck',
		'linkdexbot',
		'linkedinbot',
		'linkfluence', // http://linkfluence.com/
		'linkwalker', // https://www.linkwalker.com/
		'lite bot',
		'livelapbot',
		'llaut',
		'lycos',
		'mail.ru_bot',
		'masidani_bot',
		'masscan',
		'mediapartners',
		'mediobot',
		'mj12bot',
		'mogimogi',
		'mojeekbot', // https://www.mojeek.com/bot.html
		'motionbot',
		'mozdex',
		'mshots',
		'msnbot',
		'msrbot',
		'mtps feed aggregation system',
		'netresearch',
		'netvibes',
		'newsgator',
		'ning',
		'nutch',
		'nymesis',
		'objectssearch',
		'ogscrper',
		'omgili', // Webz.io web crawler for a data seller https://darkvisitors.com/agents/omgili
		'oozbot',
		'openbot',
		'openhosebot',
		'orbiter',
		'pagepeeker',
		'pagesinventory',
		'paxleframework',
		'peeplo screenshot bot',
		'phpcrawl',
		'pingdom.com_bot',
		'plantynet_webrobot',
		'pompos',
		'pss-webkit-request',
		'pythumbnail',
		'queryseekersp ider', // NOTE(review): looks like a garbled variant of the next entry — confirm before removing.
		'queryseekerspider',
		'qwantify',
		'read%20later',
		'reaper',
		'redcarpet',
		'retreiver',
		'riddler',
		'rival iq',
		'scollspider',
		'scooter',
		'scrapy',
		'scrubby',
		'searchsight',
		'seekbot',
		'semanticdiscovery',
		'seostats',
		'simplepie',
		'simplerss',
		'simpy',
		'sitecat webbot',
		'sitecon',
		'slack-imgproxy',
		'slackbot-linkexpanding',
		'slurp',
		'snapbot',
		'snapchat', // https://developers.snap.com/robots
		'snappy',
		'speedy spider',
		'spider',
		'squrl java',
		'stringer',
		'taptubot',
		'technoratisnoop',
		'teoma',
		'theusefulbot',
		'thumbshots.ru',
		'thumbshotsbot',
		'timpibot', // LLM trainer https://darkvisitors.com/agents/timpibot
		'tiny tiny rss',
		'trendictionbot', // http://www.trendiction.de/bot;
		'trends crawler',
		'tweetmemebot',
		'twiceler',
		'twitterbot', // https://developer.x.com/en/docs/x-for-websites/cards/guides/getting-started#crawling
		'url2png',
		'usyd-nlp-spider',
		'vagabondo',
		'voilabot',
		'vortex',
		'votay bot',
		'voyager',
		'wasalive.bot',
		'web-sniffer',
		'webthumb',
		'wesee',
		'whatsapp',
		'whatweb',
		'wire',
		'wordpress',
		'wotbox',
		'wp-e2e-tests', // WordPress e2e tests
		'www.almaden.ibm.com',
		'xenu',
		'yacybot', // http://yacy.net/bot.html
		'yahoo! slurp',
		'yahooseeker',
		'yahooysmcm',
		'yammybot',
		'yandexbot',
		'yottaamonitor',
		'youbot', // You.com AI assistant https://darkvisitors.com/agents/youbot
		'yowedo',
		'zao-crawler',
		'zao',
		'zebot_www.ze.bz',
		'zoombot', // SEOZOom https://darkvisitors.com/agents/zoombot
		'zooshot',
		'zyborg',
	);

	// First needle found anywhere in the user-agent (ignoring case) decides the result.
	foreach ( $bot_agents as $bot_agent ) {
		if ( false !== stripos( $ua, $bot_agent ) ) {
			return true;
		}
	}

	return false;
}