diff options
author | Ivan Nardi <12729895+IvanNardi@users.noreply.github.com> | 2024-07-03 16:16:54 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-03 16:16:54 +0200 |
commit | d42f0e6ab35ee0a196ff6a0fff76cfe8ea00afb9 (patch) | |
tree | 8723873d00add5642ddcfac0fa08ba3ebee81a32 | |
parent | dab8d3056ec4571a0343bd7fc3cdce9c4d944719 (diff) |
Add detection of Twitter bot (#2487)
Update the global list of crawler IPs
-rw-r--r-- | src/lib/inc_generated/ndpi_crawlers_match.c.inc | 94 | ||||
-rwxr-xr-x | utils/crawlers_ip_addresses_download.sh | 12 |
2 files changed, 68 insertions, 38 deletions
diff --git a/src/lib/inc_generated/ndpi_crawlers_match.c.inc b/src/lib/inc_generated/ndpi_crawlers_match.c.inc index 5a0651c75..6a6b8d276 100644 --- a/src/lib/inc_generated/ndpi_crawlers_match.c.inc +++ b/src/lib/inc_generated/ndpi_crawlers_match.c.inc @@ -49,8 +49,7 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0x22628880 /* 34.98.136.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x226288C0 /* 34.98.136.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x22628900 /* 34.98.137.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, - { 0x22628A00 /* 34.98.138.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, - { 0x22628A40 /* 34.98.138.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, + { 0x22628A00 /* 34.98.138.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x22628B00 /* 34.98.139.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x22628B80 /* 34.98.139.128/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x22628C00 /* 34.98.140.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, @@ -87,12 +86,15 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0x22742400 /* 34.116.36.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x22742440 /* 34.116.36.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x22742500 /* 34.116.37.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, + { 0x22742700 /* 34.116.39.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x22742800 /* 34.116.40.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x22742900 /* 34.116.41.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, { 0x22742B00 /* 34.116.43.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x22764200 /* 34.118.66.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, { 0x2276FE00 /* 34.118.254.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, { 0x227EB260 /* 34.126.178.96/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, + { 0x227F8000 /* 34.127.128.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, + { 0x227F8040 /* 34.127.128.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x22929690 /* 34.146.150.144/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, { 0x22936E90 /* 34.147.110.144/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, { 0x22974A90 /* 34.151.74.144/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, @@ -119,15 
+121,16 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0x23BB8A00 /* 35.187.138.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT }, { 0x23BB8C00 /* 35.187.140.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x23BB8C80 /* 35.187.140.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, - { 0x23BB8D00 /* 35.187.141.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x23BB8D00 /* 35.187.141.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, + { 0x23BB8D40 /* 35.187.141.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, + { 0x23BB8D80 /* 35.187.141.128/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x23BB8E00 /* 35.187.142.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x23BB8E40 /* 35.187.142.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x23BB8F00 /* 35.187.143.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x23BB8F40 /* 35.187.143.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x23F31000 /* 35.243.16.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x23F31080 /* 35.243.16.128/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, - { 0x23F31100 /* 35.243.17.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, - { 0x23F31140 /* 35.243.17.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, + { 0x23F31100 /* 35.243.17.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x23F31200 /* 35.243.18.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x23F31240 /* 35.243.18.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x23F31300 /* 35.243.19.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, @@ -141,6 +144,8 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0x23F7F3F0 /* 35.247.243.240/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, { 0x284D8B00 /* 40.77.139.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x284DA700 /* 40.77.167.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x284DB100 /* 40.77.177.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x284DB200 /* 40.77.178.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT }, { 0x284DBC00 /* 40.77.188.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0x284DCA00 /* 40.77.202.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, { 0x284F83D0 /* 40.79.131.208/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, @@ -149,15 +154,20 @@ static ndpi_network 
ndpi_http_crawler_bot_protocol_list[] = { { 0x33694300 /* 51.105.67.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, { 0x34A79000 /* 52.167.144.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, { 0x34E79400 /* 52.231.148.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, + { 0x398D0000 /* 57.141.0.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x398D0800 /* 57.141.8.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0x398D0C00 /* 57.141.12.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT }, { 0x39900000 /* 57.144.0.0/14 */, 14, NDPI_HTTP_CRAWLER_BOT }, { 0x4137D200 /* 65.55.210.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, { 0x42DC9000 /* 66.220.144.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, - { 0x42F94000 /* 66.249.64.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F94000 /* 66.249.64.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F94080 /* 66.249.64.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F940E0 /* 66.249.64.224/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x42F94100 /* 66.249.65.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x42F941A0 /* 66.249.65.160/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x42F941C0 /* 66.249.65.192/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x42F94200 /* 66.249.66.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, - { 0x42F94280 /* 66.249.66.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F942A0 /* 66.249.66.160/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x42F942C0 /* 66.249.66.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x42F94400 /* 66.249.68.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x42F94440 /* 66.249.68.64/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, @@ -171,12 +181,15 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0x42F94E00 /* 66.249.78.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x42F94F00 /* 66.249.79.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, { 0x42F95700 /* 66.249.87.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, - { 0x42F95900 /* 66.249.89.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F95900 /* 66.249.89.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F95980 /* 66.249.89.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F959E0 /* 66.249.89.224/27 */, 
27, NDPI_HTTP_CRAWLER_BOT }, { 0x42F95A00 /* 66.249.90.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x42F95A60 /* 66.249.90.96/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x42F95A80 /* 66.249.90.128/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x42F95B00 /* 66.249.91.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, - { 0x42F95C00 /* 66.249.92.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F95C00 /* 66.249.92.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, + { 0x42F95C60 /* 66.249.92.96/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x42F95C80 /* 66.249.92.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x42F95CC0 /* 66.249.92.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x453FB000 /* 69.63.176.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, @@ -195,9 +208,11 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0x66846000 /* 102.132.96.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, { 0x67046000 /* 103.4.96.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0x6BB2C000 /* 107.178.192.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, - { 0x6BB2C080 /* 107.178.192.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, + { 0x6BB2C0A0 /* 107.178.192.160/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x6BB2C0C0 /* 107.178.192.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, - { 0x6BB2C100 /* 107.178.193.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x6BB2C100 /* 107.178.193.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, + { 0x6BB2C180 /* 107.178.193.128/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, + { 0x6BB2C1C0 /* 107.178.193.192/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x6BB2C200 /* 107.178.194.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, { 0x6BB2C300 /* 107.178.195.0/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, { 0x6BB2C360 /* 107.178.195.96/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, @@ -205,8 +220,10 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0x6BB2C400 /* 107.178.196.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0x6BB2CA00 /* 107.178.202.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x6BB2CA80 /* 107.178.202.128/26 */, 26, NDPI_HTTP_CRAWLER_BOT }, + { 0x6BB2CAC0 /* 107.178.202.192/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 
0x6BB2CB00 /* 107.178.203.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, { 0x6BB2E000 /* 107.178.224.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT }, + { 0x6CB10200 /* 108.177.2.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0x81860000 /* 129.134.0.0/16 */, 16, NDPI_HTTP_CRAWLER_BOT }, { 0x8BD93400 /* 139.217.52.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, { 0x934BD000 /* 147.75.208.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, @@ -219,9 +236,13 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0xB93CD800 /* 185.60.216.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0xB959D800 /* 185.89.216.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0xBFE9CCE0 /* 191.233.204.224/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, + { 0xC0854C00 /* 192.133.76.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0xC0B20500 /* 192.178.5.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, + { 0xC0B20600 /* 192.178.6.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, { 0xC0B21100 /* 192.178.17.0/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, + { 0xC7109C00 /* 199.16.156.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0xC71E1800 /* 199.30.24.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT }, + { 0xC73B9400 /* 199.59.148.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0xCC0F1400 /* 204.15.20.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, { 0xCF2E0D00 /* 207.46.13.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, { 0xD155EE00 /* 209.85.238.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, @@ -230,15 +251,14 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { }; static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = { - { "2001:4860:4801:2::", 63, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:2::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:c::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:f::", 64, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:10::", 61, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:18::", 62, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:1c::", 63, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:1e::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:10::", 60, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:20::", 
60, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:30::", 61, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:31::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:32::", 63, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:34::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:38::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:3c::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:3e::", 64, NDPI_HTTP_CRAWLER_BOT }, @@ -254,14 +274,13 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = { { "2001:4860:4801:80::", 61, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:88::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:90::", 62, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:2008::", 63, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:2008::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:200c::", 63, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:2010::", 61, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:2018::", 62, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:201c::", 63, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:201e::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:2010::", 60, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:2020::", 60, NDPI_HTTP_CRAWLER_BOT }, - { "2001:4860:4801:2030::", 61, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:2031::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:2032::", 63, NDPI_HTTP_CRAWLER_BOT }, + { "2001:4860:4801:2034::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:2038::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:203c::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2001:4860:4801:203e::", 64, NDPI_HTTP_CRAWLER_BOT }, @@ -284,7 +303,7 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = { { "2404:f340:4010:4004::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2404:f340:4010:4006::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:8::", 63, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:c::", 63, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:c::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:f::", 64, NDPI_HTTP_CRAWLER_BOT }, { 
"2600:1900:0:10::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:15::", 64, NDPI_HTTP_CRAWLER_BOT }, @@ -298,7 +317,9 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = { { "2600:1900:0:34::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:36::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:40::", 60, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:50::", 61, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:51::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:52::", 63, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:54::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:58::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:5c::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:5e::", 64, NDPI_HTTP_CRAWLER_BOT }, @@ -314,18 +335,20 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = { { "2600:1900:0:90::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:94::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:a0::", 62, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:a4::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:a4::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:b0::", 61, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:b8::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:c0::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:c4::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:c6::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:d0::", 60, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:e0::", 59, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:e0::", 60, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:f0::", 61, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:f8::", 63, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:fa::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:fc::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:100::", 60, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:110::", 62, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:114::", 63, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:116::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:110::", 61, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:118::", 63, NDPI_HTTP_CRAWLER_BOT }, { 
"2600:1900:0:11a::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:11c::", 62, NDPI_HTTP_CRAWLER_BOT }, @@ -341,18 +364,14 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = { { "2600:1900:0:164::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:166::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:170::", 63, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:180::", 61, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:188::", 62, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:18c::", 63, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:18e::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:180::", 60, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:190::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:192::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:1a0::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:1c0::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:1c4::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:1d0::", 64, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:1e0::", 63, NDPI_HTTP_CRAWLER_BOT }, - { "2600:1900:0:1e2::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:1e0::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:1f0::", 62, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:1f4::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:200::", 62, NDPI_HTTP_CRAWLER_BOT }, @@ -384,10 +403,11 @@ static ndpi_network6 ndpi_http_crawler_bot_protocol_list_6[] = { { "2600:1900:0:330::", 63, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:332::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2600:1900:0:340::", 64, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:350::", 63, NDPI_HTTP_CRAWLER_BOT }, + { "2600:1900:0:352::", 64, NDPI_HTTP_CRAWLER_BOT }, { "2620:0:1c00::", 40, NDPI_HTTP_CRAWLER_BOT }, { "2a03:2880::", 31, NDPI_HTTP_CRAWLER_BOT }, { "2a03:2887:ff2c::", 47, NDPI_HTTP_CRAWLER_BOT }, - { "2a03:2887:ff42::", 48, NDPI_HTTP_CRAWLER_BOT }, { "2a03:83e0::", 32, NDPI_HTTP_CRAWLER_BOT }, { "2a10:f781:10:cee0::", 64, NDPI_HTTP_CRAWLER_BOT }, /* End */ diff --git a/utils/crawlers_ip_addresses_download.sh b/utils/crawlers_ip_addresses_download.sh 
index 45dbdcd4a..d15c6e4c1 100755 --- a/utils/crawlers_ip_addresses_download.sh +++ b/utils/crawlers_ip_addresses_download.sh @@ -11,6 +11,7 @@ TMP2=/tmp/bot_google_c2.json TMP3=/tmp/bot_google_c3.json TMP_BING=/tmp/bot_bing.json TMP_FB=/tmp/bot_fb.list +TMP_TW=/tmp/bot_tw.list LIST=/tmp/bot.list LIST6=/tmp/bot.list6 LIST_MERGED=/tmp/bot.list_m @@ -24,6 +25,8 @@ ORIGIN3="https://developers.google.com/static/search/apis/ipranges/user-triggere #Bing Bot ORIGIN_BING="https://www.bing.com/toolbox/bingbot.json" #Facebook Bot: https://developers.facebook.com/docs/sharing/webmasters/crawler/ +#TwitterBot +ORIGIN_TW="https://developer.x.com/en/docs/twitter-for-websites/cards/guides/troubleshooting-cards" echo "(1) Downloading file... ${ORIGIN1}" http_response=$(curl -s -o $TMP1 -w "%{http_code}" ${ORIGIN1}) @@ -49,6 +52,12 @@ echo "(1) Downloading FB crawlers routes... " whois -h whois.radb.net -- '-i origin AS32934' | grep ^route > $TMP_FB is_file_empty "${TMP_FB}" +echo "(1) Downloading page... ${ORIGIN_TW}" +http_response=$(curl -s -o $TMP_TW -w "%{http_code}" ${ORIGIN_TW}) +check_http_response "${http_response}" +is_file_empty "${TMP_TW}" + + echo "(2) Processing IP addresses..." { jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP1 @@ -56,6 +65,7 @@ echo "(2) Processing IP addresses..." jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP3 jq -r '.prefixes | .[].ipv4Prefix | select( . 
!= null )' $TMP_BING grep -v route6 $TMP_FB | tr -d 'route:^ ' + grep "IP ranges are" $TMP_TW | grep -E -o "[^^][0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}/[0-9]{1,2}" | tr -d ' ' # TODO: ipv4 only } > $LIST is_file_empty "${LIST}" ./mergeipaddrlist.py "${LIST}" > "${LIST_MERGED}" @@ -72,7 +82,7 @@ is_file_empty "${LIST6}" is_file_empty "${LIST6_MERGED}" ./ipaddr2list.py $LIST_MERGED NDPI_HTTP_CRAWLER_BOT $LIST6_MERGED > $DEST is_file_empty "${DEST}" -rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $LIST $LIST6 $LIST_MERGED $LIST6_MERGED +rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $TMP_TW $LIST $LIST6 $LIST_MERGED $LIST6_MERGED echo "(3) Crawlers IPs are available in $DEST" exit 0 |