diff options
-rw-r--r-- | .github/workflows/build.yml | 2 | ||||
-rw-r--r-- | src/lib/inc_generated/ndpi_crawlers_match.c.inc | 232 | ||||
-rw-r--r-- | src/lib/protocols/http.c | 4 | ||||
-rw-r--r-- | tests/cfgs/default/pcap/crawler_false_positive.pcapng | bin | 0 -> 2652 bytes | |||
-rw-r--r-- | tests/cfgs/default/result/crawler_false_positive.pcapng.out | 25 | ||||
-rwxr-xr-x | utils/crawlers_ip_addresses_download.sh | 9 |
6 files changed, 267 insertions, 5 deletions
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 379bd15aa..20ddbeacd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -53,7 +53,7 @@ jobs: - name: Install Ubuntu Prerequisites run: | sudo apt-get update - sudo apt-get install python3-netaddr git + sudo apt-get install python3-netaddr git whois - name: Run Scripts run: | echo 'Running ./utils/bitcoinnodes.sh' diff --git a/src/lib/inc_generated/ndpi_crawlers_match.c.inc b/src/lib/inc_generated/ndpi_crawlers_match.c.inc index fb01d5abc..f9e1c10ad 100644 --- a/src/lib/inc_generated/ndpi_crawlers_match.c.inc +++ b/src/lib/inc_generated/ndpi_crawlers_match.c.inc @@ -615,6 +615,238 @@ static ndpi_network ndpi_http_crawler_bot_protocol_list[] = { { 0x284D8B00 /* 40.77.139.0/25 */, 25, NDPI_HTTP_CRAWLER_BOT }, { 0x144AC500 /* 20.74.197.0/28 */, 28, NDPI_HTTP_CRAWLER_BOT }, { 0x140F85A0 /* 20.15.133.160/27 */, 27, NDPI_HTTP_CRAWLER_BOT }, + { 0x453FB000 /* 69.63.176.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0x42DC9000 /* 66.220.144.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0x42DC9000 /* 66.220.144.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x453FB800 /* 69.63.184.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x453FB000 /* 69.63.176.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x4A774C00 /* 74.119.76.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0x45ABFF00 /* 69.171.255.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xADFC4000 /* 173.252.64.0/18 */, 18, NDPI_HTTP_CRAWLER_BOT }, + { 0x45ABE000 /* 69.171.224.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT }, + { 0x45ABE000 /* 69.171.224.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0x67046000 /* 103.4.96.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0xADFC4000 /* 173.252.64.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4000 /* 31.13.64.0/18 */, 18, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D1800 /* 31.13.24.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x42DC9800 /* 66.220.152.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x45ABEF00 /* 69.171.239.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x45ABF000 /* 69.171.240.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4000 /* 31.13.64.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4000 /* 31.13.64.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4100 /* 31.13.65.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4300 /* 31.13.67.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4400 /* 31.13.68.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4500 /* 31.13.69.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4600 /* 31.13.70.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4700 /* 31.13.71.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4800 /* 31.13.72.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4900 /* 31.13.73.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4A00 /* 31.13.74.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4B00 /* 31.13.75.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4C00 /* 31.13.76.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4D00 /* 31.13.77.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D6000 /* 31.13.96.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4200 /* 31.13.66.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xADFC6000 /* 173.252.96.0/19 */, 19, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4E00 /* 31.13.78.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D4F00 /* 31.13.79.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5000 /* 31.13.80.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5200 /* 31.13.82.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5300 /* 31.13.83.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5400 /* 31.13.84.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5500 /* 31.13.85.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5600 /* 31.13.86.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5700 /* 31.13.87.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5800 /* 31.13.88.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5900 /* 31.13.89.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5B00 /* 31.13.91.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5C00 /* 31.13.92.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5D00 /* 31.13.93.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5E00 /* 31.13.94.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5F00 /* 31.13.95.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5100 /* 31.13.81.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB33CC000 /* 179.60.192.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0xB33CC000 /* 179.60.192.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB33CC100 /* 179.60.193.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB33CC200 /* 179.60.194.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB33CC300 /* 179.60.195.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB93CD800 /* 185.60.216.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0x2D402800 /* 45.64.40.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0xB93CD800 /* 185.60.216.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB93CD900 /* 185.60.217.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB93CDA00 /* 185.60.218.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB93CDB00 /* 185.60.219.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81860000 /* 129.134.0.0/16 */, 16, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00000 /* 157.240.0.0/16 */, 16, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00800 /* 157.240.8.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00000 /* 157.240.0.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00100 /* 157.240.1.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00200 /* 157.240.2.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00300 /* 157.240.3.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00500 /* 157.240.5.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00600 /* 157.240.6.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00700 /* 157.240.7.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00900 /* 157.240.9.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00A00 /* 157.240.10.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01000 /* 157.240.16.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01300 /* 157.240.19.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00B00 /* 157.240.11.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00C00 /* 157.240.12.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00D00 /* 157.240.13.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00E00 /* 157.240.14.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00F00 /* 157.240.15.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01100 /* 157.240.17.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01200 /* 157.240.18.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01400 /* 157.240.20.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01500 /* 157.240.21.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01600 /* 157.240.22.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01700 /* 157.240.23.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF00000 /* 157.240.0.0/17 */, 17, NDPI_HTTP_CRAWLER_BOT }, + { 0x45ABFA00 /* 69.171.250.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xCC0F1400 /* 204.15.20.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C000 /* 157.240.192.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C600 /* 157.240.198.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x66846000 /* 102.132.96.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0x66846000 /* 102.132.96.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x66846100 /* 102.132.97.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01A00 /* 157.240.26.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01B00 /* 157.240.27.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01C00 /* 157.240.28.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01D00 /* 157.240.29.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01E00 /* 157.240.30.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81861C00 /* 129.134.28.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81861D00 /* 129.134.29.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D000 /* 157.240.208.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C100 /* 157.240.193.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C200 /* 157.240.194.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C300 /* 157.240.195.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C500 /* 157.240.197.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C400 /* 157.240.196.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C800 /* 157.240.200.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C900 /* 157.240.201.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0CB00 /* 157.240.203.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0CC00 /* 157.240.204.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0CD00 /* 157.240.205.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0CE00 /* 157.240.206.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0CF00 /* 157.240.207.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D100 /* 157.240.209.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D200 /* 157.240.210.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D300 /* 157.240.211.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D400 /* 157.240.212.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D500 /* 157.240.213.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D600 /* 157.240.214.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D700 /* 157.240.215.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D800 /* 157.240.216.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0DE00 /* 157.240.222.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81861E00 /* 129.134.30.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81861F00 /* 129.134.31.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81861E00 /* 129.134.30.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT }, + { 0x81861900 /* 129.134.25.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81861A00 /* 129.134.26.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81861B00 /* 129.134.27.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x66846300 /* 102.132.99.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x66846500 /* 102.132.101.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864000 /* 129.134.64.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864100 /* 129.134.65.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864200 /* 129.134.66.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864300 /* 129.134.67.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0DB00 /* 157.240.219.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0D900 /* 157.240.217.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0DA00 /* 157.240.218.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C700 /* 157.240.199.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81867F00 /* 129.134.127.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0DF00 /* 157.240.223.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0C000 /* 157.240.192.0/18 */, 18, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0DD00 /* 157.240.221.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0DC00 /* 157.240.220.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xADFC5800 /* 173.252.88.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864400 /* 129.134.68.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864500 /* 129.134.69.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864600 /* 129.134.70.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01800 /* 157.240.24.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01900 /* 157.240.25.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x66846400 /* 102.132.100.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF01F00 /* 157.240.31.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E000 /* 157.240.224.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864700 /* 129.134.71.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E100 /* 157.240.225.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E200 /* 157.240.226.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E300 /* 157.240.227.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81860000 /* 129.134.0.0/17 */, 17, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864800 /* 129.134.72.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864900 /* 129.134.73.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864A00 /* 129.134.74.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB959DA00 /* 185.89.218.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB959DB00 /* 185.89.219.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB959DA00 /* 185.89.218.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E400 /* 157.240.228.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E500 /* 157.240.229.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864C00 /* 129.134.76.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864B00 /* 129.134.75.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0EF00 /* 157.240.239.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F000 /* 157.240.240.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F100 /* 157.240.241.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E700 /* 157.240.231.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E800 /* 157.240.232.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0E900 /* 157.240.233.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0EA00 /* 157.240.234.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0EB00 /* 157.240.235.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0EC00 /* 157.240.236.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864D00 /* 129.134.77.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864E00 /* 129.134.78.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81864F00 /* 129.134.79.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0ED00 /* 157.240.237.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0EE00 /* 157.240.238.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F200 /* 157.240.242.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F300 /* 157.240.243.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81867000 /* 129.134.112.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF06400 /* 157.240.100.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF06200 /* 157.240.98.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF06000 /* 157.240.96.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF06300 /* 157.240.99.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF06500 /* 157.240.101.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81867100 /* 129.134.113.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81867200 /* 129.134.114.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF06100 /* 157.240.97.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x81867300 /* 129.134.115.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F400 /* 157.240.244.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F500 /* 157.240.245.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F600 /* 157.240.246.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F700 /* 157.240.247.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F800 /* 157.240.248.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0F900 /* 157.240.249.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0FA00 /* 157.240.250.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468000 /* 163.70.128.0/17 */, 17, NDPI_HTTP_CRAWLER_BOT }, + { 0xA34D8000 /* 163.77.128.0/17 */, 17, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0FB00 /* 157.240.251.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0FC00 /* 157.240.252.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0FD00 /* 157.240.253.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x934BD000 /* 147.75.208.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0FE00 /* 157.240.254.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x453FB200 /* 69.63.178.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x9DF0CA00 /* 157.240.202.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0x1F0D5A00 /* 31.13.90.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468000 /* 163.70.128.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468100 /* 163.70.129.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468200 /* 163.70.130.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468300 /* 163.70.131.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468400 /* 163.70.132.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468800 /* 163.70.136.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468600 /* 163.70.134.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468700 /* 163.70.135.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xA3468500 /* 163.70.133.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB959DB00 /* 185.89.219.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB959DA00 /* 185.89.218.0/24 */, 24, NDPI_HTTP_CRAWLER_BOT }, + { 0xB959DA00 /* 185.89.218.0/23 */, 23, NDPI_HTTP_CRAWLER_BOT }, + { 0xB959D800 /* 185.89.216.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0x934BD000 /* 147.75.208.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0xCC0F1400 /* 204.15.20.0/22 */, 22, NDPI_HTTP_CRAWLER_BOT }, + { 0x453FB000 /* 69.63.176.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0x453FB000 /* 69.63.176.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x453FB800 /* 69.63.184.0/21 */, 21, NDPI_HTTP_CRAWLER_BOT }, + { 0x42DC9000 /* 66.220.144.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, + { 0x453FB000 /* 69.63.176.0/20 */, 20, NDPI_HTTP_CRAWLER_BOT }, /* End */ { 0x0, 0, 0 } }; diff --git a/src/lib/protocols/http.c b/src/lib/protocols/http.c index 320fd22e4..2109f8ed3 100644 --- a/src/lib/protocols/http.c +++ b/src/lib/protocols/http.c @@ -607,8 +607,8 @@ static void ndpi_check_user_agent(struct ndpi_detection_module_struct *ndpi_stru Amazon-Route53-Health-Check-Service (ref 68784dad-be98-49e4-a63c-9fbbe2816d7c; report http://amzn.to/1vsZADi) Anonymous Crawler/1.0 (Webcrawler developed with StormCrawler; http://example.com/; webcrawler@example.com) */ - if((strstr(ua, "+http") != NULL) - || (strstr(ua, " http") != NULL) + if((strstr(ua, "+http:") != NULL) + || (strstr(ua, " http:") != NULL) || ndpi_strncasestr(ua, "Crawler", ua_len) || ndpi_strncasestr(ua, "Bot", ua_len) /* bot/robot */ ) { diff --git a/tests/cfgs/default/pcap/crawler_false_positive.pcapng b/tests/cfgs/default/pcap/crawler_false_positive.pcapng Binary files differnew file mode 100644 index 000000000..1cb17446b --- /dev/null +++ b/tests/cfgs/default/pcap/crawler_false_positive.pcapng diff --git a/tests/cfgs/default/result/crawler_false_positive.pcapng.out b/tests/cfgs/default/result/crawler_false_positive.pcapng.out new file mode 100644 index 000000000..4c838cbda --- /dev/null +++ b/tests/cfgs/default/result/crawler_false_positive.pcapng.out @@ -0,0 +1,25 @@ +Guessed flow protos: 0 + +DPI Packets (TCP): 8 (8.00 pkts/flow) +Confidence DPI : 1 (flows) +Num dissector calls: 12 (12.00 diss/flow) +LRU cache ookla: 0/0/0 (insert/search/found) +LRU cache bittorrent: 0/0/0 (insert/search/found) +LRU cache zoom: 0/0/0 (insert/search/found) +LRU cache stun: 0/0/0 (insert/search/found) +LRU cache tls_cert: 0/0/0 (insert/search/found) +LRU cache mining: 0/0/0 (insert/search/found) +LRU cache msteams: 0/0/0 (insert/search/found) +LRU cache stun_zoom: 0/0/0 (insert/search/found) +Automa host: 0/0 (search/found) +Automa domain: 0/0 (search/found) +Automa tls cert: 0/0 (search/found) +Automa risk mask: 0/0 (search/found) +Automa common alpns: 0/0 (search/found) +Patricia risk mask: 2/0 (search/found) +Patricia risk: 0/0 (search/found) +Patricia protocols: 1/1 (search/found) + +OCSP 12 1842 1 + + 1 TCP 192.168.12.156:38291 <-> 93.184.220.29:80 [proto: 7.63/HTTP.OCSP][IP: 288/Edgecast][ClearText][Confidence: DPI][DPI packets: 8][cat: Web/5][7 pkts/705 bytes <-> 5 pkts/1137 bytes][Goodput ratio: 33/70][0.04 sec][Hostname/SNI: ocsp.digicert.com][bytes ratio: -0.235 (Download)][IAT c2s/s2c min/avg/max/stddev: 0/0 5/6 8/10 4/4][Pkt Len c2s/s2c min/avg/max/stddev: 66/66 101/227 284/865 75/319][StatusCode: 200][Req Content-Type: application/ocsp-request][Content-Type: application/ocsp-response][Server: ECS (mil/6CF7)][User-Agent: zbtls http][PLAIN TEXT (ConnectionTP/1.1)][Plen Bins: 33,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] diff --git a/utils/crawlers_ip_addresses_download.sh b/utils/crawlers_ip_addresses_download.sh index c4f4daa0b..77e70c61b 100755 --- a/utils/crawlers_ip_addresses_download.sh +++ b/utils/crawlers_ip_addresses_download.sh @@ -9,6 +9,7 @@ TMP1=/tmp/bot_google_c1.json TMP2=/tmp/bot_google_c2.json TMP3=/tmp/bot_google_c3.json TMP_BING=/tmp/bot_bing.json +TMP_FB=/tmp/bot_fb.list LIST=/tmp/bot.list #Google Common crawlers ORIGIN1="https://developers.google.com/static/search/apis/ipranges/googlebot.json" @@ -18,7 +19,7 @@ ORIGIN2="https://developers.google.com/static/search/apis/ipranges/special-crawl ORIGIN3="https://developers.google.com/static/search/apis/ipranges/user-triggered-fetchers.json" #Bing Bot ORIGIN_BING="https://www.bing.com/toolbox/bingbot.json" - +#Facebook Bot: https://developers.facebook.com/docs/sharing/webmasters/crawler/ echo "(1) Downloading file... ${ORIGIN1}" http_response=$(curl -s -o $TMP1 -w "%{http_code}" ${ORIGIN1}) @@ -48,15 +49,19 @@ if [ "$http_response" != "200" ]; then exit 1 fi +echo "(1) Downloading FB crawlers routes... " +whois -h whois.radb.net -- '-i origin AS32934' | grep ^route > $TMP_FB + echo "(2) Processing IP addresses..." { jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP1 # TODO: ipv6 jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP2 # TODO: ipv6 jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP3 # TODO: ipv6 jq -r '.prefixes | .[].ipv4Prefix | select( . != null )' $TMP_BING # TODO: ipv6 + grep -v route6 $TMP_FB | tr -d 'route:^ ' # TODO: ipv6 } > $LIST ./ipaddr2list.py $LIST NDPI_HTTP_CRAWLER_BOT > $DEST -rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $LIST +rm -f $TMP1 $TMP2 $TMP3 $TMP_BING $TMP_FB $LIST echo "(3) Crawlers IPs are available in $DEST" exit 0 |