diff options
author | Luca Deri <deri@ntop.org> | 2022-02-17 17:20:52 +0100 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2022-02-17 17:20:52 +0100 |
commit | a2878af1eed26db8380bf8c29e5bb64a0181f935 (patch) | |
tree | a341c52e76f170f799a24bca3f7a3bc57071ca5d | |
parent | 8a2a47e62a0d7b1bc8815dc4f09c35b73393454e (diff) |
Added new flow risk NDPI_HTTP_CRAWLER_BOT
-rw-r--r-- | doc/flow_risks.rst | 6 | ||||
-rw-r--r-- | example/ndpiReader.c | 10 | ||||
-rw-r--r-- | python/ndpi.py | 1 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 1 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 1 | ||||
-rw-r--r-- | src/lib/ndpi_utils.c | 4 | ||||
-rw-r--r-- | src/lib/protocols/http.c | 50 | ||||
-rw-r--r-- | tests/pcap/bot.pcap | bin | 0 -> 437580 bytes | |||
-rw-r--r-- | tests/result/bot.pcap.out | 8 | ||||
-rw-r--r-- | wireshark/ndpi.lua | 1 |
10 files changed, 65 insertions, 17 deletions
diff --git a/doc/flow_risks.rst b/doc/flow_risks.rst index 95001098c..4b363365b 100644 --- a/doc/flow_risks.rst +++ b/doc/flow_risks.rst @@ -266,4 +266,10 @@ NDPI_ERROR_CODE_DETECTED =================================== The risk is set whenever an error code is detected in the underlying protocol (e.g. HTTP and DNS). +.. _Risk 044: + +NDPI_HTTP_CRAWLER_BOT +=================================== +The risk is set whenever a crawler/bot/robot has been detected + diff --git a/example/ndpiReader.c b/example/ndpiReader.c index f21fbc86a..5f7f5d2b2 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -735,7 +735,7 @@ void printCSVHeader() { #if 0 fprintf(csv_fp, "tls_issuerDN,tls_subjectDN,"); #endif - fprintf(csv_fp, "ssh_client_hassh,ssh_server_hassh,flow_info,plen_bins"); + fprintf(csv_fp, "ssh_client_hassh,ssh_server_hassh,flow_info,plen_bins,http_user_agent"); if(enable_flow_stats) { fprintf(csv_fp, ",byte_dist_mean,byte_dist_std,entropy,total_entropy"); @@ -1230,9 +1230,11 @@ static void printFlow(u_int32_t id, struct ndpi_flow_info *flow, u_int16_t threa /* TCP flags */ fprintf(csv_fp, "%d,%d,%d,%d,%d,%d,%d,%d,", flow->cwr_count, flow->ece_count, flow->urg_count, flow->ack_count, flow->psh_count, flow->rst_count, flow->syn_count, flow->fin_count); - fprintf(csv_fp, "%d,%d,%d,%d,%d,%d,%d,%d,", flow->src2dst_cwr_count, flow->src2dst_ece_count, flow->src2dst_urg_count, flow->src2dst_ack_count, flow->src2dst_psh_count, flow->src2dst_rst_count, flow->src2dst_syn_count, flow->src2dst_fin_count); + fprintf(csv_fp, "%d,%d,%d,%d,%d,%d,%d,%d,", flow->src2dst_cwr_count, flow->src2dst_ece_count, flow->src2dst_urg_count, flow->src2dst_ack_count, + flow->src2dst_psh_count, flow->src2dst_rst_count, flow->src2dst_syn_count, flow->src2dst_fin_count); - fprintf(csv_fp, "%d,%d,%d,%d,%d,%d,%d,%d,", flow->dst2src_cwr_count, flow->ece_count, flow->urg_count, flow->ack_count, flow->psh_count, flow->rst_count, flow->syn_count, flow->fin_count); + fprintf(csv_fp, 
"%d,%d,%d,%d,%d,%d,%d,%d,", flow->dst2src_cwr_count, flow->ece_count, flow->urg_count, flow->ack_count, + flow->psh_count, flow->rst_count, flow->syn_count, flow->fin_count); /* TCP window */ fprintf(csv_fp, "%u,%u,", flow->c_to_s_init_win, flow->s_to_c_init_win); @@ -1269,6 +1271,8 @@ static void printFlow(u_int32_t id, struct ndpi_flow_info *flow, u_int16_t threa #ifndef DIRECTION_BINS print_bin(csv_fp, NULL, &flow->payload_len_bin); #endif + + fprintf(csv_fp, ",%s", flow->http.user_agent); } if((verbose != 1) && (verbose != 2)) { diff --git a/python/ndpi.py b/python/ndpi.py index d296844e7..039f222ae 100644 --- a/python/ndpi.py +++ b/python/ndpi.py @@ -335,6 +335,7 @@ typedef enum { NDPI_TLS_CERTIFICATE_ABOUT_TO_EXPIRE, NDPI_PUNYCODE_IDN, NDPI_ERROR_CODE_DETECTED, + NDPI_HTTP_CRAWLER_BOT, /* Leave this as last member */ NDPI_MAX_RISK diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 0b798c530..a86fa79da 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -116,6 +116,7 @@ typedef enum { NDPI_TLS_CERTIFICATE_ABOUT_TO_EXPIRE, NDPI_PUNYCODE_IDN, /* https://en.wikipedia.org/wiki/Punycode */ NDPI_ERROR_CODE_DETECTED, + NDPI_HTTP_CRAWLER_BOT, /* Leave this as last member */ NDPI_MAX_RISK /* must be <= 63 due to (**) */ diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 7c0e8f3b1..2740ec8bb 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -125,6 +125,7 @@ static ndpi_risk_info ndpi_known_risks[] = { { NDPI_TLS_CERTIFICATE_ABOUT_TO_EXPIRE, NDPI_RISK_MEDIUM, CLIENT_LOW_RISK_PERCENTAGE }, { NDPI_PUNYCODE_IDN, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE }, { NDPI_ERROR_CODE_DETECTED, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE }, + { NDPI_HTTP_CRAWLER_BOT, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE }, /* Leave this as last member */ { NDPI_MAX_RISK, NDPI_RISK_LOW, CLIENT_FAIR_RISK_PERCENTAGE } diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c index fabc4db2a..29cb94695 100644 --- 
a/src/lib/ndpi_utils.c +++ b/src/lib/ndpi_utils.c @@ -1852,6 +1852,10 @@ const char* ndpi_risk2str(ndpi_risk_enum risk) { return("Error Code Detected"); break; + case NDPI_HTTP_CRAWLER_BOT: + return("Crawler/Bot Detected"); + break; + default: snprintf(buf, sizeof(buf), "%d", (int)risk); return(buf); diff --git a/src/lib/protocols/http.c b/src/lib/protocols/http.c index cf1e6282b..b34206271 100644 --- a/src/lib/protocols/http.c +++ b/src/lib/protocols/http.c @@ -422,31 +422,53 @@ static void ndpi_check_user_agent(struct ndpi_detection_module_struct *ndpi_stru struct ndpi_flow_struct *flow, char *ua) { u_int len; - + char *double_slash; + if((!ua) || (ua[0] == '\0')) return; else len = strlen(ua); - if( - (!strncmp(ua, "<?", 2)) - || strchr(ua, '$') - || strstr(ua, "://") // || (!strncmp(ua, "jndi:ldap://", 12)) /* Log4J */ - // || ndpi_check_dga_name(ndpi_struct, NULL, ua, 0) - // || ndpi_match_bigram(ndpi_struct, &ndpi_struct->impossible_bigrams_automa, ua) - ) { + if((!strncmp(ua, "<?", 2)) + || strchr(ua, '$') + // || ndpi_check_dga_name(ndpi_struct, NULL, ua, 0) + // || ndpi_match_bigram(ndpi_struct, &ndpi_struct->impossible_bigrams_automa, ua) + ) ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT); + if((double_slash = strstr(ua, "://")) != NULL) { + if(double_slash != ua) /* We're not at the beginning of the user agent */{ + if((double_slash[-1] != 'p') /* http:// */ + && (double_slash[-1] != 's') /* https:// */) + ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_SUSPICIOUS_USER_AGENT); + } + } + + /* no else */ + if(!strncmp(ua, "jndi:ldap://", 12)) /* Log4J */ { ndpi_set_risk(ndpi_struct, flow, NDPI_POSSIBLE_EXPLOIT); } else if( - (len < 4) /* Too short */ - || (len > 256) /* Too long */ - || (!strncmp(ua, "test", 4)) - || strchr(ua, '{') - || strchr(ua, '}') - ) { + (len < 4) /* Too short */ + || (len > 256) /* Too long */ + || (!strncmp(ua, "test", 4)) + || strchr(ua, '{') + || strchr(ua, '}') + ) { ndpi_set_risk(ndpi_struct, flow, 
NDPI_HTTP_SUSPICIOUS_USER_AGENT); } + + /* + Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots) + Amazon-Route53-Health-Check-Service (ref 68784dad-be98-49e4-a63c-9fbbe2816d7c; report http://amzn.to/1vsZADi) + Anonymous Crawler/1.0 (Webcrawler developed with StormCrawler; http://example.com/; webcrawler@example.com) + */ + if((strstr(ua, "+http") != NULL) + || (strstr(ua, " http") != NULL) + || strcasestr(ua, "Crawler") + || strcasestr(ua, "Bot") /* bot/robot */ + ) { + ndpi_set_risk(ndpi_struct, flow, NDPI_HTTP_CRAWLER_BOT); + } } /* ************************************************************* */ diff --git a/tests/pcap/bot.pcap b/tests/pcap/bot.pcap Binary files differnew file mode 100644 index 000000000..016c71859 --- /dev/null +++ b/tests/pcap/bot.pcap diff --git a/tests/result/bot.pcap.out b/tests/result/bot.pcap.out new file mode 100644 index 000000000..2c3b2cf00 --- /dev/null +++ b/tests/result/bot.pcap.out @@ -0,0 +1,8 @@ +Guessed flow protos: 0 + +DPI Packets (TCP): 6 (6.00 pkts/flow) +Confidence DPI : 1 (flows) + +Azure 402 431124 1 + + 1 TCP 40.77.167.36:64768 <-> 89.31.72.220:80 [VLAN: 77][proto: 7.276/HTTP.Azure][ClearText][Confidence: DPI][cat: Cloud/13][115 pkts/7672 bytes <-> 287 pkts/423452 bytes][Goodput ratio: 4/96][5.66 sec][Hostname/SNI: atlanteditorino.it][bytes ratio: -0.964 (Download)][IAT c2s/s2c min/avg/max/stddev: 0/0 58/3 4532/106 489/16][Pkt Len c2s/s2c min/avg/max/stddev: 64/64 67/1475 374/1498 29/171][URL: atlanteditorino.it/quartieri/img/S.Donato_M.Vittoria1930_B.jpg][StatusCode: 200][Content-Type: image/jpeg][User-Agent: Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)][Risk: ** Crawler/Bot Detected **][Risk Score: 10][PLAIN TEXT (GET /quartieri/im)][Plen Bins: 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,100,0,0] diff --git a/wireshark/ndpi.lua b/wireshark/ndpi.lua index 68b71e9b4..28a1c6506 100644 --- a/wireshark/ndpi.lua +++ b/wireshark/ndpi.lua @@ 
-82,6 +82,7 @@ flow_risks[40] = ProtoField.bool("ndpi.flow_risk.possible_exploit", "Possible Ex flow_risks[41] = ProtoField.bool("ndpi.flow_risk.cert_about_to_expire", "TLS cert about to expire", num_bits_flow_risks, nil, bit(9), "nDPI Flow Risk: TLS certificate about to expire") flow_risks[42] = ProtoField.bool("ndpi.flow_risk.punycode_idn", "IDN Domain Name", num_bits_flow_risks, nil, bit(10), "nDPI Flow Risk: IDN Domain Name") flow_risks[43] = ProtoField.bool("ndpi.flow_risk.error_code_detected", "Error Code Detected", num_bits_flow_risks, nil, bit(11), "nDPI Flow Risk: Error Code Detected") +flow_risks[44] = ProtoField.bool("ndpi.flow_risk.crawler_bot", "Crawler/Bot Detected", num_bits_flow_risks, nil, bit(12), "nDPI Flow Risk: Crawler/Bot Detected") -- Last one: keep in sync the bitmask when adding new risks!! flow_risks[64] = ProtoField.new("Unused", "ndpi.flow_risk.unused", ftypes.UINT32, nil, base.HEX, bit(32) - bit(10)) |