diff options
author | Ivan Nardi <12729895+IvanNardi@users.noreply.github.com> | 2023-07-26 09:09:12 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-26 09:09:12 +0200 |
commit | 3326fa258ec92e553e39fc8a1bfa3921dc81f15c (patch) | |
tree | fcd0e725b7b5a8d13db1654a9b0864651c642f00 /src | |
parent | 2b230e28e0612e8654ad617534deb9aaaabd51b7 (diff) |
Add a heuristic to detect fully encrypted flows (#2058)
A fully encrypted session is a flow where every byte of the
payload is encrypted in an attempt to “look like nothing”.
The heuristic needs only the very first packet of the flow.
See: https://www.usenix.org/system/files/sec23fall-prepub-234-wu-mingshi.pdf
A basic, but generic, implementation of the popcount algorithm has been added.
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h | 7 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 17 | ||||
-rw-r--r-- | src/include/ndpi_win32.h | 5 | ||||
-rw-r--r-- | src/lib/ndpi_analyze.c | 39 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 70 | ||||
-rw-r--r-- | src/lib/ndpi_utils.c | 3 |
6 files changed, 139 insertions, 2 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h index 653510e82..121c3f7f8 100644 --- a/src/include/ndpi_api.h +++ b/src/include/ndpi_api.h @@ -1843,6 +1843,13 @@ extern "C" { /* ******************************* */ + /* PopCount [count how many bits are set to 1] */ + + int ndpi_popcount_init(struct ndpi_popcount *h); + void ndpi_popcount_count(struct ndpi_popcount *h, const u_int8_t *buf, u_int32_t buf_len); + + /* ******************************* */ + int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int16_t num_bins); void ndpi_free_bin(struct ndpi_bin *b); struct ndpi_bin* ndpi_clone_bin(struct ndpi_bin *b); diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 9844e7400..3d15517fc 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -145,7 +145,8 @@ typedef enum { NDPI_HTTP_OBSOLETE_SERVER, NDPI_PERIODIC_FLOW, /* Set in case a flow repeats at a specific pace [used by apps on top of nDPI] */ NDPI_MINOR_ISSUES, /* Generic packet issues (e.g. DNS with 0 TTL) */ - NDPI_TCP_ISSUES, /* TCP issues such as connection failed, probing or scan */ + NDPI_TCP_ISSUES, /* 50 */ /* TCP issues such as connection failed, probing or scan */ + NDPI_FULLY_ENCRYPTED, /* This (unknown) session is fully encrypted */ /* Leave this as last member */ NDPI_MAX_RISK /* must be <= 63 due to (**) */ @@ -1323,6 +1324,7 @@ struct ndpi_detection_module_struct { u_int32_t aggressiveness_ookla; int tcp_ack_paylod_heuristic; + int fully_encrypted_based_on_first_pkt_heuristic; u_int16_t ndpi_to_user_proto_id[NDPI_MAX_NUM_CUSTOM_PROTOCOLS]; /* custom protocolId mapping */ ndpi_proto_defaults_t proto_defaults[NDPI_MAX_SUPPORTED_PROTOCOLS+NDPI_MAX_NUM_CUSTOM_PROTOCOLS]; @@ -1379,7 +1381,8 @@ struct ndpi_flow_struct { /* init parameter, internal used to set up timestamp,... 
*/ u_int16_t guessed_protocol_id, guessed_protocol_id_by_ip, guessed_category, guessed_header_category; u_int8_t l4_proto, protocol_id_already_guessed:1, fail_with_unknown:1, - init_finished:1, client_packet_direction:1, packet_direction:1, is_ipv6:1, _pad1: 2; + init_finished:1, client_packet_direction:1, packet_direction:1, is_ipv6:1, first_pkt_fully_encrypted:1, _pad1: 1; + u_int16_t num_dissector_calls; ndpi_confidence_t confidence; /* ndpi_confidence_t */ @@ -1753,6 +1756,11 @@ typedef enum { ndpi_dont_load_crawlers_list = (1 << 18), ndpi_dont_load_protonvpn_list = (1 << 19), ndpi_dont_load_gambling_list = (1 << 20), + /* Heuristic to detect fully encrypted sessions, i.e. flows where every byte of + the payload is encrypted in an attempt to “look like nothing”. + This heuristic only analyzes the first packet of the flow. + See: https://www.usenix.org/system/files/sec23fall-prepub-234-wu-mingshi.pdf */ + ndpi_disable_fully_encrypted_heuristic = (1 << 21), } ndpi_prefs; typedef struct { @@ -1912,6 +1920,11 @@ struct ndpi_cm_sketch { u_int32_t *tables; }; +struct ndpi_popcount { + u_int64_t pop_count; /* Number of bits set to 1 found so far */ + u_int64_t tot_bytes_count; /* Total number of bytes processed so far */ +}; + /* **************************************** */ enum ndpi_bin_family { diff --git a/src/include/ndpi_win32.h b/src/include/ndpi_win32.h index 721ba48a4..2ad8602aa 100644 --- a/src/include/ndpi_win32.h +++ b/src/include/ndpi_win32.h @@ -78,4 +78,9 @@ typedef unsigned __int64 u_int64_t; /* https://stackoverflow.com/questions/7993050/multiplatform-atomic-increment */ #define __sync_fetch_and_add(a,b) InterlockedExchangeAdd ((a), b) +#if defined(WIN32) || defined(WIN64) +#include <intrin.h> +#define __builtin_popcount __popcnt +#endif + #endif /* __NDPI_WIN32_H__ */ diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c index f7f9784b6..17f755026 100644 --- a/src/lib/ndpi_analyze.c +++ b/src/lib/ndpi_analyze.c @@ -1831,3 +1831,42 @@ void 
ndpi_cm_sketch_destroy(struct ndpi_cm_sketch *sketch) { ndpi_free(sketch->tables); ndpi_free(sketch); } + +/* ********************************************************************************* */ +/* ********************************************************************************* */ + +/* Popcount, short for "population count," is a computer programming term that refers to + the number of set bits (bits with a value of 1) in a binary representation of a given + data word or integer. In other words, it is the count of all the 1s present in the + binary representation of a number. + For example, consider the number 45, which is represented in binary as 101101. + The popcount of 45 would be 4 because there are four 1s in its binary representation. + ndpi_popcount_init() zeroes the two counters; ndpi_popcount_count() adds one buffer to them. */ + +int ndpi_popcount_init(struct ndpi_popcount *h) +{ + if(h) { + memset(h, '\0', sizeof(*h)); + return 0; + } + return -1; +} + +/* ********************************************************************************* */ + +void ndpi_popcount_count(struct ndpi_popcount *h, const u_int8_t *buf, u_int32_t buf_len) +{ + u_int32_t i; + + if(!h || !buf) + return; + + /* Trivial alg. 
TODO: there are lots of better, more performant algorithms. + Counts one byte at a time: `buf` has no alignment guarantee, so loading it + through a u_int32_t pointer cast is undefined behaviour and can trap on + strict-alignment CPUs */ + for(i = 0; i < buf_len; i++) + h->pop_count += __builtin_popcount(buf[i]); + + h->tot_bytes_count += buf_len; +} diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index dcb66cfde..ac5371dfe 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -186,6 +186,7 @@ static ndpi_risk_info ndpi_known_risks[] = { { NDPI_PERIODIC_FLOW, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE }, { NDPI_MINOR_ISSUES, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE, NDPI_BOTH_ACCOUNTABLE }, { NDPI_TCP_ISSUES, NDPI_RISK_MEDIUM, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE }, + { NDPI_FULLY_ENCRYPTED, NDPI_RISK_MEDIUM, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE }, /* Leave this as last member */ { NDPI_MAX_RISK, NDPI_RISK_LOW, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_NO_ACCOUNTABILITY } @@ -3062,6 +3063,9 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs if(prefs & ndpi_enable_tcp_ack_payload_heuristic) ndpi_str->tcp_ack_paylod_heuristic = 1; + if(!(prefs & ndpi_disable_fully_encrypted_heuristic)) + ndpi_str->fully_encrypted_based_on_first_pkt_heuristic = 1; + for(i = 0; i < NUM_CUSTOM_CATEGORIES; i++) ndpi_snprintf(ndpi_str->custom_category_labels[i], CUSTOM_CATEGORY_LABEL_LEN, "User custom category %u", (unsigned int) (i + 1)); @@ -5655,6 +5659,60 @@ static u_int8_t ndpi_is_multi_or_broadcast(struct ndpi_packet_struct *packet) { /* ************************************************ */ + +static int fully_enc_heuristic(struct ndpi_detection_module_struct *ndpi_str, + struct ndpi_flow_struct *flow) { + struct ndpi_packet_struct *packet = &ndpi_str->packet; + struct ndpi_popcount popcount; + float ratio; + unsigned int i, len, cnt, cnt_consecutives = 0; + + if(flow->l4_proto == IPPROTO_TCP && + 
ndpi_seen_flow_beginning(flow)) { +
/* See original paper, Algorithm 1, for the reference numbers: each ExN below is an exemption that returns 0 (not fully encrypted); 1 is returned only when none applies */ + + /* Ex1: average number of set bits per byte is <= 3.4 or >= 4.6; float literals so that a float ratio of exactly 3.4 or 4.6 is exempted too */ + ndpi_popcount_init(&popcount); + ndpi_popcount_count(&popcount, packet->payload, packet->payload_packet_len); + ratio = (float)popcount.pop_count / (float)popcount.tot_bytes_count; + if(ratio <= 3.4f || ratio >= 4.6f) { + return 0; + } + + /* Ex2: the first (up to) six bytes are all printable */ + len = ndpi_min(6, packet->payload_packet_len); + cnt = 0; + for(i = 0; i < len; i++) { + if(ndpi_isprint(packet->payload[i])) + cnt += 1; + } + if(cnt == len) { + return 0; + } + + /* Ex3: more than half of the payload bytes are printable */ + cnt = 0; + for(i = 0; i < packet->payload_packet_len; i++) { + if(ndpi_isprint(packet->payload[i])) { + cnt += 1; + cnt_consecutives += 1; + if(cnt_consecutives >= 20) { /* Ex4: 20 consecutive printable bytes */ + return 0; + } + } else { + cnt_consecutives = 0; + } + } + if((float)cnt / packet->payload_packet_len > 0.5) { + return 0; + } + + return 1; + } + return 0; +} + +/* ************************************************ */ + static int tcp_ack_padding(struct ndpi_packet_struct *packet) { const struct ndpi_tcphdr *tcph = packet->tcp; if(tcph && tcph->ack && !tcph->psh && @@ -6553,6 +6611,12 @@ ndpi_protocol ndpi_detection_giveup(struct ndpi_detection_module_struct *ndpi_st ret.app_protocol = flow->detected_protocol_stack[0]; } + /* TODO: not sure about the best "order" among fully encrypted logic, classification by-port and classification by-ip...*/ + if(ret.app_protocol == NDPI_PROTOCOL_UNKNOWN && + flow->first_pkt_fully_encrypted == 1) { + ndpi_set_risk(ndpi_str, flow, NDPI_FULLY_ENCRYPTED, NULL); + } + /* Classification by-port */ if(enable_guess && ret.app_protocol == NDPI_PROTOCOL_UNKNOWN) { @@ 
-7229,6 +7293,12 @@ static ndpi_protocol ndpi_internal_detection_process_packet(struct ndpi_detectio && (flow->l4_proto == IPPROTO_TCP)) ndpi_add_connection_as_zoom(ndpi_str, flow); + if(ndpi_str->fully_encrypted_based_on_first_pkt_heuristic && + ret.app_protocol == NDPI_PROTOCOL_UNKNOWN && /* Only for unknown traffic */ + flow->packet_counter == 1 && packet->payload_packet_len > 0) 
{ + flow->first_pkt_fully_encrypted = fully_enc_heuristic(ndpi_str, flow); + } + return(ret); } diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c index 5f334081b..35c0410e2 100644 --- a/src/lib/ndpi_utils.c +++ b/src/lib/ndpi_utils.c @@ -2045,6 +2045,9 @@ const char* ndpi_risk2str(ndpi_risk_enum risk) { case NDPI_TCP_ISSUES: return("TCP Connection Issues"); + case NDPI_FULLY_ENCRYPTED: + return("Fully encrypted flow"); + default: ndpi_snprintf(buf, sizeof(buf), "%d", (int)risk); return(buf); |