aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorIvan Nardi <12729895+IvanNardi@users.noreply.github.com>2023-07-26 09:09:12 +0200
committerGitHub <noreply@github.com>2023-07-26 09:09:12 +0200
commit3326fa258ec92e553e39fc8a1bfa3921dc81f15c (patch)
treefcd0e725b7b5a8d13db1654a9b0864651c642f00 /src
parent2b230e28e0612e8654ad617534deb9aaaabd51b7 (diff)
Add a heuristic to detect fully encrypted flows (#2058)
A fully encrypted session is a flow where every byte of the payload is encrypted in an attempt to “look like nothing”. The heuristic needs only the very first packet of the flow. See: https://www.usenix.org/system/files/sec23fall-prepub-234-wu-mingshi.pdf A basic, but generic, implementation of the popcount algorithm has been added.
Diffstat (limited to 'src')
-rw-r--r--src/include/ndpi_api.h7
-rw-r--r--src/include/ndpi_typedefs.h17
-rw-r--r--src/include/ndpi_win32.h5
-rw-r--r--src/lib/ndpi_analyze.c39
-rw-r--r--src/lib/ndpi_main.c70
-rw-r--r--src/lib/ndpi_utils.c3
6 files changed, 139 insertions, 2 deletions
diff --git a/src/include/ndpi_api.h b/src/include/ndpi_api.h
index 653510e82..121c3f7f8 100644
--- a/src/include/ndpi_api.h
+++ b/src/include/ndpi_api.h
@@ -1843,6 +1843,13 @@ extern "C" {
/* ******************************* */
+ /* PopCount [count how many bits are set to 1] */
+
+ int ndpi_popcount_init(struct ndpi_popcount *h);
+ void ndpi_popcount_count(struct ndpi_popcount *h, const u_int8_t *buf, u_int32_t buf_len);
+
+ /* ******************************* */
+
int ndpi_init_bin(struct ndpi_bin *b, enum ndpi_bin_family f, u_int16_t num_bins);
void ndpi_free_bin(struct ndpi_bin *b);
struct ndpi_bin* ndpi_clone_bin(struct ndpi_bin *b);
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 9844e7400..3d15517fc 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -145,7 +145,8 @@ typedef enum {
NDPI_HTTP_OBSOLETE_SERVER,
NDPI_PERIODIC_FLOW, /* Set in case a flow repeats at a specific pace [used by apps on top of nDPI] */
NDPI_MINOR_ISSUES, /* Generic packet issues (e.g. DNS with 0 TTL) */
- NDPI_TCP_ISSUES, /* TCP issues such as connection failed, probing or scan */
+ NDPI_TCP_ISSUES, /* 50 */ /* TCP issues such as connection failed, probing or scan */
+ NDPI_FULLY_ENCRYPTED, /* This (unknown) session is fully encrypted */
/* Leave this as last member */
NDPI_MAX_RISK /* must be <= 63 due to (**) */
@@ -1323,6 +1324,7 @@ struct ndpi_detection_module_struct {
u_int32_t aggressiveness_ookla;
int tcp_ack_paylod_heuristic;
+ int fully_encrypted_based_on_first_pkt_heuristic;
u_int16_t ndpi_to_user_proto_id[NDPI_MAX_NUM_CUSTOM_PROTOCOLS]; /* custom protocolId mapping */
ndpi_proto_defaults_t proto_defaults[NDPI_MAX_SUPPORTED_PROTOCOLS+NDPI_MAX_NUM_CUSTOM_PROTOCOLS];
@@ -1379,7 +1381,8 @@ struct ndpi_flow_struct {
/* init parameter, internal used to set up timestamp,... */
u_int16_t guessed_protocol_id, guessed_protocol_id_by_ip, guessed_category, guessed_header_category;
u_int8_t l4_proto, protocol_id_already_guessed:1, fail_with_unknown:1,
- init_finished:1, client_packet_direction:1, packet_direction:1, is_ipv6:1, _pad1: 2;
+ init_finished:1, client_packet_direction:1, packet_direction:1, is_ipv6:1, first_pkt_fully_encrypted:1, _pad1: 1;
+
u_int16_t num_dissector_calls;
ndpi_confidence_t confidence; /* ndpi_confidence_t */
@@ -1753,6 +1756,11 @@ typedef enum {
ndpi_dont_load_crawlers_list = (1 << 18),
ndpi_dont_load_protonvpn_list = (1 << 19),
ndpi_dont_load_gambling_list = (1 << 20),
+ /* Heuristic to detect fully encrypted sessions, i.e. flows where every byte of
+ the payload is encrypted in an attempt to “look like nothing”.
+ This heuristic only analyzes the first packet of the flow.
+ See: https://www.usenix.org/system/files/sec23fall-prepub-234-wu-mingshi.pdf */
+ ndpi_disable_fully_encrypted_heuristic = (1 << 21),
} ndpi_prefs;
typedef struct {
@@ -1912,6 +1920,11 @@ struct ndpi_cm_sketch {
u_int32_t *tables;
};
+/* Running totals kept by the popcount helpers (ndpi_popcount_init / ndpi_popcount_count) */
+struct ndpi_popcount {
+  /* How many 1-bits have been seen in the data processed so far */
+  u_int64_t pop_count;
+  /* How many bytes have been processed so far */
+  u_int64_t tot_bytes_count;
+};
+
/* **************************************** */
enum ndpi_bin_family {
diff --git a/src/include/ndpi_win32.h b/src/include/ndpi_win32.h
index 721ba48a4..2ad8602aa 100644
--- a/src/include/ndpi_win32.h
+++ b/src/include/ndpi_win32.h
@@ -78,4 +78,9 @@ typedef unsigned __int64 u_int64_t;
/* https://stackoverflow.com/questions/7993050/multiplatform-atomic-increment */
#define __sync_fetch_and_add(a,b) InterlockedExchangeAdd ((a), b)
+#if defined(WIN32) || defined(WIN64)
+#include <intrin.h>
+#define __builtin_popcount __popcnt
+#endif
+
#endif /* __NDPI_WIN32_H__ */
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c
index f7f9784b6..17f755026 100644
--- a/src/lib/ndpi_analyze.c
+++ b/src/lib/ndpi_analyze.c
@@ -1831,3 +1831,42 @@ void ndpi_cm_sketch_destroy(struct ndpi_cm_sketch *sketch) {
ndpi_free(sketch->tables);
ndpi_free(sketch);
}
+
+/* ********************************************************************************* */
+/* ********************************************************************************* */
+
+/* Popcount, short for "population count," is a computer programming term that refers to
+ the number of set bits (bits with a value of 1) in a binary representation of a given
+ data word or integer. In other words, it is the count of all the 1s present in the
+ binary representation of a number.
+ For example, consider the number 45, which is represented in binary as 101101.
+ The popcount of 45 would be 4 because there are four 1s in its binary representation.
+*/
+
+/*
+  Reset a popcount accumulator.
+
+  h: accumulator to clear; every byte of the structure is set to zero.
+
+  Returns 0 on success, -1 when h is NULL.
+*/
+int ndpi_popcount_init(struct ndpi_popcount *h)
+{
+  /* Nothing to reset without an accumulator */
+  if(!h)
+    return -1;
+
+  memset(h, 0, sizeof(*h));
+
+  return 0;
+}
+
+/* ********************************************************************************* */
+
+/*
+  Add the number of bits set to 1 in a buffer to a popcount accumulator.
+
+  h:       accumulator to update; the call is a no-op when it is NULL.
+  buf:     bytes to scan; the call is a no-op when it is NULL.
+  buf_len: number of bytes available at buf.
+
+  On return h->pop_count has grown by the number of 1-bits found in buf and
+  h->tot_bytes_count has grown by buf_len.
+*/
+void ndpi_popcount_count(struct ndpi_popcount *h, const u_int8_t *buf, u_int32_t buf_len)
+{
+  u_int32_t i, num_words, word;
+
+  if(!h || !buf)
+    return;
+
+  /* Trivial alg. TODO: there are lots of better, more performant algorithms */
+
+  /* Whole 32-bit words first. Each word is copied out with memcpy() instead of
+     dereferencing a casted pointer: buf has no alignment guarantee and reading
+     a byte buffer through a u_int32_t pointer is undefined behaviour. The bit
+     count does not depend on byte order, so the result is unchanged. */
+  num_words = buf_len / 4;
+  for(i = 0; i < num_words; i++) {
+    memcpy(&word, buf + (size_t)i * 4, sizeof(word));
+    h->pop_count += __builtin_popcount(word);
+  }
+
+  /* Then the 0..3 trailing bytes that do not fill a whole word */
+  for(i = num_words * 4; i < buf_len; i++)
+    h->pop_count += __builtin_popcount(buf[i]);
+
+  h->tot_bytes_count += buf_len;
+}
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index dcb66cfde..ac5371dfe 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -186,6 +186,7 @@ static ndpi_risk_info ndpi_known_risks[] = {
{ NDPI_PERIODIC_FLOW, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },
{ NDPI_MINOR_ISSUES, NDPI_RISK_LOW, CLIENT_LOW_RISK_PERCENTAGE, NDPI_BOTH_ACCOUNTABLE },
{ NDPI_TCP_ISSUES, NDPI_RISK_MEDIUM, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },
+ { NDPI_FULLY_ENCRYPTED, NDPI_RISK_MEDIUM, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_CLIENT_ACCOUNTABLE },
/* Leave this as last member */
{ NDPI_MAX_RISK, NDPI_RISK_LOW, CLIENT_FAIR_RISK_PERCENTAGE, NDPI_NO_ACCOUNTABILITY }
@@ -3062,6 +3063,9 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs
if(prefs & ndpi_enable_tcp_ack_payload_heuristic)
ndpi_str->tcp_ack_paylod_heuristic = 1;
+ if(!(prefs & ndpi_disable_fully_encrypted_heuristic))
+ ndpi_str->fully_encrypted_based_on_first_pkt_heuristic = 1;
+
for(i = 0; i < NUM_CUSTOM_CATEGORIES; i++)
ndpi_snprintf(ndpi_str->custom_category_labels[i], CUSTOM_CATEGORY_LABEL_LEN, "User custom category %u",
(unsigned int) (i + 1));
@@ -5655,6 +5659,60 @@ static u_int8_t ndpi_is_multi_or_broadcast(struct ndpi_packet_struct *packet) {
/* ************************************************ */
+/*
+  Decide whether the payload of the current packet looks "fully encrypted".
+
+  The checks follow Algorithm 1 of the paper at
+  https://www.usenix.org/system/files/sec23fall-prepub-234-wu-mingshi.pdf
+  (the ExN labels below are the paper's reference numbers). Each check is an
+  exemption: as soon as one matches, the payload is NOT considered fully
+  encrypted.
+
+  ndpi_str: detection module; the packet under analysis is ndpi_str->packet.
+  flow:     flow the packet belongs to.
+
+  Returns 1 when no exemption matches, 0 otherwise. 0 is also returned for
+  non-TCP flows, for flows whose beginning has not been seen and for empty
+  payloads.
+*/
+static int fully_enc_heuristic(struct ndpi_detection_module_struct *ndpi_str,
+                               struct ndpi_flow_struct *flow) {
+  struct ndpi_packet_struct *packet = &ndpi_str->packet;
+  struct ndpi_popcount popcount;
+  float ratio;
+  unsigned int i, len, cnt, cnt_consecutives = 0;
+
+  if(flow->l4_proto != IPPROTO_TCP ||
+     !ndpi_seen_flow_beginning(flow))
+    return 0;
+
+  /* An empty payload cannot be classified; bailing out here also avoids the
+     0/0 division in the ratio below */
+  if(packet->payload_packet_len == 0)
+    return 0;
+
+  /* Ex1: average number of bits set per byte outside the (3.4, 4.6) range */
+  ndpi_popcount_init(&popcount);
+  ndpi_popcount_count(&popcount, packet->payload, packet->payload_packet_len);
+  ratio = (float)popcount.pop_count / (float)popcount.tot_bytes_count;
+  if(ratio <= 3.4 || ratio >= 4.6) {
+    return 0;
+  }
+
+  /* Ex2: the first 6 bytes (or the whole payload, if shorter) are all printable */
+  len = ndpi_min(6, packet->payload_packet_len);
+  cnt = 0;
+  for(i = 0; i < len; i++) {
+    if(ndpi_isprint(packet->payload[i]))
+      cnt += 1;
+  }
+  if(cnt == len) {
+    return 0;
+  }
+
+  /* Ex3 and Ex4 share a single scan of the payload: cnt is the total number of
+     printable bytes, cnt_consecutives the length of the current printable run */
+  cnt = 0;
+  for(i = 0; i < packet->payload_packet_len; i++) {
+    if(ndpi_isprint(packet->payload[i])) {
+      cnt += 1;
+      cnt_consecutives += 1;
+      if(cnt_consecutives >= 20) { /* Ex4 */
+        return 0;
+      }
+    } else {
+      cnt_consecutives = 0;
+    }
+  }
+  /* Ex3: more than half of the payload is printable */
+  if((float)cnt / packet->payload_packet_len > 0.5) {
+    return 0;
+  }
+
+  return 1;
+}
+
+/* ************************************************ */
+
static int tcp_ack_padding(struct ndpi_packet_struct *packet) {
const struct ndpi_tcphdr *tcph = packet->tcp;
if(tcph && tcph->ack && !tcph->psh &&
@@ -6553,6 +6611,12 @@ ndpi_protocol ndpi_detection_giveup(struct ndpi_detection_module_struct *ndpi_st
ret.app_protocol = flow->detected_protocol_stack[0];
}
+ /* TODO: not sure about the best "order" among fully encrypted logic, classification by-port and classification by-ip...*/
+ if(ret.app_protocol == NDPI_PROTOCOL_UNKNOWN &&
+ flow->first_pkt_fully_encrypted == 1) {
+ ndpi_set_risk(ndpi_str, flow, NDPI_FULLY_ENCRYPTED, NULL);
+ }
+
/* Classification by-port */
if(enable_guess && ret.app_protocol == NDPI_PROTOCOL_UNKNOWN) {
@@ -7229,6 +7293,12 @@ static ndpi_protocol ndpi_internal_detection_process_packet(struct ndpi_detectio
&& (flow->l4_proto == IPPROTO_TCP))
ndpi_add_connection_as_zoom(ndpi_str, flow);
+ if(ndpi_str->fully_encrypted_based_on_first_pkt_heuristic &&
+ ret.app_protocol == NDPI_PROTOCOL_UNKNOWN && /* Only for unknown traffic */
+ flow->packet_counter == 1 && packet->payload_packet_len > 0) {
+ flow->first_pkt_fully_encrypted = fully_enc_heuristic(ndpi_str, flow);
+ }
+
return(ret);
}
diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c
index 5f334081b..35c0410e2 100644
--- a/src/lib/ndpi_utils.c
+++ b/src/lib/ndpi_utils.c
@@ -2045,6 +2045,9 @@ const char* ndpi_risk2str(ndpi_risk_enum risk) {
case NDPI_TCP_ISSUES:
return("TCP Connection Issues");
+ case NDPI_FULLY_ENCRYPTED:
+ return("Fully encrypted flow");
+
default:
ndpi_snprintf(buf, sizeof(buf), "%d", (int)risk);
return(buf);