aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2021-02-21 21:33:26 +0100
committerLuca Deri <deri@ntop.org>2021-02-21 21:45:46 +0100
commitfc16c9368e1f5ba93144115d687fd2ce09f50955 (patch)
treed85dacc29238bae062e38ca34b9a111d0fd8cfa0
parent6db20b3ba932a826002c7af3d02cce5d8a0c361d (diff)
Added risky domain flow-risk support
-rw-r--r--example/ndpiReader.c20
-rw-r--r--example/risky_domains.txt11
-rw-r--r--src/include/ndpi_api.h.in10
-rw-r--r--src/include/ndpi_typedefs.h5
-rw-r--r--src/lib/ndpi_main.c103
-rw-r--r--tests/pcap/tk.pcapbin0 -> 686 bytes
-rw-r--r--tests/result/tk.pcap.out5
7 files changed, 135 insertions, 19 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 7681d1627..591480ff3 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -65,8 +65,9 @@ static FILE *playlist_fp[MAX_NUM_READER_THREADS] = { NULL }; /**< Ingress playli
static FILE *results_file = NULL;
static char *results_path = NULL;
static char * bpfFilter = NULL; /**< bpf filter */
-static char *_protoFilePath = NULL; /**< Protocol file path */
+static char *_protoFilePath = NULL; /**< Protocol file path */
static char *_customCategoryFilePath= NULL; /**< Custom categories file path */
+static char *_riskyDomainFilePath = NULL; /**< Risky domain files */
static u_int8_t live_capture = 0;
static u_int8_t undetected_flows_deleted = 0;
FILE *csv_fp = NULL; /**< for CSV export */
@@ -437,7 +438,7 @@ static void help(u_int long_help) {
"[-f <filter>][-s <duration>][-m <duration>][-b <num bin clusters>]\n"
" [-p <protos>][-l <loops> [-q][-d][-J][-h][-D][-e <len>][-t][-v <level>]\n"
" [-n <threads>][-w <file>][-c <file>][-C <file>][-j <file>][-x <file>]\n"
- " [-T <num>][-U <num>] [-x <domain>]\n\n"
+ " [-r <file>][-T <num>][-U <num>] [-x <domain>]\n\n"
"Usage:\n"
" -i <file.pcap|device> | Specify a pcap file/playlist to read packets from or a\n"
" | device for live capture (comma-separated list)\n"
@@ -465,9 +466,9 @@ static void help(u_int long_help) {
" | <d> = max packet payload dissection\n"
" | <d> = max num reported payloads\n"
" | Default: %u:%u:%u:%u:%u\n"
- " -r | Print nDPI version and git revision\n"
" -c <path> | Load custom categories from the specified file\n"
" -C <path> | Write output in CSV format on the specified file\n"
+ " -r <path> | Load risky domain file\n"
" -w <path> | Write test output on the specified file. This is useful for\n"
" | testing purposes in order to compare results across runs\n"
" -h | This help\n"
@@ -762,7 +763,7 @@ static void parseOptions(int argc, char **argv) {
}
#endif
- while((opt = getopt_long(argc, argv, "b:e:c:C:dDf:g:i:Ihp:P:l:s:tu:v:V:n:Jrp:x:w:q0123:456:7:89:m:T:U:",
+ while((opt = getopt_long(argc, argv, "b:e:c:C:dDf:g:i:Ihp:P:l:r:s:tu:v:V:n:Jrp:x:w:q0123:456:7:89:m:T:U:",
longopts, &option_idx)) != EOF) {
#ifdef DEBUG_TRACE
if(trace) fprintf(trace, " #### -%c [%s] #### \n", opt, optarg ? optarg : "");
@@ -829,6 +830,10 @@ static void parseOptions(int argc, char **argv) {
printf("Unable to write on CSV file %s\n", optarg);
break;
+ case 'r':
+ _riskyDomainFilePath = optarg;
+ break;
+
case 's':
capture_for = atoi(optarg);
capture_until = capture_for + time(NULL);
@@ -838,10 +843,6 @@ static void parseOptions(int argc, char **argv) {
decode_tunnels = 1;
break;
- case 'r':
- printf("ndpiReader - nDPI (%s)\n", ndpi_revision());
- exit(0);
-
case 'v':
verbose = atoi(optarg);
break;
@@ -2056,6 +2057,9 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) {
if(_customCategoryFilePath)
ndpi_load_categories_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _customCategoryFilePath);
+ if(_riskyDomainFilePath)
+ ndpi_load_risk_domain_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _riskyDomainFilePath);
+
ndpi_finalize_initalization(ndpi_thread_info[thread_id].workflow->ndpi_struct);
if(enable_doh_dot_detection)
diff --git a/example/risky_domains.txt b/example/risky_domains.txt
new file mode 100644
index 000000000..5e0eb5af0
--- /dev/null
+++ b/example/risky_domains.txt
@@ -0,0 +1,11 @@
+# https://www.spamhaus.org/statistics/tlds/
+.fail
+.viajes
+.exposed
+.london
+.work
+.gq
+.ml
+.surf
+.tk
+.xyz
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in
index e701c51d4..8163c0553 100644
--- a/src/include/ndpi_api.h.in
+++ b/src/include/ndpi_api.h.in
@@ -695,6 +695,16 @@ extern "C" {
*/
int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, const char* path);
+ /**
+ * Read a file and load the list of risky domains
+ *
+ * @par ndpi_str = the detection module
+ * @par path = the path of the file
+ * @return the number of risky domains loaded;
+ * -1 if the file cannot be opened
+ */
+ int ndpi_load_risk_domain_file(struct ndpi_detection_module_struct *ndpi_str, const char* path);
+
/**
* Get the total number of the supported protocols
*
diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h
index 3f86ec416..795d78b02 100644
--- a/src/include/ndpi_typedefs.h
+++ b/src/include/ndpi_typedefs.h
@@ -1094,11 +1094,12 @@ struct ndpi_detection_module_struct {
u_int ndpi_num_supported_protocols;
u_int ndpi_num_custom_protocols;
- /* HTTP/DNS/HTTPS host matching */
+ /* HTTP/DNS/HTTPS/QUIC host matching */
ndpi_automa host_automa, /* Used for DNS/HTTPS */
content_automa, /* Used for HTTP subprotocol_detection */
subprotocol_automa, /* Used for HTTP subprotocol_detection */
- bigrams_automa, impossible_bigrams_automa; /* TOR */
+ bigrams_automa, impossible_bigrams_automa, /* TOR */
+ risky_domain_automa;
/* IMPORTANT: please update ndpi_finalize_initalization() whenever you add a new automa */
struct {
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index eaf6206b8..e00852aa5 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -2117,7 +2117,8 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs
ndpi_str->content_automa.ac_automa = ac_automata_init(ac_match_handler);
ndpi_str->bigrams_automa.ac_automa = ac_automata_init(ac_match_handler);
ndpi_str->impossible_bigrams_automa.ac_automa = ac_automata_init(ac_match_handler);
-
+ ndpi_str->risky_domain_automa.ac_automa = NULL; /* Initialized on demand */
+
if((sizeof(categories) / sizeof(char *)) != NDPI_PROTOCOL_NUM_CATEGORIES) {
NDPI_LOG_ERR(ndpi_str, "[NDPI] invalid categories length: expected %u, got %u\n", NDPI_PROTOCOL_NUM_CATEGORIES,
(unsigned int) (sizeof(categories) / sizeof(char *)));
@@ -2149,7 +2150,7 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs
void ndpi_finalize_initalization(struct ndpi_detection_module_struct *ndpi_str) {
u_int i;
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < 5; i++) {
ndpi_automa *automa;
switch(i) {
@@ -2169,12 +2170,16 @@ void ndpi_finalize_initalization(struct ndpi_detection_module_struct *ndpi_str)
automa = &ndpi_str->impossible_bigrams_automa;
break;
+ case 4:
+ automa = &ndpi_str->risky_domain_automa;
+ break;
+
default:
automa = NULL;
break;
}
- if(automa) {
+ if(automa && automa->ac_automa) {
ac_automata_finalize((AC_AUTOMATA_t *) automa->ac_automa);
automa->ac_automa_finalized = 1;
}
@@ -2426,6 +2431,9 @@ void ndpi_exit_detection_module(struct ndpi_detection_module_struct *ndpi_str) {
if(ndpi_str->impossible_bigrams_automa.ac_automa != NULL)
ac_automata_release((AC_AUTOMATA_t *) ndpi_str->impossible_bigrams_automa.ac_automa, 0);
+ if(ndpi_str->risky_domain_automa.ac_automa != NULL)
+ ac_automata_release((AC_AUTOMATA_t *) ndpi_str->risky_domain_automa.ac_automa, 0);
+
if(ndpi_str->custom_categories.hostnames.ac_automa != NULL)
ac_automata_release((AC_AUTOMATA_t *) ndpi_str->custom_categories.hostnames.ac_automa,
1 /* free patterns strings memory */);
@@ -2805,6 +2813,73 @@ int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, con
/* ******************************************************************** */
+/* Add lowercased "<domain_name>$" to the risky-domain automa (created on first use); -1 on error */
+static int ndpi_load_risky_domain(struct ndpi_detection_module_struct *ndpi_str,
+                                  char* domain_name) {
+  char buf[64];
+  int i, len;
+
+  if(ndpi_str->risky_domain_automa.ac_automa == NULL)
+    ndpi_str->risky_domain_automa.ac_automa = ac_automata_init(ac_match_handler);
+  if(ndpi_str->risky_domain_automa.ac_automa == NULL) return(-1);
+
+  /* Reject empty names (hosts get a trailing '$', so a bare "$" would presumably flag them all) and truncated ones */
+  len = snprintf(buf, sizeof(buf), "%s$", domain_name);
+  if((len < 2) || (len >= (int)sizeof(buf))) return(-1);
+  for(i = 0; i < len-1 /* Skip $ */; i++) buf[i] = tolower((unsigned char)buf[i]);
+  /* NOTE(review): buf lives on the stack - confirm ndpi_add_string_to_automa() does not retain the pointer */
+  return(ndpi_add_string_to_automa(ndpi_str->risky_domain_automa.ac_automa, buf));
+}
+
+/* ******************************************************************** */
+
+/*
+ * Load a list of risky domains from a text file, one per line:
+ *
+ * <domain name>
+ *
+ * Blank lines and lines starting with '#' are ignored. A trailing
+ * end-of-line (LF or CRLF) is stripped; the last line need not have one.
+ * Prefix the name with a dot (.<domain name>) to avoid mismatches.
+ * NOTE: lines longer than 127 bytes are split by fgets() and handled piecewise.
+ *
+ * Returns the number of domains added, or -1 if path cannot be opened.
+ */
+int ndpi_load_risk_domain_file(struct ndpi_detection_module_struct *ndpi_str, const char *path) {
+  char buffer[128], *line;
+  FILE *fd;
+  int num = 0;
+
+  fd = fopen(path, "r");
+
+  if(fd == NULL) {
+    NDPI_LOG_ERR(ndpi_str, "Unable to open file %s [%s]\n", path, strerror(errno));
+    return(-1);
+  }
+
+  while((line = fgets(buffer, sizeof(buffer), fd)) != NULL) {
+    /* Cut at the first CR/LF only: blindly dropping the last byte would
+       truncate a final line that has no terminating newline */
+    line[strcspn(line, "\r\n")] = '\0';
+
+    if((line[0] == '\0') || (line[0] == '#'))
+      continue;
+
+    if(ndpi_load_risky_domain(ndpi_str, line) >= 0)
+      num++;
+  }
+
+  fclose(fd);
+
+  /* The automa is created on demand, so it may still be NULL if no domain was ever loaded */
+  if(ndpi_str->risky_domain_automa.ac_automa)
+    ac_automata_finalize((AC_AUTOMATA_t *)ndpi_str->risky_domain_automa.ac_automa);
+
+  return(num);
+}
+
+/* ******************************************************************** */
+
/*
Format:
<tcp|udp>:<port>,<tcp|udp>:<port>,.....@<proto>
@@ -6415,9 +6490,11 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str,
/* ****************************************************** */
- u_int16_t ndpi_match_host_subprotocol(struct ndpi_detection_module_struct *ndpi_str, struct ndpi_flow_struct *flow,
+ u_int16_t ndpi_match_host_subprotocol(struct ndpi_detection_module_struct *ndpi_str,
+ struct ndpi_flow_struct *flow,
char *string_to_match, u_int string_to_match_len,
- ndpi_protocol_match_result *ret_match, u_int16_t master_protocol_id) {
+ ndpi_protocol_match_result *ret_match,
+ u_int16_t master_protocol_id) {
u_int16_t rc, buf_len, i;
ndpi_protocol_category_t id;
char buf[96];
@@ -6427,8 +6504,7 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str,
buf[i++] = '$'; /* Add trailer $ */
buf[i] = '\0';
- rc = ndpi_automa_match_string_subprotocol(ndpi_str, flow,
- buf, i,
+ rc = ndpi_automa_match_string_subprotocol(ndpi_str, flow, buf, i,
master_protocol_id, ret_match, 1);
id = ret_match->protocol_category;
@@ -6439,12 +6515,20 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str,
}
}
+ if(ndpi_str->risky_domain_automa.ac_automa != NULL) {
+ u_int16_t rc1 = ndpi_match_string(ndpi_str->risky_domain_automa.ac_automa, buf);
+
+ if(rc1 > 0)
+ NDPI_SET_BIT(flow->risk, NDPI_RISKY_DOMAIN);
+ }
+
return(rc);
}
/* **************************************** */
- int ndpi_match_hostname_protocol(struct ndpi_detection_module_struct *ndpi_struct, struct ndpi_flow_struct *flow,
+ int ndpi_match_hostname_protocol(struct ndpi_detection_module_struct *ndpi_struct,
+ struct ndpi_flow_struct *flow,
u_int16_t master_protocol, char *name, u_int name_len) {
ndpi_protocol_match_result ret_match;
u_int16_t subproto, what_len;
@@ -6455,7 +6539,8 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str,
else
what = name, what_len = name_len;
- subproto = ndpi_match_host_subprotocol(ndpi_struct, flow, what, what_len, &ret_match, master_protocol);
+ subproto = ndpi_match_host_subprotocol(ndpi_struct, flow, what, what_len,
+ &ret_match, master_protocol);
if(subproto != NDPI_PROTOCOL_UNKNOWN) {
ndpi_set_detected_protocol(ndpi_struct, flow, subproto, master_protocol);
diff --git a/tests/pcap/tk.pcap b/tests/pcap/tk.pcap
new file mode 100644
index 000000000..25a655b1f
--- /dev/null
+++ b/tests/pcap/tk.pcap
Binary files differ
diff --git a/tests/result/tk.pcap.out b/tests/result/tk.pcap.out
new file mode 100644
index 000000000..59e428e5f
--- /dev/null
+++ b/tests/result/tk.pcap.out
@@ -0,0 +1,5 @@
+DNS 6 566 3
+
+ 1 UDP 192.168.1.178:53820 <-> 192.168.1.1:53 [proto: 5/DNS][cat: Network/14][1 pkts/72 bytes <-> 1 pkts/131 bytes][Goodput ratio: 41/67][0.05 sec][Host: whois.dot.tk][::][Risk: ** Risky domain name **][PLAIN TEXT (freenom)][Plen Bins: 50,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 2 UDP 192.168.1.178:55591 <-> 192.168.1.1:53 [proto: 5/DNS][cat: Network/14][1 pkts/72 bytes <-> 1 pkts/131 bytes][Goodput ratio: 41/67][0.06 sec][Host: whois.dot.tk][::][Risk: ** Risky domain name **][PLAIN TEXT (freenom)][Plen Bins: 50,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
+ 3 UDP 192.168.1.178:51954 <-> 192.168.1.1:53 [proto: 5/DNS][cat: Network/14][1 pkts/72 bytes <-> 1 pkts/88 bytes][Goodput ratio: 41/52][0.10 sec][Host: whois.dot.tk][104.155.55.158][Risk: ** Risky domain name **][Plen Bins: 50,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]