diff options
author | Luca Deri <deri@ntop.org> | 2021-02-21 21:33:26 +0100 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2021-02-21 21:45:46 +0100 |
commit | fc16c9368e1f5ba93144115d687fd2ce09f50955 (patch) | |
tree | d85dacc29238bae062e38ca34b9a111d0fd8cfa0 | |
parent | 6db20b3ba932a826002c7af3d02cce5d8a0c361d (diff) |
Added risky domain flow-risk support
-rw-r--r-- | example/ndpiReader.c | 20 | ||||
-rw-r--r-- | example/risky_domains.txt | 11 | ||||
-rw-r--r-- | src/include/ndpi_api.h.in | 10 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 5 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 103 | ||||
-rw-r--r-- | tests/pcap/tk.pcap | bin | 0 -> 686 bytes | |||
-rw-r--r-- | tests/result/tk.pcap.out | 5 |
7 files changed, 135 insertions, 19 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c index 7681d1627..591480ff3 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -65,8 +65,9 @@ static FILE *playlist_fp[MAX_NUM_READER_THREADS] = { NULL }; /**< Ingress playli static FILE *results_file = NULL; static char *results_path = NULL; static char * bpfFilter = NULL; /**< bpf filter */ -static char *_protoFilePath = NULL; /**< Protocol file path */ +static char *_protoFilePath = NULL; /**< Protocol file path */ static char *_customCategoryFilePath= NULL; /**< Custom categories file path */ +static char *_riskyDomainFilePath = NULL; /**< Risky domain files */ static u_int8_t live_capture = 0; static u_int8_t undetected_flows_deleted = 0; FILE *csv_fp = NULL; /**< for CSV export */ @@ -437,7 +438,7 @@ static void help(u_int long_help) { "[-f <filter>][-s <duration>][-m <duration>][-b <num bin clusters>]\n" " [-p <protos>][-l <loops> [-q][-d][-J][-h][-D][-e <len>][-t][-v <level>]\n" " [-n <threads>][-w <file>][-c <file>][-C <file>][-j <file>][-x <file>]\n" - " [-T <num>][-U <num>] [-x <domain>]\n\n" + " [-r <file>][-T <num>][-U <num>] [-x <domain>]\n\n" "Usage:\n" " -i <file.pcap|device> | Specify a pcap file/playlist to read packets from or a\n" " | device for live capture (comma-separated list)\n" @@ -465,9 +466,9 @@ static void help(u_int long_help) { " | <d> = max packet payload dissection\n" " | <d> = max num reported payloads\n" " | Default: %u:%u:%u:%u:%u\n" - " -r | Print nDPI version and git revision\n" " -c <path> | Load custom categories from the specified file\n" " -C <path> | Write output in CSV format on the specified file\n" + " -r <path> | Load risky domain file\n" " -w <path> | Write test output on the specified file. 
This is useful for\n" " | testing purposes in order to compare results across runs\n" " -h | This help\n" @@ -762,7 +763,7 @@ static void parseOptions(int argc, char **argv) { } #endif - while((opt = getopt_long(argc, argv, "b:e:c:C:dDf:g:i:Ihp:P:l:s:tu:v:V:n:Jrp:x:w:q0123:456:7:89:m:T:U:", + while((opt = getopt_long(argc, argv, "b:e:c:C:dDf:g:i:Ihp:P:l:r:s:tu:v:V:n:Jrp:x:w:q0123:456:7:89:m:T:U:", longopts, &option_idx)) != EOF) { #ifdef DEBUG_TRACE if(trace) fprintf(trace, " #### -%c [%s] #### \n", opt, optarg ? optarg : ""); @@ -829,6 +830,10 @@ static void parseOptions(int argc, char **argv) { printf("Unable to write on CSV file %s\n", optarg); break; + case 'r': + _riskyDomainFilePath = optarg; + break; + case 's': capture_for = atoi(optarg); capture_until = capture_for + time(NULL); @@ -838,10 +843,6 @@ static void parseOptions(int argc, char **argv) { decode_tunnels = 1; break; - case 'r': - printf("ndpiReader - nDPI (%s)\n", ndpi_revision()); - exit(0); - case 'v': verbose = atoi(optarg); break; @@ -2056,6 +2057,9 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) { if(_customCategoryFilePath) ndpi_load_categories_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _customCategoryFilePath); + if(_riskyDomainFilePath) + ndpi_load_risk_domain_file(ndpi_thread_info[thread_id].workflow->ndpi_struct, _riskyDomainFilePath); + ndpi_finalize_initalization(ndpi_thread_info[thread_id].workflow->ndpi_struct); if(enable_doh_dot_detection) diff --git a/example/risky_domains.txt b/example/risky_domains.txt new file mode 100644 index 000000000..5e0eb5af0 --- /dev/null +++ b/example/risky_domains.txt @@ -0,0 +1,11 @@ +# https://www.spamhaus.org/statistics/tlds/ +.fail +.viajes +.exposed +.london +.work +.gq +.ml +.surk +.tk +.xyz diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in index e701c51d4..8163c0553 100644 --- a/src/include/ndpi_api.h.in +++ b/src/include/ndpi_api.h.in @@ -695,6 +695,16 @@ extern "C" { */ int 
ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, const char* path); + /** + * Read a file and load the list of risky domains + * + * @par ndpi_mod = the detection module + * @par path = the path of the file + * @return 0 if the file is loaded correctly; + * -1 else + */ + int ndpi_load_risk_domain_file(struct ndpi_detection_module_struct *ndpi_str, const char* path); + /** * Get the total number of the supported protocols * diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 3f86ec416..795d78b02 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -1094,11 +1094,12 @@ struct ndpi_detection_module_struct { u_int ndpi_num_supported_protocols; u_int ndpi_num_custom_protocols; - /* HTTP/DNS/HTTPS host matching */ + /* HTTP/DNS/HTTPS/QUIC host matching */ ndpi_automa host_automa, /* Used for DNS/HTTPS */ content_automa, /* Used for HTTP subprotocol_detection */ subprotocol_automa, /* Used for HTTP subprotocol_detection */ - bigrams_automa, impossible_bigrams_automa; /* TOR */ + bigrams_automa, impossible_bigrams_automa, /* TOR */ + risky_domain_automa; /* IMPORTANT: please update ndpi_finalize_initalization() whenever you add a new automa */ struct { diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index eaf6206b8..e00852aa5 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -2117,7 +2117,8 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs ndpi_str->content_automa.ac_automa = ac_automata_init(ac_match_handler); ndpi_str->bigrams_automa.ac_automa = ac_automata_init(ac_match_handler); ndpi_str->impossible_bigrams_automa.ac_automa = ac_automata_init(ac_match_handler); - + ndpi_str->risky_domain_automa.ac_automa = NULL; /* Initialized on demand */ + if((sizeof(categories) / sizeof(char *)) != NDPI_PROTOCOL_NUM_CATEGORIES) { NDPI_LOG_ERR(ndpi_str, "[NDPI] invalid categories length: expected %u, got %u\n", NDPI_PROTOCOL_NUM_CATEGORIES, (unsigned int) 
(sizeof(categories) / sizeof(char *))); @@ -2149,7 +2150,7 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(ndpi_init_prefs void ndpi_finalize_initalization(struct ndpi_detection_module_struct *ndpi_str) { u_int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < 5; i++) { ndpi_automa *automa; switch(i) { @@ -2169,12 +2170,16 @@ void ndpi_finalize_initalization(struct ndpi_detection_module_struct *ndpi_str) automa = &ndpi_str->impossible_bigrams_automa; break; + case 4: + automa = &ndpi_str->risky_domain_automa; + break; + default: automa = NULL; break; } - if(automa) { + if(automa && automa->ac_automa) { ac_automata_finalize((AC_AUTOMATA_t *) automa->ac_automa); automa->ac_automa_finalized = 1; } @@ -2426,6 +2431,9 @@ void ndpi_exit_detection_module(struct ndpi_detection_module_struct *ndpi_str) { if(ndpi_str->impossible_bigrams_automa.ac_automa != NULL) ac_automata_release((AC_AUTOMATA_t *) ndpi_str->impossible_bigrams_automa.ac_automa, 0); + if(ndpi_str->risky_domain_automa.ac_automa != NULL) + ac_automata_release((AC_AUTOMATA_t *) ndpi_str->risky_domain_automa.ac_automa, 0); + if(ndpi_str->custom_categories.hostnames.ac_automa != NULL) ac_automata_release((AC_AUTOMATA_t *) ndpi_str->custom_categories.hostnames.ac_automa, 1 /* free patterns strings memory */); @@ -2805,6 +2813,73 @@ int ndpi_load_categories_file(struct ndpi_detection_module_struct *ndpi_str, con /* ******************************************************************** */ +static int ndpi_load_risky_domain(struct ndpi_detection_module_struct *ndpi_str, + char* domain_name) { + if(ndpi_str->risky_domain_automa.ac_automa == NULL) + ndpi_str->risky_domain_automa.ac_automa = ac_automata_init(ac_match_handler); + + if(ndpi_str->risky_domain_automa.ac_automa) { + char buf[64]; + u_int i, len; + + snprintf(buf, sizeof(buf)-1, "%s$", domain_name); + for (i = 0, len = strlen(buf)-1 /* Skip $ */; i < len; i++) buf[i] = tolower(buf[i]); + + 
return(ndpi_add_string_to_automa(ndpi_str->risky_domain_automa.ac_automa, buf)); + } + + return(-1); +} + +/* ******************************************************************** */ + +/* + * Format: + * + * <domain name> + * + * Notes: + * - you can add a .<domain name> to avoid mismatches + */ +int ndpi_load_risk_domain_file(struct ndpi_detection_module_struct *ndpi_str, const char *path) { + char buffer[128], *line; + FILE *fd; + int len, num = 0; + + fd = fopen(path, "r"); + + if(fd == NULL) { + NDPI_LOG_ERR(ndpi_str, "Unable to open file %s [%s]\n", path, strerror(errno)); + return(-1); + } + + while(1) { + line = fgets(buffer, sizeof(buffer), fd); + + if(line == NULL) + break; + + len = strlen(line); + + if((len <= 1) || (line[0] == '#')) + continue; + + line[len - 1] = '\0'; + + if(ndpi_load_risky_domain(ndpi_str, line) >= 0) + num++; + } + + fclose(fd); + + if(ndpi_str->risky_domain_automa.ac_automa) + ac_automata_finalize((AC_AUTOMATA_t *)ndpi_str->risky_domain_automa.ac_automa); + + return(num); +} + +/* ******************************************************************** */ + /* Format: <tcp|udp>:<port>,<tcp|udp>:<port>,.....@<proto> @@ -6415,9 +6490,11 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str, /* ****************************************************** */ - u_int16_t ndpi_match_host_subprotocol(struct ndpi_detection_module_struct *ndpi_str, struct ndpi_flow_struct *flow, + u_int16_t ndpi_match_host_subprotocol(struct ndpi_detection_module_struct *ndpi_str, + struct ndpi_flow_struct *flow, char *string_to_match, u_int string_to_match_len, - ndpi_protocol_match_result *ret_match, u_int16_t master_protocol_id) { + ndpi_protocol_match_result *ret_match, + u_int16_t master_protocol_id) { u_int16_t rc, buf_len, i; ndpi_protocol_category_t id; char buf[96]; @@ -6427,8 +6504,7 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str, buf[i++] = '$'; /* Add trailer $ */ buf[i] = '\0'; - rc = 
ndpi_automa_match_string_subprotocol(ndpi_str, flow, - buf, i, + rc = ndpi_automa_match_string_subprotocol(ndpi_str, flow, buf, i, master_protocol_id, ret_match, 1); id = ret_match->protocol_category; @@ -6439,12 +6515,20 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str, } } + if(ndpi_str->risky_domain_automa.ac_automa != NULL) { + u_int16_t rc1 = ndpi_match_string(ndpi_str->risky_domain_automa.ac_automa, buf); + + if(rc1 > 0) + NDPI_SET_BIT(flow->risk, NDPI_RISKY_DOMAIN); + } + return(rc); } /* **************************************** */ - int ndpi_match_hostname_protocol(struct ndpi_detection_module_struct *ndpi_struct, struct ndpi_flow_struct *flow, + int ndpi_match_hostname_protocol(struct ndpi_detection_module_struct *ndpi_struct, + struct ndpi_flow_struct *flow, u_int16_t master_protocol, char *name, u_int name_len) { ndpi_protocol_match_result ret_match; u_int16_t subproto, what_len; @@ -6455,7 +6539,8 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str, else what = name, what_len = name_len; - subproto = ndpi_match_host_subprotocol(ndpi_struct, flow, what, what_len, &ret_match, master_protocol); + subproto = ndpi_match_host_subprotocol(ndpi_struct, flow, what, what_len, + &ret_match, master_protocol); if(subproto != NDPI_PROTOCOL_UNKNOWN) { ndpi_set_detected_protocol(ndpi_struct, flow, subproto, master_protocol); diff --git a/tests/pcap/tk.pcap b/tests/pcap/tk.pcap Binary files differ new file mode 100644 index 000000000..25a655b1f --- /dev/null +++ b/tests/pcap/tk.pcap diff --git a/tests/result/tk.pcap.out b/tests/result/tk.pcap.out new file mode 100644 index 000000000..59e428e5f --- /dev/null +++ b/tests/result/tk.pcap.out @@ -0,0 +1,5 @@ +DNS 6 566 3 + + 1 UDP 192.168.1.178:53820 <-> 192.168.1.1:53 [proto: 5/DNS][cat: Network/14][1 pkts/72 bytes <-> 1 pkts/131 bytes][Goodput ratio: 41/67][0.05 sec][Host: whois.dot.tk][::][Risk: ** Risky domain name **][PLAIN TEXT (freenom)][Plen Bins: 
50,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] + 2 UDP 192.168.1.178:55591 <-> 192.168.1.1:53 [proto: 5/DNS][cat: Network/14][1 pkts/72 bytes <-> 1 pkts/131 bytes][Goodput ratio: 41/67][0.06 sec][Host: whois.dot.tk][::][Risk: ** Risky domain name **][PLAIN TEXT (freenom)][Plen Bins: 50,0,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] + 3 UDP 192.168.1.178:51954 <-> 192.168.1.1:53 [proto: 5/DNS][cat: Network/14][1 pkts/72 bytes <-> 1 pkts/88 bytes][Goodput ratio: 41/52][0.10 sec][Host: whois.dot.tk][104.155.55.158][Risk: ** Risky domain name **][Plen Bins: 50,50,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] |