diff options
author | Luca Deri <deri@ntop.org> | 2021-03-08 22:57:30 +0100 |
---|---|---|
committer | Luca Deri <deri@ntop.org> | 2021-03-08 22:57:30 +0100 |
commit | bb6423a79f80898d140c946dd08ea571ae95d5e7 (patch) | |
tree | 4f1d13d3e0585d11a538247f473f6e9eee5b0c4d /src | |
parent | c3490e80a743146b35274dfb92abe5e075e00d01 (diff) |
Added the ability to define a custom DGA detection function by overwriting
the value of the function pointer ndpi_dga_function, which is currently set to NULL
(meaning that the nDPI internal DGA function will be used).
Diffstat (limited to 'src')
-rw-r--r-- | src/include/ndpi_api.h.in | 7 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 3 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 442 |
3 files changed, 238 insertions, 214 deletions
diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in index 6a4b18366..c99e978a4 100644 --- a/src/include/ndpi_api.h.in +++ b/src/include/ndpi_api.h.in @@ -47,7 +47,12 @@ extern "C" { #define SAVE_DETECTION_BITMASK_AS_UNKNOWN 1 #define NO_SAVE_DETECTION_BITMASK_AS_UNKNOWN 0 - + /* + In case a custom DGA function is used, the fucntion + below must be overwritten, + */ + extern ndpi_custom_dga_predict_fctn ndpi_dga_function; + /** * Check if a string is encoded with punycode * ( https://tools.ietf.org/html/rfc3492 ) diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 87c31e354..cd167a24e 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -1662,6 +1662,9 @@ struct ndpi_hw_struct { double *s; }; +/* Prototype used to define custom DGA detection function */ +typedef int (*ndpi_custom_dga_predict_fctn)(const char* domain, int domain_length); + /* **************************************** */ #endif /* __NDPI_TYPEDEFS_H__ */ diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 44f0387bf..a00f9bdd4 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -84,6 +84,10 @@ static int removeDefaultPort(ndpi_port_range *range, ndpi_proto_defaults_t *def, /* ****************************************** */ +ndpi_custom_dga_predict_fctn ndpi_dga_function = NULL; + +/* ****************************************** */ + static inline uint8_t flow_is_proto(struct ndpi_flow_struct *flow, u_int16_t p) { return((flow->detected_protocol_stack[0] == p) || (flow->detected_protocol_stack[1] == p)); } @@ -7311,268 +7315,280 @@ uint8_t ndpi_connection_tracking(struct ndpi_detection_module_struct *ndpi_str, int ndpi_check_dga_name(struct ndpi_detection_module_struct *ndpi_str, struct ndpi_flow_struct *flow, char *name, u_int8_t is_hostname) { - int len, rc = 0, trigram_char_skip = 0; - u_int8_t max_num_char_repetitions = 0, last_char = 0, num_char_repetitions = 0, num_dots = 0, num_trigram_dots = 0; - u_int8_t 
max_domain_element_len = 0, curr_domain_element_len = 0, first_element_is_numeric = 1; - - if((!name) - || (strchr(name, '_') != NULL) - || (endsWith(name, "in-addr.arpa", 12))) - return(0); + if(ndpi_dga_function != NULL) { + /* A custom DGA function is defined */ + int rc = ndpi_dga_function(name, is_hostname); + + if(rc) { + if(flow) + ndpi_set_risk(flow, NDPI_SUSPICIOUS_DGA_DOMAIN); + } + + return(1); + } else { + int len, rc = 0, trigram_char_skip = 0; + u_int8_t max_num_char_repetitions = 0, last_char = 0, num_char_repetitions = 0, num_dots = 0, num_trigram_dots = 0; + u_int8_t max_domain_element_len = 0, curr_domain_element_len = 0, first_element_is_numeric = 1; + + if((!name) + || (strchr(name, '_') != NULL) + || (endsWith(name, "in-addr.arpa", 12))) + return(0); - if(flow && (flow->packet.detected_protocol_stack[1] != NDPI_PROTOCOL_UNKNOWN)) - return(0); /* Ignore DGA check for protocols already fully detected */ + if(flow && (flow->packet.detected_protocol_stack[1] != NDPI_PROTOCOL_UNKNOWN)) + return(0); /* Ignore DGA check for protocols already fully detected */ - if(strncmp(name, "www.", 4) == 0) - name = &name[4]; + if(strncmp(name, "www.", 4) == 0) + name = &name[4]; - if(ndpi_verbose_dga_detection) - printf("[DGA check] %s\n", name); + if(ndpi_verbose_dga_detection) + printf("[DGA check] %s\n", name); - len = strlen(name); + len = strlen(name); - if(len >= 5) { - int i, j, num_found = 0, num_impossible = 0, num_bigram_checks = 0, - num_trigram_found = 0, num_trigram_checked = 0, num_dash = 0, - num_digits = 0, num_vowels = 0, num_trigram_vowels = 0, num_words = 0, skip_next_bigram = 0; - char tmp[128], *word, *tok_tmp; - u_int max_tmp_len = sizeof(tmp)-1; + if(len >= 5) { + int i, j, num_found = 0, num_impossible = 0, num_bigram_checks = 0, + num_trigram_found = 0, num_trigram_checked = 0, num_dash = 0, + num_digits = 0, num_vowels = 0, num_trigram_vowels = 0, num_words = 0, skip_next_bigram = 0; + char tmp[128], *word, *tok_tmp; + u_int max_tmp_len 
= sizeof(tmp)-1; - len = snprintf(tmp, max_tmp_len, "%s", name); - if(len < 0) { + len = snprintf(tmp, max_tmp_len, "%s", name); + if(len < 0) { - if(ndpi_verbose_dga_detection) - printf("[DGA] Too short"); + if(ndpi_verbose_dga_detection) + printf("[DGA] Too short"); - return(0); - } else - tmp[len < max_tmp_len ? len : max_tmp_len] = '\0'; + return(0); + } else + tmp[len < max_tmp_len ? len : max_tmp_len] = '\0'; - for(i=0, j=0; (i<len) && (j<max_tmp_len); i++) { - tmp[j] = tolower(name[i]); + for(i=0, j=0; (i<len) && (j<max_tmp_len); i++) { + tmp[j] = tolower(name[i]); - if(tmp[j] == '.') { - num_dots++; - } else if(num_dots == 0) { - if(!isdigit(tmp[j])) - first_element_is_numeric = 0; - } + if(tmp[j] == '.') { + num_dots++; + } else if(num_dots == 0) { + if(!isdigit(tmp[j])) + first_element_is_numeric = 0; + } - if(ndpi_is_vowel(tmp[j])) - num_vowels++; + if(ndpi_is_vowel(tmp[j])) + num_vowels++; - if(last_char == tmp[j]) { - if(++num_char_repetitions > max_num_char_repetitions) - max_num_char_repetitions = num_char_repetitions; - } else - num_char_repetitions = 1, last_char = tmp[j]; + if(last_char == tmp[j]) { + if(++num_char_repetitions > max_num_char_repetitions) + max_num_char_repetitions = num_char_repetitions; + } else + num_char_repetitions = 1, last_char = tmp[j]; - if(isdigit(tmp[j])) { - num_digits++; + if(isdigit(tmp[j])) { + num_digits++; - if(((j+2)<len) && isdigit(tmp[j+1]) && (tmp[j+2] == '.')) { - /* Check if there are too many digits */ - if(num_digits < 4) - return(0); /* Double digits */ + if(((j+2)<len) && isdigit(tmp[j+1]) && (tmp[j+2] == '.')) { + /* Check if there are too many digits */ + if(num_digits < 4) + return(0); /* Double digits */ + } } - } - switch(tmp[j]) { - case '.': - case '-': - case '_': - case '/': - case ')': - case '(': - case ';': - case ':': - case '[': - case ']': - case ' ': - /* - Domain/word separator chars - - NOTE: - this function is used also to detect other type of issues - such as invalid/suspiciuous user 
agent - */ - if(curr_domain_element_len > max_domain_element_len) - max_domain_element_len = curr_domain_element_len; + switch(tmp[j]) { + case '.': + case '-': + case '_': + case '/': + case ')': + case '(': + case ';': + case ':': + case '[': + case ']': + case ' ': + /* + Domain/word separator chars - curr_domain_element_len = 0; - break; + NOTE: + this function is used also to detect other type of issues + such as invalid/suspiciuous user agent + */ + if(curr_domain_element_len > max_domain_element_len) + max_domain_element_len = curr_domain_element_len; - default: - curr_domain_element_len++; - break; - } + curr_domain_element_len = 0; + break; - j++; - } + default: + curr_domain_element_len++; + break; + } - if(num_dots == 0) /* Doesn't look like a domain name */ - return(0); + j++; + } - if(curr_domain_element_len > max_domain_element_len) - max_domain_element_len = curr_domain_element_len; + if(num_dots == 0) /* Doesn't look like a domain name */ + return(0); - if(ndpi_verbose_dga_detection) - printf("[DGA] [max_num_char_repetitions: %u][max_domain_element_len: %u]\n", - max_num_char_repetitions, max_domain_element_len); - - if( - (is_hostname - && (num_dots > 5) - && (!first_element_is_numeric) - ) - || (max_num_char_repetitions > 5 /* num or consecutive repeated chars */) - /* - In case of a name with too many consecutive chars an alert is triggered - This is the case for instance of the wildcard DNS query used by NetBIOS - (ckaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) and that can be exploited - for reflection attacks - - https://www.akamai.com/uk/en/multimedia/documents/state-of-the-internet/ddos-reflection-netbios-name-server-rpc-portmap-sentinel-udp-threat-advisory.pdf - - http://ubiqx.org/cifs/NetBIOS.html - */ - || (max_domain_element_len >= 19 /* word too long. 
Example bbcbedxhgjmdobdprmen.com */) - ) { - if(flow) ndpi_set_risk(flow, NDPI_SUSPICIOUS_DGA_DOMAIN); + if(curr_domain_element_len > max_domain_element_len) + max_domain_element_len = curr_domain_element_len; if(ndpi_verbose_dga_detection) - printf("[DGA] Found!"); - - return(1); - } - - tmp[j] = '\0'; - len = j; - - for(word = strtok_r(tmp, ".", &tok_tmp); ; word = strtok_r(NULL, ".", &tok_tmp)) { - if(!word) break; + printf("[DGA] [max_num_char_repetitions: %u][max_domain_element_len: %u]\n", + max_num_char_repetitions, max_domain_element_len); + + if( + (is_hostname + && (num_dots > 5) + && (!first_element_is_numeric) + ) + || (max_num_char_repetitions > 5 /* num or consecutive repeated chars */) + /* + In case of a name with too many consecutive chars an alert is triggered + This is the case for instance of the wildcard DNS query used by NetBIOS + (ckaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa) and that can be exploited + for reflection attacks + - https://www.akamai.com/uk/en/multimedia/documents/state-of-the-internet/ddos-reflection-netbios-name-server-rpc-portmap-sentinel-udp-threat-advisory.pdf + - http://ubiqx.org/cifs/NetBIOS.html + */ + || (max_domain_element_len >= 19 /* word too long. 
Example bbcbedxhgjmdobdprmen.com */) + ) { + if(flow) ndpi_set_risk(flow, NDPI_SUSPICIOUS_DGA_DOMAIN); - num_words++; + if(ndpi_verbose_dga_detection) + printf("[DGA] Found!"); - if(strlen(word) < 3) continue; + return(1); + } - if(ndpi_verbose_dga_detection) - printf("-> word(%s) [%s][len: %u]\n", word, name, (unsigned int)strlen(word)); + tmp[j] = '\0'; + len = j; - trigram_char_skip = 0; - - for(i = 0; word[i+1] != '\0'; i++) { - switch(word[i]) { - case '-': - num_dash++; - /* - Let's check for double+consecutive -- - that are usually ok - r2---sn-uxaxpu5ap5-2n5e.gvt1.com - */ - if(word[i+1] == '-') - return(0); /* Double dash */ + for(word = strtok_r(tmp, ".", &tok_tmp); ; word = strtok_r(NULL, ".", &tok_tmp)) { + if(!word) break; - case '_': - case ':': - continue; - break; + num_words++; - case '.': - continue; - break; - } - num_bigram_checks++; + if(strlen(word) < 3) continue; if(ndpi_verbose_dga_detection) - printf("-> Checking %c%c\n", word[i], word[i+1]); + printf("-> word(%s) [%s][len: %u]\n", word, name, (unsigned int)strlen(word)); + + trigram_char_skip = 0; + + for(i = 0; word[i+1] != '\0'; i++) { + switch(word[i]) { + case '-': + num_dash++; + /* + Let's check for double+consecutive -- + that are usually ok + r2---sn-uxaxpu5ap5-2n5e.gvt1.com + */ + if(word[i+1] == '-') + return(0); /* Double dash */ + + case '_': + case ':': + continue; + break; + + case '.': + continue; + break; + } + num_bigram_checks++; - if(ndpi_match_bigram(ndpi_str, - &ndpi_str->impossible_bigrams_automa, - &word[i])) { if(ndpi_verbose_dga_detection) - printf("IMPOSSIBLE %s\n", &word[i]); + printf("-> Checking %c%c\n", word[i], word[i+1]); - num_impossible++; - } else { - if(!skip_next_bigram) { - if(ndpi_match_bigram(ndpi_str, &ndpi_str->bigrams_automa, &word[i])) { - num_found++, skip_next_bigram = 1; - } - } else - skip_next_bigram = 0; - } + if(ndpi_match_bigram(ndpi_str, + &ndpi_str->impossible_bigrams_automa, + &word[i])) { + if(ndpi_verbose_dga_detection) + 
printf("IMPOSSIBLE %s\n", &word[i]); - if((num_trigram_dots < 2) && (word[i+2] != '\0')) { - if(ndpi_verbose_dga_detection) - printf("***> %s [trigram_char_skip: %u]\n", &word[i], trigram_char_skip); + num_impossible++; + } else { + if(!skip_next_bigram) { + if(ndpi_match_bigram(ndpi_str, &ndpi_str->bigrams_automa, &word[i])) { + num_found++, skip_next_bigram = 1; + } + } else + skip_next_bigram = 0; + } + + if((num_trigram_dots < 2) && (word[i+2] != '\0')) { + if(ndpi_verbose_dga_detection) + printf("***> %s [trigram_char_skip: %u]\n", &word[i], trigram_char_skip); - if(ndpi_is_trigram_char(word[i]) && ndpi_is_trigram_char(word[i+1]) && ndpi_is_trigram_char(word[i+2])) { - if(trigram_char_skip) { + if(ndpi_is_trigram_char(word[i]) && ndpi_is_trigram_char(word[i+1]) && ndpi_is_trigram_char(word[i+2])) { + if(trigram_char_skip) { trigram_char_skip--; - } else { - num_trigram_checked++; + } else { + num_trigram_checked++; - if(ndpi_match_trigram(ndpi_str, &ndpi_str->trigrams_automa, &word[i])) - num_trigram_found++, trigram_char_skip = 2 /* 1 char overlap */; - else if(ndpi_verbose_dga_detection) - printf("[NDPI] NO Trigram %c%c%c\n", word[i], word[i+1], word[i+2]); + if(ndpi_match_trigram(ndpi_str, &ndpi_str->trigrams_automa, &word[i])) + num_trigram_found++, trigram_char_skip = 2 /* 1 char overlap */; + else if(ndpi_verbose_dga_detection) + printf("[NDPI] NO Trigram %c%c%c\n", word[i], word[i+1], word[i+2]); - /* Count vowels */ - num_trigram_vowels += ndpi_is_vowel(word[i]) + ndpi_is_vowel(word[i+1]) + ndpi_is_vowel(word[i+2]); - } - } else { - if(word[i] == '.') - num_trigram_dots++; + /* Count vowels */ + num_trigram_vowels += ndpi_is_vowel(word[i]) + ndpi_is_vowel(word[i+1]) + ndpi_is_vowel(word[i+2]); + } + } else { + if(word[i] == '.') + num_trigram_dots++; - trigram_char_skip = 0; + trigram_char_skip = 0; + } } - } + } /* for */ } /* for */ - } /* for */ - if(ndpi_verbose_dga_detection) - printf("[%s][num_found: %u][num_impossible: %u][num_digits: 
%u][num_bigram_checks: %u][num_vowels: %u/%u][num_trigram_vowels: %u][num_trigram_found: %u/%u][vowels: %u][rc: %u]\n", - name, num_found, num_impossible, num_digits, num_bigram_checks, num_vowels, len, num_trigram_vowels, - num_trigram_checked, num_trigram_found, num_vowels, rc); - - if((len > 16) && (num_dots < 3) && ((num_vowels*4) < (len-num_dots))) { - if((num_trigram_checked > 2) && (num_trigram_vowels >= (num_trigram_found-1))) - ; /* skip me */ - else - rc = 1; - } + if(ndpi_verbose_dga_detection) + printf("[%s][num_found: %u][num_impossible: %u][num_digits: %u][num_bigram_checks: %u][num_vowels: %u/%u][num_trigram_vowels: %u][num_trigram_found: %u/%u][vowels: %u][rc: %u]\n", + name, num_found, num_impossible, num_digits, num_bigram_checks, num_vowels, len, num_trigram_vowels, + num_trigram_checked, num_trigram_found, num_vowels, rc); + + if((len > 16) && (num_dots < 3) && ((num_vowels*4) < (len-num_dots))) { + if((num_trigram_checked > 2) && (num_trigram_vowels >= (num_trigram_found-1))) + ; /* skip me */ + else + rc = 1; + } - if(num_bigram_checks - && (num_dots > 0) - && ((num_found == 0) || ((num_digits > 5) && (num_words <= 3)) - || enough(num_found, num_impossible) - || ((num_trigram_checked > 2) - && ((num_trigram_found < (num_trigram_checked/2)) - || ((num_trigram_vowels < (num_trigram_found-1)) && (num_dash == 0) && (num_dots > 1))) - ) - ) - ) - rc = 1; - - if((num_trigram_checked > 2) && (num_vowels == 0)) - rc = 1; - - if(num_dash > 2) - rc = 0; + if(num_bigram_checks + && (num_dots > 0) + && ((num_found == 0) || ((num_digits > 5) && (num_words <= 3)) + || enough(num_found, num_impossible) + || ((num_trigram_checked > 2) + && ((num_trigram_found < (num_trigram_checked/2)) + || ((num_trigram_vowels < (num_trigram_found-1)) && (num_dash == 0) && (num_dots > 1))) + ) + ) + ) + rc = 1; + + if((num_trigram_checked > 2) && (num_vowels == 0)) + rc = 1; + + if(num_dash > 2) + rc = 0; - if(ndpi_verbose_dga_detection) { - if(rc) - printf("DGA %s 
[num_found: %u][num_impossible: %u]\n", - name, num_found, num_impossible); + if(ndpi_verbose_dga_detection) { + if(rc) + printf("DGA %s [num_found: %u][num_impossible: %u]\n", + name, num_found, num_impossible); + } } - } - if(ndpi_verbose_dga_detection) - printf("[DGA] Result: %u\n", rc); + if(ndpi_verbose_dga_detection) + printf("[DGA] Result: %u\n", rc); - if(rc && flow) - ndpi_set_risk(flow, NDPI_SUSPICIOUS_DGA_DOMAIN); + if(rc && flow) + ndpi_set_risk(flow, NDPI_SUSPICIOUS_DGA_DOMAIN); - return(rc); + return(rc); + } } - + /* ******************************************************************** */ |