about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/lib/ndpi_content_match.c.inc 47
-rw-r--r-- src/lib/ndpi_main.c 117
2 files changed, 133 insertions, 31 deletions
diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc
index facd57544..7828c50f7 100644
--- a/src/lib/ndpi_content_match.c.inc
+++ b/src/lib/ndpi_content_match.c.inc
@@ -9233,6 +9233,10 @@ static ndpi_category_match category_match[] = {
word or just random chars.
http://www3.nd.edu/~busiforc/handouts/cryptography/Letter%20Frequencies.html
+
+ DGA Datasets
+ - https://data.netlab.360.com/dga/
+ - https://github.com/baderj/domain_generation_algorithms
*/
static const char *ndpi_en_bigrams[] = {
@@ -9273,6 +9277,49 @@ static const char *ndpi_en_bigrams[] = {
/* ******************************************************************** */
+static const char *ndpi_en_popular_bigrams[] = {
+ "th", "he", "in", "er", "an", "re", "on", "at", "en", "nd", "ti", "es", "or", "te", "of", "ed", "is", "it",
+ "al", "ar", "st", "to", "nt", "ng", "se", "ha", "as", "ou", "io", "le", "ve", "co", "me", "de", "hi", "ri",
+ "ro", "ic", "ne", "ea", "ra", "ce", "li", "ch", "ll", "be", "ma", "si", "om", "ur", "ca", "el", "ta", "la",
+ "ns", "di", "fo", "ho", "pe", "ec", "pr", "no", "ct", "us", "ac", "ot", "il", "tr", "ly", "nc", "et", "ut",
+ "ss", "so", "rs", "un", "lo", "wa", "ge", "ie", "wh", "ee", "wi", "em", "ad", "ol", "rt", "po", "we", "na",
+ "ul", "ni", "ts", "mo", "ow", "pa", "im", "mi", "ai", "sh", "ir", "su", "id", "os", "iv", "ia", "am", "fi",
+ "ci", "vi", "pl", "ig", "tu", "ev", "ld", "ry", "mp", "fe", "bl", "ab", "gh", "ty", "op", "wo", "sa", "ay",
+ "ex", "ke", "fr", "oo", "av", "ag", "if", "ap", "gr", "od", "bo", "sp", "rd", "do", "uc", "bu", "ei", "ov",
+ "by", "rm", "ep", "tt", "oc", "fa", "ef", "cu", "rn", "sc", "gi", "da", "yo", "cr", "cl", "du", "ga", "qu",
+ "ue", "ff", "ba", "ey", "ls", "va", "um", "pp", "ua", "up", "lu", "go", "ht", "ru", "ug", "ds", "lt", "pi",
+ "rc", "rr", "eg", "au", "ck", "ew", "mu", "br", "bi", "pt", "ak", "pu", "ui", "rg", "ib", "tl", "ny", "ki",
+ "rk", "ys", "ob", "mm", "fu", "ph", "og", "ms", "ye", "ud", "mb", "ip", "ub", "oi", "rl", "gu", "dr", "hr",
+ "cc", "tw", "ft", "wn", "nu", "af", "hu", "nn", "eo", "vo", "rv", "nf", "xp", "gn", "sm", "fl", "iz", "ok",
+ "nl", "my", "gl", "aw", "ju", "oa", "eq", "sy", "sl", "ps", "jo", "lf", "nv", "je", "nk", "kn", "gs", "dy",
+ "hy", "ze", "ks", "xt", "bs", "ik", "dd", "cy", "rp", "sk", "xi", "oe", "oy", "ws", "lv", "dl", "rf", "eu",
+ "dg", "wr", "xa", "yi", "nm", "eb", "rb", "tm", "xc", "eh", "tc", "gy", "ja", "hn", "yp", "za", "gg", "ym",
+ "sw", "bj", "lm", "cs", "ii", "ix", "xe", "oh", "lk", "dv", "lp", "ax", "ox", "uf", "dm", "iu", "sf", "bt",
+ "ka", "yt", "ek", "pm", "ya", "gt", "wl", "rh", "yl", "hs", "ah", "yc", "yn", "rw", "hm", "lw", "hl", "ae",
+ "zi", "az", "lc", "py", "aj", "iq", "nj", "bb", "nh", "uo", "kl", "lr", "tn", "gm", "sn", "nr", "fy", "mn",
+ "dw", "sb", "yr", "dn", "sq", "zo", "oj", "yd", "lb", "wt", "lg", "ko", "np", "sr", "nq", "ky", "ln", "nw",
+ "tf", "fs", "cq", "dh", "sd", "vy", "dj", "hw", "xu", "ao", "ml", "uk", "uy", "ej", "ez", "hb", "nz", "nb",
+ "mc", "yb", "tp", "xh", "ux", "tz", "bv", "mf", "wd", "oz", "yw", "kh", "gd", "bm", "mr", "ku", "uv", "dt",
+ "hd", "aa", "xx", "df", "db", "ji", "kr", "xo", "cm", "zz", "nx", "yg", "xy", "kg", "tb", "dc", "bd", "sg",
+ "wy", "zy", "aq", "hf", "cd", "vu", "kw", "zu", "bn", "ih", "tg", "xv", "uz", "bc", "xf", "yz", "km", "dp",
+ "lh", "wf", "kf", "pf", "cf", "mt", "yu", "cp", "pb", "td", "zl", "sv", "hc", "mg", "pw", "gf", "pd", "pn",
+ "pc", "rx", "tv", "ij", "wm", "uh", "wk", "wb", "bh", "oq", "kt", "rq", "kb", "cg", "vr", "cn", "pk", "uu",
+ "yf", "wp", "cz", "kp", "dq", "wu", "fm", "wc", "md", "kd", "zh", "gw", "rz", "cb", "iw", "xl", "hp", "mw",
+ "vs", "fc", "rj", "bp", "mh", "hh", "yh", "uj", "fg", "fd", "gb", "pg", "tk", "kk", "hq", "fn", "lz", "vl",
+ "gp", "hz", "dk", "yk", "qi", "lx", "vd", "zs", "bw", "xq", "mv", "uw", "hg", "fb", "sj", "ww", "gk", "uq",
+ "bg", "sz", "jr", "ql", "zt", "hk", "vc", "xm", "gc", "fw", "pz", "kc", "hv", "xw", "zw", "fp", "iy", "pv",
+ "vt", "jp", "cv", "zb", "vp", "zr", "fh", "yv", "zg", "zm", "zv", "qs", "kv", "vn", "zn", "qa", "yx", "jn",
+ "bf", "mk", "cw", "jm", "lq", "jh", "kj", "jc", "gz", "js", "tx", "fk", "jl", "vm", "lj", "tj", "jj", "cj",
+ "vg", "mj", "jt", "pj", "wg", "vh", "bk", "vv", "jd", "tq", "vb", "jf", "dz", "xb", "jb", "zc", "fj", "yy",
+ "qn", "xs", "qr", "jk", "jv", "qq", "xn", "vf", "px", "zd", "qt", "zp", "qo", "dx", "hj", "gv", "jw", "qc",
+ "jy", "gj", "qb", "pq", "jg", "bz", "mx", "qm", "mz", "qf", "wj", "zq", "xr", "zk", "cx", "fx", "fv", "bx",
+ "vw", "vj", "mq", "qv", "zf", "qe", "yj", "gx", "kx", "xg", "qd", "xj", "sx", "vz", "vx", "wv", "yq", "bq",
+ "gq", "vk", "zj", "xk", "qp", "hx", "fz", "qh", "qj", "jz", "vq", "kq", "xd", "qw", "jx", "qx", "kz", "wx",
+ "fq", "xz", "zx", "jq", "qg", "qk", "qy", "qz", "wq", "wz", NULL
+ };
+
+/* ******************************************************************** */
+
static const char *ndpi_en_impossible_bigrams[] = {
"bk", "bq", "bx", "cb", "cf", "cg", "cj", "cp", "cv", "cw", "cx", "dx", "fk", "fq", "fv", "fx", /* "ee", removed it can be found in 'meeting' */
"fz", "gq", "gv", "gx", "hh", "hk", "hv", "hx", "hz", "iy", "jb", /* "jc", jcrew.com */ "jd", "jf", "jg", "jh", "jk",
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index 00ecadb41..a44107679 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -571,9 +571,14 @@ static void init_string_based_protocols(struct ndpi_detection_module_struct *ndp
// ac_automata_display(ndpi_str->host_automa.ac_automa, 'n');
#endif
+#if 1
for (i = 0; ndpi_en_bigrams[i] != NULL; i++)
ndpi_string_to_automa(ndpi_str, &ndpi_str->bigrams_automa, (char *) ndpi_en_bigrams[i], 1, 1, 1, 0);
-
+#else
+ for (i = 0; ndpi_en_popular_bigrams[i] != NULL; i++)
+ ndpi_string_to_automa(ndpi_str, &ndpi_str->bigrams_automa, (char *) ndpi_en_popular_bigrams[i], 1, 1, 1, 0);
+#endif
+
for (i = 0; ndpi_en_impossible_bigrams[i] != NULL; i++)
ndpi_string_to_automa(ndpi_str, &ndpi_str->impossible_bigrams_automa, (char *) ndpi_en_impossible_bigrams[i], 1,
1, 1, 0);
@@ -4372,6 +4377,13 @@ static void ndpi_reconcile_protocols(struct ndpi_detection_module_struct *ndpi_s
Skype for a host doing MS Teams means MS Teams
(MS Teams uses Skype as transport protocol for voice/video)
*/
+
+ if(flow) {
+ /* When a protocol is detected underneath and the application protocol differs from it (example: DNS.Apple), clear the suspicious DGA domain risk */
+ if((flow->detected_protocol_stack[1] != NDPI_PROTOCOL_UNKNOWN)
+ && (flow->detected_protocol_stack[0] /* app */ != flow->detected_protocol_stack[1] /* major */))
+ NDPI_CLR_BIT(flow->risk, NDPI_SUSPICIOUS_DGA_DOMAIN);
+ }
switch(ret->app_protocol) {
case NDPI_PROTOCOL_MSTEAMS:
@@ -6498,58 +6510,101 @@ static int enough(int a, int b) {
/* ******************************************************************** */
+// #define DGA_DEBUG 1
+
int ndpi_check_dga_name(struct ndpi_detection_module_struct *ndpi_str,
struct ndpi_flow_struct *flow,
char *name) {
- int len = strlen(name), rc = 0;
+ int len, rc = 0;
+ len = strlen(name);
+
if(len >= 5) {
- int i, j, num_found = 0, num_impossible = 0, num_bigram_checks = 0;
- char tmp[128];
+ int i, j, num_found = 0, num_impossible = 0, num_bigram_checks = 0, num_digits = 0, num_vowels = 0, num_words = 0;
+ char tmp[128], *word, *tok_tmp;
len = snprintf(tmp, sizeof(tmp)-1, "%s", name);
if(len < 0) return(0);
for(i=0, j=0; (i<len) && (j<(sizeof(tmp)-1)); i++) {
- if(isdigit(name[i]))
- continue;
- else
tmp[j++] = tolower(name[i]);
}
tmp[j] = '\0';
len = j;
- for(i = 0; i < len; i++) {
- if(isdigit(tmp[i])) continue;
+ for(word = strtok_r(tmp, ".", &tok_tmp); ; word = strtok_r(NULL, ".", &tok_tmp)) {
+ if(!word) break;
- switch(tmp[i]) {
- case '-':
- case ':':
- case '.':
- continue;
- break;
- }
+ num_words++;
+
+ if(strlen(word) < 3) continue;
- if(isdigit(tmp[i+1])) continue;
+#ifdef DGA_DEBUG
+ printf("-> %s [%s][len: %u]\n", word, name, (unsigned int)strlen(word));
+#endif
+
+ for(i = 0; word[i+1] != '\0'; i++) {
+ if(isdigit(word[i])) {
+ num_digits++;
+
+ // if(!isdigit(word[i+1])) num_impossible++;
+
+ continue;
+ }
+
+ switch(word[i]) {
+ case '_':
+ case '-':
+ case ':':
+ continue;
+ break;
+
+ case '.':
+ continue;
+ break;
+ }
- num_bigram_checks++;
+ switch(word[i]) {
+ case 'a':
+ case 'e':
+ case 'i':
+ case 'o':
+ case 'u':
+ num_vowels++;
+ break;
+ }
+
+ if(isdigit(word[i+1])) {
+ num_digits++;
+ // num_impossible++;
+ continue;
+ }
+
+ num_bigram_checks++;
- if(ndpi_match_bigram(ndpi_str, &ndpi_str->bigrams_automa, &tmp[i])) {
- num_found++;
- } else if(ndpi_match_bigram(ndpi_str,
- &ndpi_str->impossible_bigrams_automa,
- &tmp[i])) {
+ if(ndpi_match_bigram(ndpi_str, &ndpi_str->bigrams_automa, &word[i])) {
+ num_found++;
+ } else {
+ if(ndpi_match_bigram(ndpi_str,
+ &ndpi_str->impossible_bigrams_automa,
+ &word[i])) {
#ifdef DGA_DEBUG
- printf("IMPOSSIBLE %s\n", &tmp[i]);
+ printf("IMPOSSIBLE %s\n", &word[i]);
#endif
- num_impossible++;
- }
- }
+ num_impossible++;
+ }
+ }
+ } /* for */
+ } /* for */
+#ifdef DGA_DEBUG
+ printf("[num_found: %u][num_impossible: %u][num_digits: %u][num_bigram_checks: %u][num_vowels: %u/%u]\n",
+ num_found, num_impossible, num_digits, num_bigram_checks, num_vowels, j-num_vowels);
+#endif
+
if(num_bigram_checks
- && (num_impossible > 0)
- && ((num_found == 0) || enough(num_found, num_impossible)))
+ && ((num_found == 0) || ((num_digits > 5) && (num_words <= 3)) || enough(num_found, num_impossible)))
rc = 1;
if(rc && flow)
@@ -6557,8 +6612,8 @@ int ndpi_check_dga_name(struct ndpi_detection_module_struct *ndpi_str,
#ifdef DGA_DEBUG
if(rc)
- printf("DGA %s [%s][num_found: %u][num_impossible: %u]\n",
- tmp, name, num_found, num_impossible);
+ printf("DGA %s [num_found: %u][num_impossible: %u]\n",
+ name, num_found, num_impossible);
#endif
}