diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/ndpi_content_match.c.inc | 47 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 117 |
2 files changed, 133 insertions, 31 deletions
diff --git a/src/lib/ndpi_content_match.c.inc b/src/lib/ndpi_content_match.c.inc index facd57544..7828c50f7 100644 --- a/src/lib/ndpi_content_match.c.inc +++ b/src/lib/ndpi_content_match.c.inc @@ -9233,6 +9233,10 @@ static ndpi_category_match category_match[] = { word or just random chars. http://www3.nd.edu/~busiforc/handouts/cryptography/Letter%20Frequencies.html + + DGA Datasets + - https://data.netlab.360.com/dga/ + - https://github.com/baderj/domain_generation_algorithms */ static const char *ndpi_en_bigrams[] = { @@ -9273,6 +9277,49 @@ static const char *ndpi_en_bigrams[] = { /* ******************************************************************** */ +static const char *ndpi_en_popular_bigrams[] = { + "th", "he", "in", "er", "an", "re", "on", "at", "en", "nd", "ti", "es", "or", "te", "of", "ed", "is", "it", + "al", "ar", "st", "to", "nt", "ng", "se", "ha", "as", "ou", "io", "le", "ve", "co", "me", "de", "hi", "ri", + "ro", "ic", "ne", "ea", "ra", "ce", "li", "ch", "ll", "be", "ma", "si", "om", "ur", "ca", "el", "ta", "la", + "ns", "di", "fo", "ho", "pe", "ec", "pr", "no", "ct", "us", "ac", "ot", "il", "tr", "ly", "nc", "et", "ut", + "ss", "so", "rs", "un", "lo", "wa", "ge", "ie", "wh", "ee", "wi", "em", "ad", "ol", "rt", "po", "we", "na", + "ul", "ni", "ts", "mo", "ow", "pa", "im", "mi", "ai", "sh", "ir", "su", "id", "os", "iv", "ia", "am", "fi", + "ci", "vi", "pl", "ig", "tu", "ev", "ld", "ry", "mp", "fe", "bl", "ab", "gh", "ty", "op", "wo", "sa", "ay", + "ex", "ke", "fr", "oo", "av", "ag", "if", "ap", "gr", "od", "bo", "sp", "rd", "do", "uc", "bu", "ei", "ov", + "by", "rm", "ep", "tt", "oc", "fa", "ef", "cu", "rn", "sc", "gi", "da", "yo", "cr", "cl", "du", "ga", "qu", + "ue", "ff", "ba", "ey", "ls", "va", "um", "pp", "ua", "up", "lu", "go", "ht", "ru", "ug", "ds", "lt", "pi", + "rc", "rr", "eg", "au", "ck", "ew", "mu", "br", "bi", "pt", "ak", "pu", "ui", "rg", "ib", "tl", "ny", "ki", + "rk", "ys", "ob", "mm", "fu", "ph", "og", "ms", "ye", "ud", "mb", "ip", "ub", "oi", "rl", "gu", "dr", "hr", + "cc", "tw", "ft", "wn", "nu", "af", "hu", "nn", "eo", "vo", "rv", "nf", "xp", "gn", "sm", "fl", "iz", "ok", + "nl", "my", "gl", "aw", "ju", "oa", "eq", "sy", "sl", "ps", "jo", "lf", "nv", "je", "nk", "kn", "gs", "dy", + "hy", "ze", "ks", "xt", "bs", "ik", "dd", "cy", "rp", "sk", "xi", "oe", "oy", "ws", "lv", "dl", "rf", "eu", + "dg", "wr", "xa", "yi", "nm", "eb", "rb", "tm", "xc", "eh", "tc", "gy", "ja", "hn", "yp", "za", "gg", "ym", + "sw", "bj", "lm", "cs", "ii", "ix", "xe", "oh", "lk", "dv", "lp", "ax", "ox", "uf", "dm", "iu", "sf", "bt", + "ka", "yt", "ek", "pm", "ya", "gt", "wl", "rh", "yl", "hs", "ah", "yc", "yn", "rw", "hm", "lw", "hl", "ae", + "zi", "az", "lc", "py", "aj", "iq", "nj", "bb", "nh", "uo", "kl", "lr", "tn", "gm", "sn", "nr", "fy", "mn", + "dw", "sb", "yr", "dn", "sq", "zo", "oj", "yd", "lb", "wt", "lg", "ko", "np", "sr", "nq", "ky", "ln", "nw", + "tf", "fs", "cq", "dh", "sd", "vy", "dj", "hw", "xu", "ao", "ml", "uk", "uy", "ej", "ez", "hb", "nz", "nb", + "mc", "yb", "tp", "xh", "ux", "tz", "bv", "mf", "wd", "oz", "yw", "kh", "gd", "bm", "mr", "ku", "uv", "dt", + "hd", "aa", "xx", "df", "db", "ji", "kr", "xo", "cm", "zz", "nx", "yg", "xy", "kg", "tb", "dc", "bd", "sg", + "wy", "zy", "aq", "hf", "cd", "vu", "kw", "zu", "bn", "ih", "tg", "xv", "uz", "bc", "xf", "yz", "km", "dp", + "lh", "wf", "kf", "pf", "cf", "mt", "yu", "cp", "pb", "td", "zl", "sv", "hc", "mg", "pw", "gf", "pd", "pn", + "pc", "rx", "tv", "ij", "wm", "uh", "wk", "wb", "bh", "oq", "kt", "rq", "kb", "cg", "vr", "cn", "pk", "uu", + "yf", "wp", "cz", "kp", "dq", "wu", "fm", "wc", "md", "kd", "zh", "gw", "rz", "cb", "iw", "xl", "hp", "mw", + "vs", "fc", "rj", "bp", "mh", "hh", "yh", "uj", "fg", "fd", "gb", "pg", "tk", "kk", "hq", "fn", "lz", "vl", + "gp", "hz", "dk", "yk", "qi", "lx", "vd", "zs", "bw", "xq", "mv", "uw", "hg", "fb", "sj", "ww", "gk", "uq", + "bg", "sz", "jr", "ql", "zt", "hk", "vc", "xm", "gc", "fw", "pz", "kc", "hv", "xw", "zw", "fp", "iy", "pv", + "vt", "jp", "cv", "zb", "vp", "zr", "fh", "yv", "zg", "zm", "zv", "qs", "kv", "vn", "zn", "qa", "yx", "jn", + "bf", "mk", "cw", "jm", "lq", "jh", "kj", "jc", "gz", "js", "tx", "fk", "jl", "vm", "lj", "tj", "jj", "cj", + "vg", "mj", "jt", "pj", "wg", "vh", "bk", "vv", "jd", "tq", "vb", "jf", "dz", "xb", "jb", "zc", "fj", "yy", + "qn", "xs", "qr", "jk", "jv", "qq", "xn", "vf", "px", "zd", "qt", "zp", "qo", "dx", "hj", "gv", "jw", "qc", + "jy", "gj", "qb", "pq", "jg", "bz", "mx", "qm", "mz", "qf", "wj", "zq", "xr", "zk", "cx", "fx", "fv", "bx", + "vw", "vj", "mq", "qv", "zf", "qe", "yj", "gx", "kx", "xg", "qd", "xj", "sx", "vz", "vx", "wv", "yq", "bq", + "gq", "vk", "zj", "xk", "qp", "hx", "fz", "qh", "qj", "jz", "vq", "kq", "xd", "qw", "jx", "qx", "kz", "wx", + "fq", "xz", "zx", "jq", "qg", "qk", "qy", "qz", "wq", "wz", NULL + }; + +/* ******************************************************************** */ + static const char *ndpi_en_impossible_bigrams[] = { "bk", "bq", "bx", "cb", "cf", "cg", "cj", "cp", "cv", "cw", "cx", "dx", "fk", "fq", "fv", "fx", /* "ee", removed it can be found in 'meeting' */ "fz", "gq", "gv", "gx", "hh", "hk", "hv", "hx", "hz", "iy", "jb", /* "jc", jcrew.com */ "jd", "jf", "jg", "jh", "jk", diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 00ecadb41..a44107679 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -571,9 +571,14 @@ static void init_string_based_protocols(struct ndpi_detection_module_struct *ndp // ac_automata_display(ndpi_str->host_automa.ac_automa, 'n'); #endif +#if 1 for (i = 0; ndpi_en_bigrams[i] != NULL; i++) ndpi_string_to_automa(ndpi_str, &ndpi_str->bigrams_automa, (char *) ndpi_en_bigrams[i], 1, 1, 1, 0); - +#else + for (i = 0; ndpi_en_popular_bigrams[i] != NULL; i++) + ndpi_string_to_automa(ndpi_str, &ndpi_str->bigrams_automa, (char *) ndpi_en_popular_bigrams[i], 1, 1, 1, 0); +#endif + for (i = 0; ndpi_en_impossible_bigrams[i] != NULL; i++) ndpi_string_to_automa(ndpi_str, &ndpi_str->impossible_bigrams_automa, (char *) ndpi_en_impossible_bigrams[i], 1, 1, 1, 0); @@ -4372,6 +4377,13 @@ static void ndpi_reconcile_protocols(struct ndpi_detection_module_struct *ndpi_s Skype for a host doing MS Teams means MS Teams (MS Teams uses Skype as transport protocol for voice/video) */ + + if(flow) { + /* Do not go for DNS when there is an application protocol. Example DNS.Apple */ + if((flow->detected_protocol_stack[1] != NDPI_PROTOCOL_UNKNOWN) + && (flow->detected_protocol_stack[0] /* app */ != flow->detected_protocol_stack[1] /* major */)) + NDPI_CLR_BIT(flow->risk, NDPI_SUSPICIOUS_DGA_DOMAIN); + } switch(ret->app_protocol) { case NDPI_PROTOCOL_MSTEAMS: @@ -6498,58 +6510,101 @@ static int enough(int a, int b) { /* ******************************************************************** */ +// #define DGA_DEBUG 1 + int ndpi_check_dga_name(struct ndpi_detection_module_struct *ndpi_str, struct ndpi_flow_struct *flow, char *name) { - int len = strlen(name), rc = 0; + int len, rc = 0; + len = strlen(name); + if(len >= 5) { - int i, j, num_found = 0, num_impossible = 0, num_bigram_checks = 0; - char tmp[128]; + int i, j, num_found = 0, num_impossible = 0, num_bigram_checks = 0, num_digits = 0, num_vowels = 0, num_words = 0; + char tmp[128], *word, *tok_tmp; len = snprintf(tmp, sizeof(tmp)-1, "%s", name); if(len < 0) return(0); for(i=0, j=0; (i<len) && (j<(sizeof(tmp)-1)); i++) { - if(isdigit(name[i])) - continue; - else tmp[j++] = tolower(name[i]); } tmp[j] = '\0'; len = j; - for(i = 0; i < len; i++) { - if(isdigit(tmp[i])) continue; + for(word = strtok_r(tmp, ".", &tok_tmp); ; word = strtok_r(NULL, ".", &tok_tmp)) { + if(!word) break; - switch(tmp[i]) { - case '-': - case ':': - case '.': - continue; - break; - } + num_words++; + + if(strlen(word) < 3) continue; - if(isdigit(tmp[i+1])) continue; +#ifdef DGA_DEBUG + printf("-> %s [%s][len: %u]\n", word, name, (unsigned int)strlen(word)); +#endif + + for(i = 0; word[i+1] != '\0'; i++) { + if(isdigit(word[i])) { + num_digits++; + + // if(!isdigit(word[i+1])) num_impossible++; + + continue; + } + + switch(word[i]) { + case '_': + case '-': + case ':': + continue; + break; + + case '.': + continue; + break; + } - num_bigram_checks++; + switch(word[i]) { + case 'a': + case 'e': + case 'i': + case 'o': + case 'u': + num_vowels++; + break; + } + + if(isdigit(word[i+1])) { + num_digits++; + // num_impossible++; + continue; + } + + num_bigram_checks++; - if(ndpi_match_bigram(ndpi_str, &ndpi_str->bigrams_automa, &tmp[i])) { - num_found++; - } else if(ndpi_match_bigram(ndpi_str, - &ndpi_str->impossible_bigrams_automa, - &tmp[i])) { + if(ndpi_match_bigram(ndpi_str, &ndpi_str->bigrams_automa, &word[i])) { + num_found++; + } else { + if(ndpi_match_bigram(ndpi_str, + &ndpi_str->impossible_bigrams_automa, + &word[i])) { #ifdef DGA_DEBUG - printf("IMPOSSIBLE %s\n", &tmp[i]); + printf("IMPOSSIBLE %s\n", &word[i]); #endif - num_impossible++; - } - } + num_impossible++; + } + } + } /* for */ + } /* for */ +#ifdef DGA_DEBUG + printf("[num_found: %u][num_impossible: %u][num_digits: %u][num_bigram_checks: %u][num_vowels: %u/%u]\n", + num_found, num_impossible, num_digits, num_bigram_checks, num_vowels, j-num_vowels); +#endif + if(num_bigram_checks - && (num_impossible > 0) - && ((num_found == 0) || enough(num_found, num_impossible))) + && ((num_found == 0) || ((num_digits > 5) && (num_words <= 3)) || enough(num_found, num_impossible))) rc = 1; if(rc && flow) @@ -6557,8 +6612,8 @@ int ndpi_check_dga_name(struct ndpi_detection_module_struct *ndpi_str, #ifdef DGA_DEBUG if(rc) - printf("DGA %s [%s][num_found: %u][num_impossible: %u]\n", - tmp, name, num_found, num_impossible); + printf("DGA %s [num_found: %u][num_impossible: %u]\n", + name, num_found, num_impossible); #endif } |