aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.travis.yml8
-rw-r--r--example/ndpiReader.c8
-rw-r--r--src/lib/ndpi_analyze.c177
-rw-r--r--src/lib/ndpi_main.c4
4 files changed, 129 insertions, 68 deletions
diff --git a/.travis.yml b/.travis.yml
index 789e0e196..2c359b356 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -120,7 +120,7 @@ matrix:
- MATRIX_EVAL="CC=clang-7"
- name: fuzza
- env: CFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION" LDFLAGS="-g3 -O0" QA_FUZZ=asan CC=clang-7 && ASAN_SYMBOLIZER_PATH=/usr/local/clang-7.0.0/bin/llvm-symbolizer
+ env: CXXFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=address -fsanitize=fuzzer-no-link" CFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=address -fsanitize=fuzzer-no-link" LDFLAGS="-g3 -O0 -fsanitize=address" QA_FUZZ=asan CC=clang-7 && CXX=clang++-7 ASAN_SYMBOLIZER_PATH=/usr/local/clang-7.0.0/bin/llvm-symbolizer
os: linux
compiler: clang-7
addons:
@@ -133,7 +133,7 @@ matrix:
- libpcap-dev
- autogen
- name: fuzzm
- env: CFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=memory -fsanitize=fuzzer-no-link" LDFLAGS="-g3 -O0 -fsanitize=memory" QA_FUZZ=msan CC=clang-7 && MSAN_SYMBOLIZER_PATH=/usr/local/clang-7.0.0/bin/llvm-symbolizer
+ env: CXXFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=memory -fsanitize=fuzzer-no-link" CFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=memory -fsanitize=fuzzer-no-link" LDFLAGS="-g3 -O0 -fsanitize=memory" QA_FUZZ=msan CC=clang-7 && CXX=clang++-7 MSAN_SYMBOLIZER_PATH=/usr/local/clang-7.0.0/bin/llvm-symbolizer
os: linux
compiler: clang-7
addons:
@@ -146,7 +146,7 @@ matrix:
- libpcap-dev
- autogen
- name: fuzzu
- env: CFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION" LDFLAGS="-g3 -O0" QA_FUZZ=ubsan CC=clang-7
+ env: CXXFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=undefined -fsanitize=fuzzer-no-link" CFLAGS="-g3 -O0 -DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION -fsanitize=undefined -fno-sanitize-recover=undefined,integer -fsanitize=fuzzer-no-link" LDFLAGS="-g3 -O0 -fsanitize=undefined" QA_FUZZ=ubsan CC=clang-7 && CXX=clang++-7
os: linux
compiler: clang-7
addons:
@@ -168,7 +168,7 @@ before_script:
# - lcov --directory . --zerocounters
script:
- - if [ -n "$QA_FUZZ" -a "$QA_FUZZ" != "msan" ]; then ./configure --enable-fuzztargets ; else ./configure ; fi
+ - if [ -n "$QA_FUZZ" ]; then ./configure --enable-fuzztargets ; else ./configure ; fi
- make
#after_script:
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 4f1767846..54d5abd4b 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -2503,17 +2503,17 @@ static void printFlowsStats() {
printf("]\n");
}
- printf("\t%-10s\t%s:%u <-> %s:%u\t[",
- // cluster_ids[i],
+ printf("\t%u\t%-10s\t%s:%u <-> %s:%u\t[",
+ i,
ndpi_protocol2name(ndpi_thread_info[0].workflow->ndpi_struct,
all_flows[i].flow->detected_protocol, buf, sizeof(buf)),
all_flows[i].flow->src_name,
ntohs(all_flows[i].flow->src_port),
- all_flows[i].flow->src_name,
+ all_flows[i].flow->dst_name,
ntohs(all_flows[i].flow->dst_port));
print_bin(out, NULL, &bins[i]);
- printf("][score: %f]", ndpi_bin_similarity(&centroids[j], &bins[i], 0));
+ printf("][similarity: %f]", ndpi_bin_similarity(&centroids[j], &bins[i], 0));
if(all_flows[i].flow->ssh_tls.client_requested_server_name[0] != '\0')
fprintf(out, "[%s]", all_flows[i].flow->ssh_tls.client_requested_server_name);
diff --git a/src/lib/ndpi_analyze.c b/src/lib/ndpi_analyze.c
index a2fe557ae..26f2ae041 100644
--- a/src/lib/ndpi_analyze.c
+++ b/src/lib/ndpi_analyze.c
@@ -78,7 +78,7 @@ void ndpi_free_data_analysis(struct ndpi_analyze_struct *d) {
void ndpi_reset_data_analysis(struct ndpi_analyze_struct *d) {
memset(d, 0, sizeof(struct ndpi_analyze_struct));
memset(d->values, 0, sizeof(u_int32_t)*d->num_values_array_len);
- d->num_data_entries = 0;
+ d->num_data_entries = 0;
}
/* ********************************************************************************* */
@@ -123,11 +123,11 @@ float ndpi_data_average(struct ndpi_analyze_struct *s) {
u_int32_t ndpi_data_last(struct ndpi_analyze_struct *s) {
if((s->num_data_entries == 0) || (s->sum_total == 0))
return(0);
-
+
if(s->next_value_insert_index == 0)
return(s->values[s->num_values_array_len-1]);
else
- return(s->values[s->next_value_insert_index-1]);
+ return(s->values[s->next_value_insert_index-1]);
}
/* Return min/max on all values */
@@ -164,7 +164,7 @@ float ndpi_data_window_average(struct ndpi_analyze_struct *s) {
if(n == 0)
return(0);
-
+
for(i=0; i<n; i++)
sum += s->values[i];
@@ -183,7 +183,7 @@ float ndpi_data_window_variance(struct ndpi_analyze_struct *s) {
if(n == 0)
return(0);
-
+
for(i=0; i<n; i++)
sum += pow(s->values[i]-avg, 2);
@@ -393,7 +393,7 @@ void ndpi_set_bin(struct ndpi_bin *b, u_int8_t slot_id, u_int32_t val) {
void ndpi_inc_bin(struct ndpi_bin *b, u_int8_t slot_id, u_int32_t val) {
b->is_empty = 0;
-
+
if(slot_id >= b->num_bins) slot_id = 0;
switch(b->family) {
@@ -456,7 +456,7 @@ void ndpi_normalize_bin(struct ndpi_bin *b) {
u_int32_t tot = 0;
if(b->is_empty) return;
-
+
switch(b->family) {
case ndpi_bin_family8:
for(i=0; i<b->num_bins; i++) tot += b->u.bins8[i];
@@ -567,10 +567,10 @@ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t nor
for(i=0; i<b1->num_bins; i++) {
u_int32_t a = ndpi_get_bin_value(b1, i);
u_int32_t b = ndpi_get_bin_value(b2, i);
-
+
sumxx += a*a, sumyy += b*b, sumxy += a*b;
}
-
+
if((sumxx == 0) || (sumyy == 0))
return(0);
else
@@ -583,10 +583,13 @@ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t nor
for(i=0; i<b1->num_bins; i++) {
u_int32_t a = ndpi_get_bin_value(b1, i);
u_int32_t b = ndpi_get_bin_value(b2, i);
+ u_int32_t diff = (a > b) ? (a - b) : (b - a);
+
+ if(a != b) sum += pow(diff, 2);
- sum += pow(a-b, 2);
+ // printf("[a: %u][b: %u][sum: %u]\n", a, b, sum);
}
-
+
/* The lower the more similar */
return(sqrt(sum));
}
@@ -595,6 +598,8 @@ float ndpi_bin_similarity(struct ndpi_bin *b1, struct ndpi_bin *b2, u_int8_t nor
/* ********************************************************************************* */
+#define MAX_NUM_CLUSTERS 128
+
/*
Clusters bins into 'num_clusters'
- (in) bins: a vection 'num_bins' long of bins to cluster
@@ -610,18 +615,27 @@ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins,
u_int16_t i, j, max_iterations = 25, num_iterations, num_moves;
u_int8_t verbose = 0, alloc_centroids = 0;
char out_buf[256];
+ float *bin_score;
+ u_int16_t num_cluster_elems[MAX_NUM_CLUSTERS] = { 0 };
- if(num_clusters > num_bins) num_clusters = num_bins;
+ srand(time(NULL));
+
+ if(num_clusters > num_bins) num_clusters = num_bins;
+ if(num_clusters > MAX_NUM_CLUSTERS) num_clusters = MAX_NUM_CLUSTERS;
if(verbose)
printf("Distributing %u bins over %u clusters\n", num_bins, num_clusters);
+ if((bin_score = (float*)ndpi_calloc(num_bins, sizeof(float))) == NULL)
+ return(-2);
+
if(centroids == NULL) {
alloc_centroids = 1;
- if((centroids = (struct ndpi_bin*)ndpi_malloc(sizeof(struct ndpi_bin)*num_clusters)) == NULL)
+ if((centroids = (struct ndpi_bin*)ndpi_malloc(sizeof(struct ndpi_bin)*num_clusters)) == NULL) {
+ ndpi_free(bin_score);
return(-2);
- else {
+ } else {
for(i=0; i<num_clusters; i++)
ndpi_init_bin(&centroids[i], ndpi_bin_family32 /* Use 32 bit to avoid overlaps */, bins[0].num_bins);
}
@@ -631,52 +645,33 @@ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins,
memset(cluster_ids, 0, sizeof(u_int16_t) * num_bins);
/* Randomly pick a cluster id */
- for(i=0; i<num_clusters; i++) {
- cluster_ids[i] = i;
+ for(i=0; i<num_bins; i++) {
+ u_int cluster_id = rand() % num_clusters;
- if(verbose)
- printf("Initializing cluster %u: %s\n", i,
- ndpi_print_bin(&bins[i], 0, out_buf, sizeof(out_buf)));
-
- }
-
- /* Assign the remaining bins to the nearest cluster */
- for(i=num_clusters; i<num_bins; i++) {
- u_int16_t j;
- float best_similarity;
- u_int8_t cluster_id = 0;
-
-#ifdef COSINE_SIMILARITY
- best_similarity = -1;
-#else
- best_similarity = 99999999999;
-#endif
-
- for(j=0; j<num_clusters; j++) {
- float similarity = ndpi_bin_similarity(&bins[i], &bins[j], 0);
-
-#ifdef COSINE_SIMILARITY
- if(similarity > best_similarity)
-#else
- if(similarity < best_similarity)
-#endif
- cluster_id = j, best_similarity = similarity;
- }
+ cluster_ids[i] = cluster_id;
if(verbose)
- printf("Assigned bin to cluster %u: %s [score: %f]\n", cluster_id,
- ndpi_print_bin(&bins[i], 0, out_buf, sizeof(out_buf)), best_similarity);
+ printf("Initializing cluster %u for bin %u: %s\n",
+ cluster_id, i,
+ ndpi_print_bin(&bins[i], 0, out_buf, sizeof(out_buf)));
- cluster_ids[i] = cluster_id;
+ num_cluster_elems[cluster_id]++;
}
num_iterations = 0;
/* Now let's try to find a better arrangement */
while(num_iterations++ < max_iterations) {
- /* Find the center of each cluster */
- if(verbose) printf("Iteration %u\n", num_iterations);
+ /* Compute the centroids for each cluster */
+ memset(bin_score, 0, num_bins*sizeof(float));
+
+ if(verbose) {
+ printf("\nIteration %u\n", num_iterations);
+
+ for(j=0; j<num_clusters; j++)
+ printf("Cluster %u: %u bins\n", j, num_cluster_elems[j]);
+ }
for(i=0; i<num_clusters; i++)
ndpi_reset_bin(&centroids[i]);
@@ -700,38 +695,61 @@ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins,
for(i=0; i<num_bins; i++) {
u_int16_t j;
- float best_similarity;
+ float best_similarity, current_similarity = 0;
u_int8_t cluster_id = 0;
+ if(verbose)
+ printf("Analysing bin %u [cluster: %u]\n",
+ i, cluster_ids[i]);
+
#ifdef COSINE_SIMILARITY
- best_similarity = -1;
+ best_similarity = -1;
#else
- best_similarity = 99999999999;
+ best_similarity = 99999999999;
#endif
for(j=0; j<num_clusters; j++) {
float similarity;
if(centroids[j].is_empty) continue;
-
+
similarity = ndpi_bin_similarity(&bins[i], &centroids[j], 0);
+ if(j == cluster_ids[i])
+ current_similarity = similarity;
+
if(verbose)
printf("Bin %u / centroid %u [similarity: %f]\n", i, j, similarity);
#ifdef COSINE_SIMILARITY
- if(similarity > best_similarity)
+ if(similarity > best_similarity) {
+ cluster_id = j, best_similarity = similarity;
+ }
#else
- if(similarity < best_similarity)
-#endif
+ if(similarity < best_similarity) {
cluster_id = j, best_similarity = similarity;
+ }
+#endif
}
- if(/* (best_similarity > 0) && */ (cluster_ids[i] != cluster_id)) {
+ if((best_similarity == current_similarity) && (num_cluster_elems[cluster_ids[i]] > 1)) {
+ /*
+ In case of identical similarity let's leave things as they are,
+ unless this is a cluster with only one element
+ */
+ cluster_id = cluster_ids[i];
+ }
+
+ bin_score[i] = best_similarity;
+
+ if(cluster_ids[i] != cluster_id) {
if(verbose)
printf("Moved bin %u from cluster %u -> %u [similarity: %f]\n",
i, cluster_ids[i], cluster_id, best_similarity);
+ num_cluster_elems[cluster_ids[i]]--;
+ num_cluster_elems[cluster_id]++;
+
cluster_ids[i] = cluster_id;
num_moves++;
}
@@ -739,7 +757,48 @@ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins,
if(num_moves == 0)
break;
- }
+
+ if(verbose) {
+ for(j=0; j<num_clusters; j++)
+ printf("Cluster %u: %u bins\n", j, num_cluster_elems[j]);
+ }
+
+#if 0
+ for(j=0; j<num_clusters; j++) {
+ if(num_cluster_elems[j] == 0) {
+ u_int16_t candidate;
+ float score;
+
+ if(verbose)
+ printf("\nCluster %u is empty: need to rebalance\n", j);
+
+#ifdef COSINE_SIMILARITY
+ score = 99999999999;
+
+ for(i=0; i<num_bins; i++) {
+ if((cluster_ids[i] != j) && (bin_score[i] < score) && (num_cluster_elems[cluster_ids[i]] > 1))
+ score = bin_score[i], candidate = i;
+ }
+#else
+ score = 0;
+
+ for(i=0; i<num_bins; i++) {
+ if((cluster_ids[i] != j) && (bin_score[i] > score) && (num_cluster_elems[cluster_ids[i]] > 1))
+ score = bin_score[i], candidate = i;
+ }
+#endif
+
+ if(verbose)
+ printf("Rebalance: moving bin %u from cluster %u -> %u [similarity: %f]\n",
+ candidate, cluster_ids[candidate], j, score);
+
+ num_cluster_elems[cluster_ids[candidate]]--;
+ num_cluster_elems[j]++;
+ cluster_ids[candidate] = j;
+ }
+ }
+#endif
+ } /* while(...) */
if(alloc_centroids) {
for(i=0; i<num_clusters; i++)
@@ -748,6 +807,8 @@ int ndpi_cluster_bins(struct ndpi_bin *bins, u_int16_t num_bins,
ndpi_free(centroids);
}
+ ndpi_free(bin_score);
+
return(0);
}
diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c
index b2f294c0b..b497bbaf8 100644
--- a/src/lib/ndpi_main.c
+++ b/src/lib/ndpi_main.c
@@ -1035,8 +1035,8 @@ static void ndpi_init_protocol_defaults(struct ndpi_detection_module_struct *ndp
ndpi_build_default_ports(ports_a, 5900, 5901, 5800, 0, 0) /* TCP */,
ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */);
ndpi_set_proto_defaults(ndpi_str, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_FREE90, 0 /* can_have_a_subprotocol */,
- no_master, no_master, "Free90", NDPI_PROTOCOL_CATEGORY_REMOTE_ACCESS,
- ndpi_build_default_ports(ports_a, 5900, 5901, 5800, 0, 0) /* TCP */,
+ no_master, no_master, "FREE_90", NDPI_PROTOCOL_CATEGORY_REMOTE_ACCESS,
+ ndpi_build_default_ports(ports_a, 0, 0, 0, 0, 0) /* TCP */,
ndpi_build_default_ports(ports_b, 0, 0, 0, 0, 0) /* UDP */);
ndpi_set_proto_defaults(ndpi_str, NDPI_PROTOCOL_ACCEPTABLE, NDPI_PROTOCOL_ZOOM, 0 /* can_have_a_subprotocol */,
no_master, no_master, "Zoom", NDPI_PROTOCOL_CATEGORY_VIDEO,