/* * rrd_similarity.c * * Copyright (C) 2011-22 - ntop.org * * nDPI is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * nDPI is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with nDPI. If not, see . * */ #include #include #include #include #include #include #include "rrd.h" #include "ndpi_api.h" #define DEFAULT_ALPHA 0.5 #define DEFAULT_START "now-1d" #define DEFAULT_END "now" #define MAX_NUM_RRDS 8192 #ifndef PATH_MAX #define PATH_MAX 4096 #endif typedef struct { char *path; float average, stddev; struct ndpi_bin b; } rrd_file_stats; u_int verbose = 0, similarity_threshold = 100, skip_zero = 0; /* *************************************************** */ static void help() { printf("Usage: rrd_similarity [-v][-e ][-q][-s ]\n" " -f -d [-t ]\n" "-a | Set alpha. Valid range >0 .. <1. Default %.2f\n" "-e | RRD end time. Default %s\n" "-q | Quick output (only anomalies are reported)\n" "-s | RRD start time. Default %s\n" "-d | Base directory where RRD filename is searched\n" "-f | Path of the RRD filename to analyze\n" "-t | Similarity threshold. Default %u (0 == alike)\n" "-v | Verbose\n" "-z | Skip zero RRDs during comparison\n" , DEFAULT_ALPHA, DEFAULT_END, DEFAULT_START, similarity_threshold); printf("\n\nExample: rrd_similarity -q -f bytes.rrd -d /var/lib/ntopng/-1/snmpstats\n"); printf("\n\nGoal: find similar RRDs\n"); exit(0); } /* *************************************************** */ void analyze_rrd(rrd_file_stats *rrd, time_t start, time_t end) { unsigned long step = 0, ds_cnt = 0; rrd_value_t *data, *p; char **names; time_t t; u_int i, num_points; struct ndpi_analyze_struct *s; if(rrd_fetch_r(rrd->path, "AVERAGE", &start, &end, &step, &ds_cnt, &names, &data) != 0) { printf("Unable to extract data from rrd %s\n", rrd->path); return; } p = data; num_points = (end-start)/step; if((s = ndpi_alloc_data_analysis(num_points)) == NULL) return; ndpi_init_bin(&rrd->b, ndpi_bin_family32, num_points); /* Step 1 - Compute average and stddev */ for(t=start+1, i=0; tb, i, value); } rrd->average = ndpi_data_average(s); rrd->stddev = ndpi_data_stddev(s); /* Step 2 - Bin analysis */ ndpi_free_data_analysis(s, 1); rrd_freemem(data); } /* *************************************************** */ int circles_touch(int x1, int r1, int x2, int r2) { int radius_sum = r1+r2; int x_diff = abs(x1 - x2); return((radius_sum < x_diff) ? 0 : 1); } /* *************************************************** */ void find_rrd_similarities(rrd_file_stats *rrd, u_int num_rrds) { u_int i, j, num_similar_rrds = 0, num_potentially_zero_equal = 0; for(i=0; i= 0) && (similarity < similarity_threshold)) { if(verbose) printf("%s [%.1f/%.1f] - %s [%.1f/%.1f] are %s [%.1f]\n", rrd[i].path, rrd[i].average, rrd[i].stddev, rrd[j].path, rrd[j].average, rrd[j].stddev, (similarity == 0) ? "alike" : "similar", similarity ); num_similar_rrds++; } } } } printf("Found %u (%.3f %%) similar RRDs / %u zero alike RRDs [num_rrds: %u]\n", num_similar_rrds, (num_similar_rrds*100.)/(float)(num_rrds*num_rrds), num_potentially_zero_equal, num_rrds); } /* *************************************************** */ void find_rrds(char *basedir, char *filename, rrd_file_stats *rrds, u_int *num_rrds) { struct dirent **namelist; int n = scandir(basedir, &namelist, 0, NULL); if(n < 0) return; /* End of the tree */ while(n--) { if(namelist[n]->d_name[0] != '.') { char path[PATH_MAX]; struct stat s; ndpi_snprintf(path, sizeof(path), "%s/%s", basedir, namelist[n]->d_name); if(stat(path, &s) == 0) { if(S_ISDIR(s.st_mode)) find_rrds(path, filename, rrds, num_rrds); else if(strcmp(namelist[n]->d_name, filename) == 0) { if(*num_rrds < MAX_NUM_RRDS) { rrds[*num_rrds].path = strdup(path); if(rrds[*num_rrds].path != NULL) (*num_rrds)++; } } } } free(namelist[n]); } free(namelist); } /* *************************************************** */ int main(int argc, char *argv[]) { rrd_time_value_t start_tv, end_tv; char *filename = NULL, *start_s, *end_s, *basedir = NULL; int c; time_t start, end; u_int num_rrds = 0, i; rrd_file_stats *rrds; /* Defaults */ start_s = DEFAULT_START; end_s = DEFAULT_END; while((c = getopt(argc, argv, "d:s:e:a:qf:t:vz")) != '?') { if(c == -1) break; switch(c) { case 's': start_s = optarg; break; case 'd': basedir = optarg; break; case 'e': end_s = optarg; break; case 'v': verbose = 1; break; case 'f': filename = optarg; break; case 't': similarity_threshold = atoi(optarg); break; case 'z': skip_zero = 1; break; default: help(); break; } } if((filename == NULL) || (basedir == NULL)) help(); if((rrd_parsetime(start_s, &start_tv) != NULL)) { printf("Unable to parse start time %s\n", start_s); return(-1); } if((rrd_parsetime(end_s, &end_tv) != NULL)) { printf("Unable to parse end time %s\n", end_s); return(-1); } rrd_proc_start_end(&start_tv, &end_tv, &start, &end); if((rrds = ndpi_calloc(sizeof(rrd_file_stats), MAX_NUM_RRDS)) == NULL) { printf("Not enough memory !\n"); return(-1); } /* Find all rrd's */ find_rrds(basedir, filename, rrds, &num_rrds); /* Read RRD's data */ for(i=0; i