diff options
author | Ivan Nardi <12729895+IvanNardi@users.noreply.github.com> | 2022-10-28 20:41:37 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-28 20:41:37 +0200 |
commit | db9f6ec1b4018164e5bff05f115dc60711bb711b (patch) | |
tree | be9939aecef5d15ffb55d3f786d2236025cdc906 | |
parent | 175981a42265478729a97e7866363431f5c688d7 (diff) |
Add basic profiling of memory allocations on data-path (#1789)
The goal is to have an idea of the memory allocation sizes performed in
the **library data-path**, i.e. excluding init/deinit phases and all
the allocations made by the application itself.
In other words, how much memory is needed per-flow, by nDPI, other than
`struct ndpi_flow_struct`?
It works only on single-thread configurations.
It is not enabled by default (in the unit tests) since different
configuration options (example: `--enable-pcre`) lead to different
results.
See: #1781
-rw-r--r-- | example/ndpiReader.c | 33 | ||||
-rw-r--r-- | example/reader_util.c | 21 | ||||
-rw-r--r-- | fuzz/fuzz_ndpi_reader.c | 4 |
3 files changed, 57 insertions, 1 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c index 96dd46881..399316aa9 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -127,6 +127,11 @@ extern u_int16_t min_pattern_len, max_pattern_len; extern void ndpi_self_check_host_match(); /* Self check function */ u_int8_t dump_internal_stats; +struct ndpi_bin malloc_bins; +int enable_malloc_bins = 0; +int max_malloc_bins = 14; +int malloc_size_stats = 0; + struct flow_info { struct ndpi_flow_info *flow; u_int16_t thread_id; @@ -506,6 +511,7 @@ static void help(u_int long_help) { " -I | Ignore VLAN id for flow hash calculation\n" " -z | Enable JA3+\n" " -A | Dump internal statistics (LRU caches / Patricia trees / Ahocarasick automas / ...\n" + " -M | Memory allocation stats on data-path (only by the library). It works only on single-thread configuration\n" , human_readeable_string_len, min_pattern_len, max_pattern_len, max_num_packets_per_flow, max_packet_payload_dissection, @@ -808,7 +814,7 @@ static void parseOptions(int argc, char **argv) { } #endif - while((opt = getopt_long(argc, argv, "a:Ab:e:Ec:C:dDf:g:i:Ij:k:K:S:hHp:pP:l:r:s:tu:v:V:n:rp:x:w:zq0123:456:7:89:m:T:U:", + while((opt = getopt_long(argc, argv, "a:Ab:e:Ec:C:dDf:g:i:Ij:k:K:S:hHp:pP:l:r:s:tu:v:V:n:rp:x:w:zq0123:456:7:89:m:MT:U:", longopts, &option_idx)) != EOF) { #ifdef DEBUG_TRACE if(trace) fprintf(trace, " #### Handling option -%c [%s] #### \n", opt, optarg ? 
optarg : ""); @@ -974,6 +980,11 @@ static void parseOptions(int argc, char **argv) { } break; + case 'M': + enable_malloc_bins = 1; + ndpi_init_bin(&malloc_bins, ndpi_bin_family64, max_malloc_bins); + break; + case 'k': errno = 0; if((serialization_fp = fopen(optarg, "w")) == NULL) @@ -1110,6 +1121,12 @@ static void parseOptions(int argc, char **argv) { for(thread_id = 1; thread_id < num_threads; thread_id++) _pcap_file[thread_id] = _pcap_file[0]; } + + if(num_threads > 1 && enable_malloc_bins == 1) + { + printf("Memory profiling ('-M') is incompatible with multi-thread enviroment"); + exit(1); + } } #ifdef __linux__ @@ -1881,8 +1898,10 @@ static void node_proto_guess_walker(const void *node, ndpi_VISIT which, int dept if((!flow->detection_completed) && flow->ndpi_flow) { u_int8_t proto_guessed; + malloc_size_stats = 1; flow->detected_protocol = ndpi_detection_giveup(ndpi_thread_info[0].workflow->ndpi_struct, flow->ndpi_flow, enable_protocol_guess, &proto_guessed); + malloc_size_stats = 0; if(enable_protocol_guess) ndpi_thread_info[thread_id].workflow->stats.guessed_flow_protocols++; } @@ -3528,6 +3547,8 @@ static void printResults(u_int64_t processing_time_usec, u_int64_t setup_time_us } if(dump_internal_stats) { + char buf[1024]; + if(cumulative_stats.ndpi_flow_count) printf("\tNum dissector calls: %-13llu (%.2f diss/flow)\n", (long long unsigned int)cumulative_stats.num_dissector_calls, @@ -3587,6 +3608,9 @@ static void printResults(u_int64_t processing_time_usec, u_int64_t setup_time_us printf("\tPatricia protocols: %llu/%llu (search/found)\n", (long long unsigned int)cumulative_stats.patricia_stats[NDPI_PTREE_PROTOCOLS].n_search, (long long unsigned int)cumulative_stats.patricia_stats[NDPI_PTREE_PROTOCOLS].n_found); + + if(enable_malloc_bins) + printf("\tData-path malloc histogram: %s\n", ndpi_print_bin(&malloc_bins, 0, buf, sizeof(buf))); } } @@ -3615,6 +3639,8 @@ static void printResults(u_int64_t processing_time_usec, u_int64_t setup_time_us } 
if(dump_internal_stats) { + char buf[1024]; + if(cumulative_stats.ndpi_flow_count) fprintf(results_file, "Num dissector calls: %llu (%.2f diss/flow)\n", (long long unsigned int)cumulative_stats.num_dissector_calls, @@ -3674,6 +3700,9 @@ static void printResults(u_int64_t processing_time_usec, u_int64_t setup_time_us fprintf(results_file, "Patricia protocols: %llu/%llu (search/found)\n", (long long unsigned int)cumulative_stats.patricia_stats[NDPI_PTREE_PROTOCOLS].n_search, (long long unsigned int)cumulative_stats.patricia_stats[NDPI_PTREE_PROTOCOLS].n_found); + + if(enable_malloc_bins) + fprintf(results_file, "Data-path malloc histogram: %s\n", ndpi_print_bin(&malloc_bins, 0, buf, sizeof(buf))); } fprintf(results_file, "\n"); @@ -5149,6 +5178,8 @@ void zscoreUnitTest() { if(extcap_dumper) pcap_dump_close(extcap_dumper); if(extcap_fifo_h) pcap_close(extcap_fifo_h); if(ndpi_info_mod) ndpi_exit_detection_module(ndpi_info_mod); + if(enable_malloc_bins) + ndpi_free_bin(&malloc_bins); if(csv_fp) fclose(csv_fp); ndpi_free(_debug_protocols); diff --git a/example/reader_util.c b/example/reader_util.c index ba8031185..ef522a84e 100644 --- a/example/reader_util.c +++ b/example/reader_util.c @@ -80,6 +80,11 @@ static u_int32_t flow_id = 0; u_int8_t enable_doh_dot_detection = 0; extern ndpi_init_prefs init_prefs; +extern int malloc_size_stats; +extern struct ndpi_bin malloc_bins; +extern int max_malloc_bins; +extern int enable_malloc_bins; + /* ****************************************************** */ struct flow_id_stats { @@ -309,6 +314,17 @@ void ndpi_free_flow_info_half(struct ndpi_flow_info *flow) { extern u_int32_t current_ndpi_memory, max_ndpi_memory; +static u_int32_t __slot_malloc_bins(u_int64_t v) +{ + int i; + + /* 0-2,3-4,5-8,9-16,17-32,33-64,65-128,129-256,257-512,513-1024,1025-2048,2049-4096,4097-8192,8193- */ + for(i=0; i < max_malloc_bins - 1; i++) + if((1ULL << (i + 1)) >= v) + return i; + return i; +} + /** * @brief ndpi_malloc wrapper function */ @@ -318,6 
+334,9 @@ static void *ndpi_malloc_wrapper(size_t size) { if(current_ndpi_memory > max_ndpi_memory) max_ndpi_memory = current_ndpi_memory; + if(enable_malloc_bins && malloc_size_stats) + ndpi_inc_bin(&malloc_bins, __slot_malloc_bins(size), 1); + return(malloc(size)); /* Don't change to ndpi_malloc !!!!! */ } @@ -1620,6 +1639,7 @@ static struct ndpi_proto packet_processing(struct ndpi_workflow * workflow, /* Set here any information (easily) available; in this trivial example we don't have any */ input_info.in_pkt_dir = NDPI_IN_PKT_DIR_UNKNOWN; input_info.seen_flow_beginning = NDPI_FLOW_BEGINNING_UNKNOWN; + malloc_size_stats = 1; flow->detected_protocol = ndpi_detection_process_packet(workflow->ndpi_struct, ndpi_flow, iph ? (uint8_t *)iph : (uint8_t *)iph6, ipsize, time_ms, &input_info); @@ -1650,6 +1670,7 @@ static struct ndpi_proto packet_processing(struct ndpi_workflow * workflow, process_ndpi_collected_info(workflow, flow); } } + malloc_size_stats = 0; } #if 0 diff --git a/fuzz/fuzz_ndpi_reader.c b/fuzz/fuzz_ndpi_reader.c index 084e49c19..1cdb212c1 100644 --- a/fuzz/fuzz_ndpi_reader.c +++ b/fuzz/fuzz_ndpi_reader.c @@ -18,6 +18,10 @@ u_int8_t enable_flow_stats = 0; u_int8_t human_readeable_string_len = 5; u_int8_t max_num_udp_dissected_pkts = 16 /* 8 is enough for most protocols, Signal requires more */, max_num_tcp_dissected_pkts = 80 /* due to telnet */; ndpi_init_prefs init_prefs = ndpi_track_flow_payload | ndpi_enable_ja3_plus; +int enable_malloc_bins = 0; +int malloc_size_stats = 0; +int max_malloc_bins = 0; +struct ndpi_bin malloc_bins; /* unused */ int bufferToFile(const char * name, const uint8_t *Data, size_t Size) { FILE * fd; |