aboutsummaryrefslogtreecommitdiff
path: root/example
diff options
context:
space:
mode:
authorLuca Deri <deri@ntop.org>2019-07-24 23:55:07 +0200
committerLuca Deri <deri@ntop.org>2019-07-24 23:55:07 +0200
commit3369716fc3cafa28ea20cb98a89596e19aec6225 (patch)
treee2111dcdd168a82592b983b6bf2a8d20e400fcd9 /example
parent21485683475ab6b3bb4468f142843d5f15f412f8 (diff)
Added SPLT (sequence of packet length and time) and BD (byte distribution) with -J
Diffstat (limited to 'example')
-rw-r--r--example/ndpiReader.c124
-rw-r--r--example/reader_util.c180
-rw-r--r--example/reader_util.h32
3 files changed, 322 insertions, 14 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c
index 32321aa94..e5ccc5648 100644
--- a/example/ndpiReader.c
+++ b/example/ndpiReader.c
@@ -82,7 +82,7 @@ static u_int8_t live_capture = 0;
static u_int8_t undetected_flows_deleted = 0;
/** User preferences **/
u_int8_t enable_protocol_guess = 1;
-u_int8_t verbose = 0, json_flag = 0;
+u_int8_t verbose = 0, json_flag = 0, enable_joy_stats = 0;
int nDPI_LogLevel = 0;
char *_debug_protocols = NULL;
static u_int8_t stats_flag = 0, bpf_filter_flag = 0;
@@ -225,6 +225,110 @@ FILE *trace = NULL;
*/
static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle);
+/**
+ * @brief Scale a histogram in place so that every counter fits in 8 bits
+ *
+ * All counters are OR-ed together to find the highest bit in use; every
+ * counter is then right-shifted by the number of significant bits found
+ * above bit 7. Counters that already fit in one byte are left untouched.
+ *
+ * @param bd  counters to scale (modified in place)
+ * @param len number of counters in bd
+ */
+static void reduceBDbits(uint32_t *bd, unsigned int len) {
+  /* unsigned on purpose: a signed mask turns negative when bit 31 is set
+     and an arithmetic right shift would then never reach zero */
+  uint32_t mask = 0;
+  unsigned int shift = 0;
+  unsigned int i;
+
+  for(i = 0; i < len; i++)
+    mask |= bd[i];
+
+  /* count the significant bits that sit above the low byte */
+  for(mask >>= 8; mask != 0; mask >>= 1)
+    shift++;
+
+  if(shift == 0)
+    return;
+
+  for(i = 0; i < len; i++)
+    bd[i] >>= shift;
+}
+
+/**
+ * @brief Print the byte-distribution (BD) statistics of a flow
+ *
+ * Combines the src2dst counters with the dst2src ones when the flow is
+ * bidirectional. If enable_joy_stats is set it then prints the byte-value
+ * mean, its standard deviation and the entropy of the byte histogram.
+ * With verbose > 1 the histogram itself is printed as well, scaled by
+ * reduceBDbits() so that every bucket fits in one byte.
+ *
+ * Output goes to results_file when it is set, to stdout otherwise.
+ *
+ * @param flow flow whose byte-distribution counters are reported
+ */
+static void
+flowGetBDMeanandVariance(struct ndpi_flow_info* flow) {
+  FILE *out = results_file ? results_file : stdout;
+  const uint32_t *array = NULL;
+  uint32_t tmp[256], i;
+  unsigned int num_bytes;
+  double mean = 0.0, variance = 0.0;
+
+  fflush(out);
+
+  if(!flow->bidirectional) {
+    const double samples = (double)flow->src2dst_num_bytes;
+
+    array = flow->src2dst_byte_count;
+    num_bytes = flow->src2dst_l4_bytes;
+
+    if(samples > 0.0)
+      mean = flow->src2dst_bd_mean;
+
+    /* *_bd_variance is a running sum of squared deviations: the standard
+       deviation uses an (n - 1) divisor, so it needs at least two samples */
+    if(samples > 1.0)
+      variance = sqrt(flow->src2dst_bd_variance / (samples - 1.0));
+  } else {
+    const double fwd = (double)flow->src2dst_num_bytes;
+    const double rev = (double)flow->dst2src_num_bytes;
+    const double samples = fwd + rev;
+
+    /* Sum up the byte_count arrays of both directions */
+    for(i = 0; i < 256; i++)
+      tmp[i] = flow->src2dst_byte_count[i] + flow->dst2src_byte_count[i];
+
+    array = tmp;
+    num_bytes = flow->src2dst_l4_bytes + flow->dst2src_l4_bytes;
+
+    if(samples > 0.0) {
+      /* each direction contributes in proportion to its sample count */
+      const double fwd_weight = fwd / samples;
+      const double rev_weight = rev / samples;
+
+      mean = fwd_weight * flow->src2dst_bd_mean + rev_weight * flow->dst2src_bd_mean;
+
+      if(samples > 1.0) {
+        variance = fwd_weight * flow->src2dst_bd_variance + rev_weight * flow->dst2src_bd_variance;
+        variance = sqrt(variance / (samples - 1.0));
+      }
+    }
+  }
+
+  if(!enable_joy_stats)
+    return;
+
+  if(verbose > 1) {
+    /* Scale a private copy for display only: the entropy below must be
+       computed from the unscaled counters, whatever the verbosity */
+    uint32_t scaled[256];
+
+    for(i = 0; i < 256; i++)
+      scaled[i] = array[i];
+
+    reduceBDbits(scaled, 256);
+
+    fprintf(out, " [byte_dist: ");
+    for(i = 0; i < 255; i++)
+      fprintf(out, "%u,", (unsigned char)scaled[i]);
+
+    fprintf(out, "%u]", (unsigned char)scaled[i]);
+  }
+
+  if(num_bytes != 0) {
+    /* NOTE(review): num_bytes is the L4 payload total, while the histogram
+       filled by ndpi_flow_update_byte_count() stops growing after
+       ETTA_MIN_OCTETS octets per direction; for longer flows the two no
+       longer match - confirm this is the intended entropy definition */
+    double entropy = ndpi_flow_get_byte_count_entropy(array, num_bytes);
+
+    fprintf(out, "[byte_dist_mean: %f", mean);
+    fprintf(out, "][byte_dist_std: %f]", variance);
+    fprintf(out, "[entropy: %f]", entropy);
+    fprintf(out, "[total_entropy: %f]", entropy * num_bytes);
+  }
+}
+
/**
* @brief Print help instructions
*/
@@ -236,7 +340,7 @@ static void help(u_int long_help) {
"-i <file|device> "
#endif
"[-f <filter>][-s <duration>][-m <duration>]\n"
- " [-p <protos>][-l <loops> [-q][-d][-h][-e <len>][-t][-v <level>]\n"
+ " [-p <protos>][-l <loops> [-q][-d][-J][-h][-e <len>][-t][-v <level>]\n"
" [-n <threads>][-w <file>][-c <file>][-j <file>][-x <file>]\n\n"
"Usage:\n"
" -i <file.pcap|device> | Specify a pcap file/playlist to read packets from or a\n"
@@ -255,6 +359,8 @@ static void help(u_int long_help) {
" -d | Disable protocol guess and use only DPI\n"
" -e <len> | Min human readeable string match len. Default %u\n"
" -q | Quiet mode\n"
+ " -J | Display flow SPLT (sequence of packet length and time)\n"
+ " | and BD (byte distribution). See https://github.com/cisco/joy\n"
" -t | Dissect GTP/TZSP tunnels\n"
" -r | Print nDPI version and git revision\n"
" -c <path> | Load custom categories from the specified file\n"
@@ -327,6 +433,7 @@ static struct option longopts[] = {
{ "version", no_argument, NULL, 'V'},
{ "help", no_argument, NULL, 'h'},
{ "json", required_argument, NULL, 'j'},
+ { "joy", no_argument, NULL, 'J'}, /* plain flag: -J takes no value (no ':' after J in the getopt string) */
{ "result-path", required_argument, NULL, 'w'},
{ "quiet", no_argument, NULL, 'q'},
@@ -477,7 +584,7 @@ static void parseOptions(int argc, char **argv) {
}
#endif
- while((opt = getopt_long(argc, argv, "e:c:df:g:i:hp:l:s:tv:V:n:j:rp:w:q0123:456:7:89:m:b:x:", longopts, &option_idx)) != EOF) {
+ while((opt = getopt_long(argc, argv, "e:c:df:g:i:hp:l:s:tv:V:n:j:Jrp:w:q0123:456:7:89:m:b:x:", longopts, &option_idx)) != EOF) {
#ifdef DEBUG_TRACE
if(trace) fprintf(trace, " #### -%c [%s] #### \n", opt, optarg ? optarg : "");
#endif
@@ -573,6 +680,10 @@ static void parseOptions(int argc, char **argv) {
help(1);
break;
+ case 'J':
+ enable_joy_stats = 1;
+ break;
+
case 'j':
#ifndef HAVE_JSON_C
printf("WARNING: this copy of ndpiReader has been compiled without json-c: JSON export disabled\n");
@@ -799,6 +910,13 @@ static void printFlow(u_int16_t id, struct ndpi_flow_info *flow, u_int16_t threa
if(flow->vlan_id > 0) fprintf(out, "[VLAN: %u]", flow->vlan_id);
+ if(enable_joy_stats) {
+ /* Print entropy values for monitored flows. */
+ flowGetBDMeanandVariance(flow);
+ fflush(out);
+ fprintf(out, "[score: %.4f]", flow->score);
+ }
+
if(flow->detected_protocol.master_protocol) {
char buf[64];
diff --git a/example/reader_util.c b/example/reader_util.c
index d0ec31930..78729a393 100644
--- a/example/reader_util.c
+++ b/example/reader_util.c
@@ -34,6 +34,8 @@
#else
#include <unistd.h>
#include <netinet/in.h>
+#include <math.h>
+#include <float.h>
#endif
#ifndef ETH_P_IP
@@ -73,8 +75,9 @@
#include "ndpi_main.h"
#include "reader_util.h"
+#include "ndpi_classify.h"
-extern u_int8_t enable_protocol_guess;
+extern u_int8_t enable_protocol_guess, enable_joy_stats;
extern u_int8_t verbose, human_readeable_string_len;
/* ***************************************************** */
@@ -273,6 +276,94 @@ int ndpi_workflow_node_cmp(const void *a, const void *b) {
return(0); /* notreached */
}
+/**
+ * \brief Add the octets of one payload to a flow's per-direction byte histogram.
+ *
+ * Counting stops as soon as ETTA_MIN_OCTETS octets have been accounted
+ * for in the selected direction. The original implementation note says
+ * the spec regards 4000 octets as a large enough sample to reflect the
+ * byte distribution accurately.
+ *
+ * \param flow Flow record whose histogram is updated
+ * \param x Payload bytes
+ * \param len Number of bytes available at x
+ * \param src_to_dst_direction Non-zero selects the src2dst counters,
+ *        zero selects the dst2src ones
+ * \return none
+ */
+static void
+ndpi_flow_update_byte_count(struct ndpi_flow_info *flow, const void *x,
+                            unsigned int len, u_int8_t src_to_dst_direction) {
+  const unsigned char *octets = x;
+  u_int32_t *histogram = src_to_dst_direction ? flow->src2dst_byte_count
+                                              : flow->dst2src_byte_count;
+  /* the L4 byte total already includes this payload: subtract it to get
+     the number of octets seen before it */
+  u_int32_t seen = (src_to_dst_direction ? flow->src2dst_l4_bytes
+                                         : flow->dst2src_l4_bytes) - len;
+  u_int32_t pos;
+
+  for (pos = 0; pos < len && seen < ETTA_MIN_OCTETS; pos++, seen++)
+    histogram[octets[pos]]++;
+}
+
+/**
+ * \brief Fold the octets of one payload into a flow's running byte statistics.
+ *
+ * For every octet the per-direction sample count is incremented, the
+ * running mean is moved towards the octet value, and the *_bd_variance
+ * field accumulates delta * (value - updated mean).
+ *
+ * \param flow Flow record whose statistics are updated
+ * \param x Payload bytes
+ * \param len Number of bytes available at x
+ * \param src_to_dst_direction Non-zero selects the src2dst fields,
+ *        zero selects the dst2src ones
+ * \return none
+ */
+static void
+ndpi_flow_update_byte_dist_mean_var(ndpi_flow_info_t *flow, const void *x,
+                                    unsigned int len, u_int8_t src_to_dst_direction) {
+  const unsigned char *octets = x;
+  u_int32_t *samples;
+  double *running_mean, *sq_dev_sum;
+  unsigned int pos;
+
+  /* pick the accumulators of the requested direction once, up front */
+  if (src_to_dst_direction) {
+    samples = &flow->src2dst_num_bytes;
+    running_mean = &flow->src2dst_bd_mean;
+    sq_dev_sum = &flow->src2dst_bd_variance;
+  } else {
+    samples = &flow->dst2src_num_bytes;
+    running_mean = &flow->dst2src_bd_mean;
+    sq_dev_sum = &flow->dst2src_bd_variance;
+  }
+
+  for (pos = 0; pos < len; pos++) {
+    const double value = (double)octets[pos];
+    const double delta = value - *running_mean;
+
+    *samples += 1;
+    *running_mean += delta/((double)*samples);
+    *sq_dev_sum += delta*(value - *running_mean);
+  }
+}
+
+/**
+ * \brief Entropy, in bits, of a 256-bucket byte histogram.
+ *
+ * Each bucket contributes -p * log2(p) with p = byte_count[i] / num_bytes;
+ * buckets whose p does not exceed FLT_EPSILON are skipped.
+ *
+ * \param byte_count Number of occurrences of each byte value
+ * \param num_bytes Total used as the divisor for every bucket
+ * \return the entropy, or 0 when num_bytes is 0
+ */
+float
+ndpi_flow_get_byte_count_entropy(const uint32_t byte_count[256],
+                                 unsigned int num_bytes)
+{
+  int i;
+  float p, sum = 0.0f;
+
+  /* nothing was counted: avoid dividing by zero (which would yield -inf) */
+  if (num_bytes == 0)
+    return 0.0f;
+
+  for (i = 0; i < 256; i++) {
+    p = (float) byte_count[i] / (float) num_bytes;
+    if (p > FLT_EPSILON) {
+      sum -= p * logf(p);
+    }
+  }
+
+  /* natural log -> log base 2 */
+  return sum / logf(2.0f);
+}
+
/* ***************************************************** */
static void patchIPv6Address(char *str) {
@@ -309,11 +400,13 @@ static struct ndpi_flow_info *get_ndpi_flow_info(struct ndpi_workflow * workflow
u_int8_t *proto,
u_int8_t **payload,
u_int16_t *payload_len,
- u_int8_t *src_to_dst_direction) {
+ u_int8_t *src_to_dst_direction,
+ struct timeval when) {
u_int32_t idx, l4_offset, hashval;
struct ndpi_flow_info flow;
void *ret;
const u_int8_t *l3, *l4;
+ u_int32_t l4_data_len = 0XFEEDFACE;
/*
Note: to keep things simple (ndpiReader is just a demo app)
@@ -363,6 +456,7 @@ static struct ndpi_flow_info *get_ndpi_flow_info(struct ndpi_workflow * workflow
tcp_len = ndpi_min(4*(*tcph)->doff, l4_packet_len);
*payload = (u_int8_t*)&l4[tcp_len];
*payload_len = ndpi_max(0, l4_packet_len-4*(*tcph)->doff);
+ l4_data_len = l4_packet_len - sizeof(struct ndpi_tcphdr);
} else if(iph->protocol == IPPROTO_UDP && l4_packet_len >= 8) {
// udp
@@ -371,9 +465,11 @@ static struct ndpi_flow_info *get_ndpi_flow_info(struct ndpi_workflow * workflow
*sport = ntohs((*udph)->source), *dport = ntohs((*udph)->dest);
*payload = (u_int8_t*)&l4[sizeof(struct ndpi_udphdr)];
*payload_len = (l4_packet_len > sizeof(struct ndpi_udphdr)) ? l4_packet_len-sizeof(struct ndpi_udphdr) : 0;
+ l4_data_len = l4_packet_len - sizeof(struct ndpi_udphdr);
} else {
// non tcp/udp protocols
*sport = *dport = 0;
+ l4_data_len = 0;
}
flow.protocol = iph->protocol, flow.vlan_id = vlan_id;
@@ -459,7 +555,15 @@ static struct ndpi_flow_info *get_ndpi_flow_info(struct ndpi_workflow * workflow
workflow->stats.ndpi_flow_count++;
*src = newflow->src_id, *dst = newflow->dst_id;
-
+ newflow->src2dst_pkt_len[newflow->src2dst_pkt_count] = l4_packet_len;
+ newflow->src2dst_pkt_time[newflow->src2dst_pkt_count] = when;
+ if (newflow->src2dst_pkt_count == 0) {
+ newflow->src2dst_start = when;
+ }
+ newflow->src2dst_pkt_count++;
+ if (l4_data_len != 0XFEEDFACE) {
+ newflow->src2dst_opackets++;
+ }
return newflow;
}
} else {
@@ -485,6 +589,28 @@ static struct ndpi_flow_info *get_ndpi_flow_info(struct ndpi_workflow * workflow
else
*src = flow->dst_id, *dst = flow->src_id, *src_to_dst_direction = 0, flow->bidirectional = 1;
}
+ /*
+  * Record the length and timestamp of the packet (SPLT) in the arrays of
+  * the direction it travels in; at most MAX_NUM_PKTS packets are kept per
+  * direction. src_to_dst_direction is an out-parameter, so the value it
+  * points to must be tested: the pointer itself is never NULL here and
+  * testing it would send every packet down the src2dst branch.
+  */
+ if (*src_to_dst_direction) {
+   if (flow->src2dst_pkt_count < MAX_NUM_PKTS) {
+     flow->src2dst_pkt_len[flow->src2dst_pkt_count] = l4_packet_len;
+     flow->src2dst_pkt_time[flow->src2dst_pkt_count] = when;
+     flow->src2dst_pkt_count++;
+   }
+   if (l4_data_len != 0XFEEDFACE) {
+     flow->src2dst_opackets++;
+   }
+ } else {
+   if (flow->dst2src_pkt_count < MAX_NUM_PKTS) {
+     flow->dst2src_pkt_len[flow->dst2src_pkt_count] = l4_packet_len;
+     flow->dst2src_pkt_time[flow->dst2src_pkt_count] = when;
+     /* remember when the first packet of the reverse direction was seen */
+     if (flow->dst2src_pkt_count == 0) {
+       flow->dst2src_start = when;
+     }
+     flow->dst2src_pkt_count++;
+   }
+   if (l4_data_len != 0XFEEDFACE) {
+     flow->dst2src_opackets++;
+   }
+ }
return flow;
}
}
@@ -503,7 +629,8 @@ static struct ndpi_flow_info *get_ndpi_flow_info6(struct ndpi_workflow * workflo
u_int8_t *proto,
u_int8_t **payload,
u_int16_t *payload_len,
- u_int8_t *src_to_dst_direction) {
+ u_int8_t *src_to_dst_direction,
+ struct timeval when) {
struct ndpi_iphdr iph;
memset(&iph, 0, sizeof(iph));
@@ -523,12 +650,33 @@ static struct ndpi_flow_info *get_ndpi_flow_info6(struct ndpi_workflow * workflo
ntohs(iph6->ip6_hdr.ip6_un1_plen),
tcph, udph, sport, dport,
src, dst, proto, payload,
- payload_len, src_to_dst_direction));
+ payload_len, src_to_dst_direction, when));
}
/* ****************************************************** */
void process_ndpi_collected_info(struct ndpi_workflow * workflow, struct ndpi_flow_info *flow) {
+
+ /* Update SPLT scores. */
+ if (flow->bidirectional) {
+ flow->score = ndpi_classify(flow->src2dst_pkt_len, flow->src2dst_pkt_time,
+ flow->dst2src_pkt_len, flow->dst2src_pkt_time,
+ flow->src2dst_start, flow->dst2src_start,
+ MAX_NUM_PKTS, flow->src_port, flow->dst_port,
+ flow->src2dst_packets, flow->dst2src_packets,
+ flow->src2dst_opackets, flow->dst2src_opackets,
+ flow->src2dst_l4_bytes, flow->dst2src_l4_bytes, 1,
+ flow->src2dst_byte_count, flow->dst2src_byte_count);
+ } else {
+ flow->score = ndpi_classify(flow->src2dst_pkt_len, flow->src2dst_pkt_time,
+ NULL, NULL, flow->src2dst_start, flow->src2dst_start,
+ MAX_NUM_PKTS, flow->src_port, flow->dst_port,
+ flow->src2dst_packets, 0,
+ flow->src2dst_opackets, 0,
+ flow->src2dst_l4_bytes, 0, 1,
+ flow->src2dst_byte_count, NULL);
+ }
+
if(!flow->ndpi_flow) return;
snprintf(flow->host_server_name, sizeof(flow->host_server_name), "%s",
@@ -613,7 +761,8 @@ static struct ndpi_proto packet_processing(struct ndpi_workflow * workflow,
u_int16_t ip_offset,
u_int16_t ipsize, u_int16_t rawsize,
const struct pcap_pkthdr *header,
- const u_char *packet) {
+ const u_char *packet,
+ struct timeval when) {
struct ndpi_id_struct *src, *dst;
struct ndpi_flow_info *flow = NULL;
struct ndpi_flow_struct *ndpi_flow = NULL;
@@ -631,12 +780,12 @@ static struct ndpi_proto packet_processing(struct ndpi_workflow * workflow,
ntohs(iph->tot_len) - (iph->ihl * 4),
&tcph, &udph, &sport, &dport,
&src, &dst, &proto,
- &payload, &payload_len, &src_to_dst_direction);
+ &payload, &payload_len, &src_to_dst_direction, when);
else
flow = get_ndpi_flow_info6(workflow, vlan_id, iph6, ip_offset,
&tcph, &udph, &sport, &dport,
&src, &dst, &proto,
- &payload, &payload_len, &src_to_dst_direction);
+ &payload, &payload_len, &src_to_dst_direction, when);
if(flow != NULL) {
workflow->stats.ip_packet_count++;
@@ -644,11 +793,20 @@ static struct ndpi_proto packet_processing(struct ndpi_workflow * workflow,
workflow->stats.total_ip_bytes += rawsize;
ndpi_flow = flow->ndpi_flow;
- if(src_to_dst_direction)
+ if(src_to_dst_direction) {
flow->src2dst_packets++, flow->src2dst_bytes += rawsize;
- else
+ flow->src2dst_l4_bytes += payload_len;
+ } else {
flow->dst2src_packets++, flow->dst2src_bytes += rawsize;
+ flow->dst2src_l4_bytes += payload_len;
+ }
+ if(enable_joy_stats) {
+ /* Update BD, distribution and mean. */
+ ndpi_flow_update_byte_count(flow, payload, payload_len, src_to_dst_direction);
+ ndpi_flow_update_byte_dist_mean_var(flow, payload, payload_len, src_to_dst_direction);
+ }
+
flow->last_seen = time;
if(!flow->has_human_readeable_strings) {
@@ -1070,7 +1228,7 @@ iph_check:
/* process the packet */
return(packet_processing(workflow, time, vlan_id, iph, iph6,
ip_offset, header->caplen - ip_offset,
- header->caplen, header, packet));
+ header->caplen, header, packet, header->ts));
}
/* ********************************************************** */
diff --git a/example/reader_util.h b/example/reader_util.h
index b006fd8d3..c4932118c 100644
--- a/example/reader_util.h
+++ b/example/reader_util.h
@@ -31,6 +31,7 @@
#include "uthash.h"
#include <pcap.h>
+#include "ndpi_classify.h"
#ifdef USE_DPDK
#include <rte_eal.h>
@@ -50,6 +51,14 @@
extern int dpdk_port_init(int port, struct rte_mempool *mbuf_pool);
#endif
+/* ETTA spec definitions for feature readiness */
+#define ETTA_MIN_PACKETS 10
+#define ETTA_MIN_OCTETS 4000
+/** maximum line length */
+#define LINEMAX 512
+#define MAX_BYTE_COUNT_ARRAY_LENGTH 256
+#define MAX_NUM_PKTS 100
+
#define MAX_NUM_READER_THREADS 16
#define IDLE_SCAN_PERIOD 10 /* msec (use TICK_RESOLUTION = 1000) */
#define MAX_IDLE_TIME 30000
@@ -128,6 +137,7 @@ typedef struct ndpi_flow_info {
u_int64_t last_seen;
u_int64_t src2dst_bytes, dst2src_bytes;
u_int32_t src2dst_packets, dst2src_packets;
+ u_int32_t src2dst_opackets, dst2src_opackets;
u_int32_t has_human_readeable_strings;
char human_readeable_string_buffer[32];
@@ -148,6 +158,27 @@ typedef struct ndpi_flow_info {
} ssh_ssl;
void *src_id, *dst_id;
+
+ // Entropy / SPLT / byte-distribution fields (filled in by reader_util.c)
+ u_int16_t src2dst_pkt_len[MAX_NUM_PKTS]; /*!< l4_packet_len of each recorded src2dst packet */
+ struct timeval src2dst_pkt_time[MAX_NUM_PKTS]; /*!< arrival time of each recorded src2dst packet */
+ u_int16_t dst2src_pkt_len[MAX_NUM_PKTS]; /*!< l4_packet_len of each recorded dst2src packet */
+ struct timeval dst2src_pkt_time[MAX_NUM_PKTS]; /*!< arrival time of each recorded dst2src packet */
+ struct timeval src2dst_start; /*!< arrival time of the first src2dst packet */
+ struct timeval dst2src_start; /*!< arrival time of the first dst2src packet */
+ u_int16_t src2dst_pkt_count; /*!< number of entries used in the src2dst_pkt_* arrays */
+ u_int16_t dst2src_pkt_count; /*!< number of entries used in the dst2src_pkt_* arrays */
+ u_int32_t src2dst_l4_bytes; /*!< sum of the payload lengths seen src2dst (bytes, not packets) */
+ u_int32_t dst2src_l4_bytes; /*!< sum of the payload lengths seen dst2src (bytes, not packets) */
+ u_int32_t src2dst_byte_count[256]; /*!< number of occurrences of each byte value, src2dst */
+ u_int32_t dst2src_byte_count[256]; /*!< number of occurrences of each byte value, dst2src */
+ u_int32_t src2dst_num_bytes; /*!< payload bytes folded into src2dst_bd_mean / src2dst_bd_variance */
+ u_int32_t dst2src_num_bytes; /*!< payload bytes folded into dst2src_bd_mean / dst2src_bd_variance */
+ double src2dst_bd_mean; /*!< running mean of the src2dst payload byte values */
+ double src2dst_bd_variance; /*!< running sum of squared deviations (divided by n-1 only when printed) */
+ double dst2src_bd_mean; /*!< running mean of the dst2src payload byte values */
+ double dst2src_bd_variance; /*!< running sum of squared deviations (divided by n-1 only when printed) */
+ float score; /*!< value returned by ndpi_classify() in process_ndpi_collected_info() */
} ndpi_flow_info_t;
@@ -245,6 +276,7 @@ void process_ndpi_collected_info(struct ndpi_workflow * workflow, struct ndpi_fl
u_int32_t ethernet_crc32(const void* data, size_t n_bytes);
void ndpi_flow_info_freer(void *node);
const char* print_cipher_id(u_int32_t cipher);
+float ndpi_flow_get_byte_count_entropy(const uint32_t byte_count[256], unsigned int num_bytes);
extern int nDPI_LogLevel;