diff options
-rw-r--r-- | example/ndpiReader.c | 2 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 10 | ||||
-rw-r--r-- | src/lib/ndpi_main.c | 252 | ||||
-rw-r--r-- | src/lib/third_party/include/hash.h | 27 | ||||
-rw-r--r-- | src/lib/third_party/src/hash.c | 189 |
5 files changed, 372 insertions, 108 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c index c3b49aa41..76fad23dd 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -1464,6 +1464,8 @@ static void setupDetection(u_int16_t thread_id, pcap_t * pcap_handle) { ndpi_pref_http_dont_dissect_response, 0); ndpi_set_detection_preferences(ndpi_thread_info[thread_id].workflow->ndpi_struct, ndpi_pref_dns_dissect_response, 0); + ndpi_set_detection_preferences(ndpi_thread_info[thread_id].workflow->ndpi_struct, + ndpi_pref_enable_category_substring_match, 0); ndpi_workflow_set_flow_detected_callback(ndpi_thread_info[thread_id].workflow, on_protocol_discovered, (void *)(uintptr_t)thread_id); diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 33b898c20..2bffb4001 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -1,7 +1,7 @@ /* * ndpi_typedefs.h * - * Copyright (C) 2011-16 - ntop.org + * Copyright (C) 2011-18 - ntop.org * * This file is part of nDPI, an open source deep packet inspection * library based on the OpenDPI and PACE technology by ipoque GmbH @@ -801,7 +801,8 @@ typedef enum { ndpi_pref_http_dont_dissect_response = 0, ndpi_pref_dns_dissect_response, ndpi_pref_direction_detect_disable, - ndpi_pref_disable_metadata_export + ndpi_pref_disable_metadata_export, + ndpi_pref_enable_category_substring_match, } ndpi_detection_preference; /* ntop extensions */ @@ -844,6 +845,7 @@ struct hs_list { #endif #ifdef NDPI_LIB_COMPILATION + struct ndpi_detection_module_struct { NDPI_PROTOCOL_BITMASK detection_bitmask; NDPI_PROTOCOL_BITMASK generic_http_packet_bitmask; @@ -909,6 +911,7 @@ struct ndpi_detection_module_struct { #else ndpi_automa hostnames, hostnames_shadow; #endif + void *hostnames_hash; void *ipAddresses, *ipAddresses_shadow; /* Patricia */ u_int8_t categories_loaded; } custom_categories; @@ -961,7 +964,8 @@ struct ndpi_detection_module_struct { u_int8_t http_dont_dissect_response:1, dns_dissect_response:1, direction_detect_disable:1, /* disable internal detection of packet direction */ - disable_metadata_export:1 /* No metadata is exported */ + disable_metadata_export:1, /* No metadata is exported */ + enable_category_substring_match:1 /* Default is perfect match */ ; void *hyperscan; /* Intel Hyperscan */ diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index 43f8d93c6..28976fe56 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -43,6 +43,8 @@ #include "ndpi_content_match.c.inc" #include "third_party/include/ndpi_patricia.h" #include "third_party/src/ndpi_patricia.c" +#include "third_party/include/hash.h" +#include "third_party/src/hash.c" #ifdef HAVE_HYPERSCAN #include <hs.h> @@ -352,8 +354,21 @@ void * ndpi_calloc(unsigned long count, size_t size) /* ****************************************** */ -void ndpi_free(void *ptr) { if(_ndpi_free) _ndpi_free(ptr); else free(ptr); } -void ndpi_flow_free(void *ptr) { if(_ndpi_flow_free) _ndpi_flow_free(ptr); else ndpi_free_flow((struct ndpi_flow_struct *) ptr); } +void ndpi_free(void *ptr) { + if(_ndpi_free) + _ndpi_free(ptr); + else + free(ptr); +} + +/* ****************************************** */ + +void ndpi_flow_free(void *ptr) { + if(_ndpi_flow_free) + _ndpi_flow_free(ptr); + else + ndpi_free_flow((struct ndpi_flow_struct *) ptr); +} /* ****************************************** */ @@ -884,6 +899,10 @@ int ndpi_set_detection_preferences(struct ndpi_detection_module_struct *ndpi_mod ndpi_mod->disable_metadata_export = (u_int8_t)value; break; + case ndpi_pref_enable_category_substring_match: + ndpi_mod->enable_category_substring_match = (u_int8_t)value; + break; + default: return(-1); } @@ -2101,6 +2120,8 @@ struct ndpi_detection_module_struct *ndpi_init_detection_module(void) { ndpi_str->custom_categories.hostnames_shadow.ac_automa = ac_automata_init(ac_match_handler); #endif + ndpi_str->custom_categories.hostnames_hash = (void*)ht_create(65536); + ndpi_str->custom_categories.ipAddresses = ndpi_New_Patricia(32 /* IPv4 */); ndpi_str->custom_categories.ipAddresses_shadow = ndpi_New_Patricia(32 /* IPv4 */); @@ -2203,30 +2224,39 @@ static int hyperscanCustomEventHandler(unsigned int id, static int ndpi_match_custom_category(struct ndpi_detection_module_struct *ndpi_struct, char *name, unsigned long *id) { + if(!ndpi_struct->enable_category_substring_match) { + if(ndpi_struct->custom_categories.hostnames_hash == NULL) + return(-1); + else { + *id = (unsigned long)ht_get((hashtable_t*)ndpi_struct->custom_categories.hostnames_hash, name); + return(0); + } + } else { #ifdef HAVE_HYPERSCAN - if(ndpi_struct->custom_categories.hostnames == NULL) - return(-1); - else { - hs_error_t rc; + if(ndpi_struct->custom_categories.hostnames == NULL) + return(-1); + else { + hs_error_t rc; - *id = (unsigned long)-1; + *id = (unsigned long)-1; - rc = hs_scan(ndpi_struct->custom_categories.hostnames->database, - name, strlen(name), 0, - ndpi_struct->custom_categories.hostnames->scratch, - hyperscanCustomEventHandler, id); + rc = hs_scan(ndpi_struct->custom_categories.hostnames->database, + name, strlen(name), 0, + ndpi_struct->custom_categories.hostnames->scratch, + hyperscanCustomEventHandler, id); - if(rc == HS_SCAN_TERMINATED) { + if(rc == HS_SCAN_TERMINATED) { #ifdef DEBUG - printf("[HS] Found category %lu for %s\n", *id, name); + printf("[HS] Found category %lu for %s\n", *id, name); #endif - return(0); - } else - return(-1); - } + return(0); + } else + return(-1); + } #else - return(ndpi_match_string_id(ndpi_struct->custom_categories.hostnames.ac_automa, name, id)); + return(ndpi_match_string_id(ndpi_struct->custom_categories.hostnames.ac_automa, name, id)); #endif + } } /* *********************************************** */ @@ -2295,6 +2325,9 @@ void ndpi_exit_detection_module(struct ndpi_detection_module_struct *ndpi_struct if(ndpi_struct->custom_categories.ipAddresses_shadow != NULL) ndpi_Destroy_Patricia((patricia_tree_t*)ndpi_struct->custom_categories.ipAddresses_shadow, free_ptree_data); + if(ndpi_struct->custom_categories.hostnames_hash) + ht_free((hashtable_t*)ndpi_struct->custom_categories.hostnames_hash); + ndpi_free(ndpi_struct); } } @@ -3887,133 +3920,142 @@ void ndpi_load_ip_category(struct ndpi_detection_module_struct *ndpi_struct, int ndpi_load_hostname_category(struct ndpi_detection_module_struct *ndpi_struct, char *name, ndpi_protocol_category_t category) { - AC_PATTERN_t ac_pattern; - if(name == NULL) return(-1); - /* printf("===> Loading %s as %u\n", name, category); */ + if(!ndpi_struct->enable_category_substring_match) { + ht_set((hashtable_t*)ndpi_struct->custom_categories.hostnames_hash, name, (u_int16_t)category); + return(0); + } else { + AC_PATTERN_t ac_pattern; + + /* printf("===> Loading %s as %u\n", name, category); */ #ifdef HAVE_HYPERSCAN - { - struct hs_list *h = (struct hs_list*)malloc(sizeof(struct hs_list)); + { + struct hs_list *h = (struct hs_list*)malloc(sizeof(struct hs_list)); - if(h) { - char tmp[256]; - int i, j; + if(h) { + char tmp[256]; + int i, j; - for(i=0, j=0; (j<sizeof(tmp)) && (name[i] != '\0'); i++) { - if(name[i] == '.') - tmp[j++] = '\\'; + for(i=0, j=0; (j<sizeof(tmp)) && (name[i] != '\0'); i++) { + if(name[i] == '.') + tmp[j++] = '\\'; - tmp[j++] = name[i]; - } + tmp[j++] = name[i]; + } - tmp[j] = '\0'; + tmp[j] = '\0'; - h->expression = strdup(name), h->id = (unsigned int)category; - if(h->expression == NULL) { - free(h); - return(-2); - } + h->expression = strdup(name), h->id = (unsigned int)category; + if(h->expression == NULL) { + free(h); + return(-2); + } - h->next = ndpi_struct->custom_categories.to_load; - ndpi_struct->custom_categories.to_load = h; - ndpi_struct->custom_categories.num_to_load++; - } else - return(-1); - } + h->next = ndpi_struct->custom_categories.to_load; + ndpi_struct->custom_categories.to_load = h; + ndpi_struct->custom_categories.num_to_load++; + } else + return(-1); + } #else - if(ndpi_struct->custom_categories.hostnames_shadow.ac_automa == NULL) - return(-1); + if(ndpi_struct->custom_categories.hostnames_shadow.ac_automa == NULL) + return(-1); - ac_pattern.astring = name, ac_pattern.length = strlen(ac_pattern.astring); - ac_pattern.rep.number = (int)category; + ac_pattern.astring = name, ac_pattern.length = strlen(ac_pattern.astring); + ac_pattern.rep.number = (int)category; - ac_automata_add(ndpi_struct->custom_categories.hostnames_shadow.ac_automa, &ac_pattern); + ac_automata_add(ndpi_struct->custom_categories.hostnames_shadow.ac_automa, &ac_pattern); #endif - + } + return(0); } /* ********************************************************************************* */ int ndpi_enable_loaded_categories(struct ndpi_detection_module_struct *ndpi_str) { + if(!ndpi_str->enable_category_substring_match) { + ; /* Nothing to do */ + } else { #ifdef HAVE_HYPERSCAN - if(ndpi_str->custom_categories.num_to_load > 0) { - const char **expressions; - unsigned int *ids, i; - int rc; - struct hs_list *head = ndpi_str->custom_categories.to_load; - - expressions = (const char**)calloc(sizeof(char*), - ndpi_str->custom_categories.num_to_load+1); - if(!expressions) return(-1); - - ids = (unsigned int*)calloc(sizeof(unsigned int), - ndpi_str->custom_categories.num_to_load+1); - if(!ids) { - free(expressions); - return(-1); - } + if(ndpi_str->custom_categories.num_to_load > 0) { + const char **expressions; + unsigned int *ids, i; + int rc; + struct hs_list *head = ndpi_str->custom_categories.to_load; + + expressions = (const char**)calloc(sizeof(char*), + ndpi_str->custom_categories.num_to_load+1); + if(!expressions) return(-1); + + ids = (unsigned int*)calloc(sizeof(unsigned int), + ndpi_str->custom_categories.num_to_load+1); + if(!ids) { + free(expressions); + return(-1); + } - for(i=0; head != NULL; i++) { + for(i=0; head != NULL; i++) { #ifdef DEBUG - printf("[HS] Loading category %u for %s\n", head->id, head->expression); + printf("[HS] Loading category %u for %s\n", head->id, head->expression); #endif - expressions[i] = head->expression, ids[i] = head->id; - head = head->next; - } + expressions[i] = head->expression, ids[i] = head->id; + head = head->next; + } - free_hyperscan_memory(ndpi_str->custom_categories.hostnames); - ndpi_str->custom_categories.hostnames = (struct hs*)malloc(sizeof(struct hs)); + free_hyperscan_memory(ndpi_str->custom_categories.hostnames); + ndpi_str->custom_categories.hostnames = (struct hs*)malloc(sizeof(struct hs)); - if(ndpi_str->custom_categories.hostnames == NULL) { - free(expressions), free(ids); - return(-1); /* Failed */ - } + if(ndpi_str->custom_categories.hostnames == NULL) { + free(expressions), free(ids); + return(-1); /* Failed */ + } - rc = hyperscan_load_patterns(ndpi_str->custom_categories.hostnames, - ndpi_str->custom_categories.num_to_load, - expressions, ids); - free(expressions), free(ids); + rc = hyperscan_load_patterns(ndpi_str->custom_categories.hostnames, + ndpi_str->custom_categories.num_to_load, + expressions, ids); + free(expressions), free(ids); - head = ndpi_str->custom_categories.to_load; - while(head != NULL) { - struct hs_list *next = head->next; + head = ndpi_str->custom_categories.to_load; + while(head != NULL) { + struct hs_list *next = head->next; - free(head->expression); - free(head); + free(head->expression); + free(head); - head = next; - } + head = next; + } - ndpi_str->custom_categories.to_load = NULL; + ndpi_str->custom_categories.to_load = NULL; - if(rc < 0) { - free(ndpi_str->custom_categories.hostnames); - ndpi_str->custom_categories.hostnames = NULL; - ndpi_str->custom_categories.hostnames = NULL; + if(rc < 0) { + free(ndpi_str->custom_categories.hostnames); + ndpi_str->custom_categories.hostnames = NULL; + } } - } #else - /* Free */ - ac_automata_release((AC_AUTOMATA_t*)ndpi_str->custom_categories.hostnames.ac_automa); - ndpi_Destroy_Patricia((patricia_tree_t*)ndpi_str->custom_categories.ipAddresses, free_ptree_data); + /* Free */ + ac_automata_release((AC_AUTOMATA_t*)ndpi_str->custom_categories.hostnames.ac_automa); + ndpi_Destroy_Patricia((patricia_tree_t*)ndpi_str->custom_categories.ipAddresses, free_ptree_data); - /* Finalize */ - ac_automata_finalize((AC_AUTOMATA_t*)ndpi_str->custom_categories.hostnames_shadow.ac_automa); + /* Finalize */ + ac_automata_finalize((AC_AUTOMATA_t*)ndpi_str->custom_categories.hostnames_shadow.ac_automa); - /* Swap */ - ndpi_str->custom_categories.hostnames.ac_automa = ndpi_str->custom_categories.hostnames_shadow.ac_automa; - ndpi_str->custom_categories.ipAddresses = ndpi_str->custom_categories.ipAddresses_shadow; + /* Swap */ + ndpi_str->custom_categories.hostnames.ac_automa = ndpi_str->custom_categories.hostnames_shadow.ac_automa; + ndpi_str->custom_categories.ipAddresses = ndpi_str->custom_categories.ipAddresses_shadow; - /* Realloc */ - ndpi_str->custom_categories.hostnames_shadow.ac_automa = ac_automata_init(ac_match_handler); - ndpi_str->custom_categories.ipAddresses_shadow = ndpi_New_Patricia(32 /* IPv4 */); + /* Realloc */ + ndpi_str->custom_categories.hostnames_shadow.ac_automa = ac_automata_init(ac_match_handler); + ndpi_str->custom_categories.ipAddresses_shadow = ndpi_New_Patricia(32 /* IPv4 */); #endif - + } + ndpi_str->custom_categories.categories_loaded = 1; + return(0); } diff --git a/src/lib/third_party/include/hash.h b/src/lib/third_party/include/hash.h new file mode 100644 index 000000000..8862671b1 --- /dev/null +++ b/src/lib/third_party/include/hash.h @@ -0,0 +1,27 @@ +/* Based on https://gist.github.com/tonious/1377667 */ + +#ifndef _HASH_H_ +#define _HASH_H_ + +struct entry_s { + char *key; + u_int16_t value; + struct entry_s *next; +}; + +typedef struct entry_s entry_t; + +struct hashtable_s { + int size; + struct entry_s **table; +}; + +typedef struct hashtable_s hashtable_t; + +extern hashtable_t *ht_create( int size ); +extern int ht_hash( hashtable_t *hashtable, char *key ); +extern entry_t *ht_newpair( char *key, u_int16_t value ); +extern void ht_set( hashtable_t *hashtable, char *key, u_int16_t value ); +extern u_int16_t ht_get( hashtable_t *hashtable, char *key ); + +#endif /* _HASH_H_ */ diff --git a/src/lib/third_party/src/hash.c b/src/lib/third_party/src/hash.c new file mode 100644 index 000000000..510fce7e5 --- /dev/null +++ b/src/lib/third_party/src/hash.c @@ -0,0 +1,189 @@ +/* Based on https://gist.github.com/tonious/1377667 */ + +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> +#include <string.h> + +#include "hash.h" + +/* #define HASH_DEBUG 1 */ + +/* Create a new hashtable. */ +hashtable_t *ht_create( int size ) { + hashtable_t *hashtable = NULL; + int i; + + if( size < 1 ) return NULL; + + /* Allocate the table itself. */ + if( ( hashtable = ndpi_malloc( sizeof( hashtable_t ) ) ) == NULL ) { + return NULL; + } + + /* Allocate pointers to the head nodes. */ + if( ( hashtable->table = ndpi_malloc( sizeof( entry_t * ) * size ) ) == NULL ) { + return NULL; + } + for( i = 0; i < size; i++ ) { + hashtable->table[i] = NULL; + } + + hashtable->size = size; + + return hashtable; +} + +/* **************************************************** */ + +/* Hash a string for a particular hash table. */ +int ht_hash( hashtable_t *hashtable, char *key ) { + + unsigned long int hashval; + int i = 0; + + /* Convert our string to an integer */ + while( hashval < ULONG_MAX && i < strlen( key ) ) { + hashval = hashval << 8; + hashval += key[ i ]; + i++; + } + + return hashval % hashtable->size; +} + +/* **************************************************** */ + +/* Create a key-value pair. */ +entry_t *ht_newpair( char *key, u_int16_t value ) { + entry_t *newpair; + + if( ( newpair = ndpi_malloc( sizeof( entry_t ) ) ) == NULL ) { + return NULL; + } + + if( ( newpair->key = ndpi_strdup( key ) ) == NULL ) { + return NULL; + } + + newpair->value = value, newpair->next = NULL; + + return newpair; +} + +/* **************************************************** */ + +/* Insert a key-value pair into a hash table. */ +void ht_set( hashtable_t *hashtable, char *key, u_int16_t value ) { + int bin = 0; + entry_t *newpair = NULL; + entry_t *next = NULL; + entry_t *last = NULL; + +#ifdef HASH_DEBUG + printf("*** %s() %s = %u ***\n", __FUNCTION__, key, value); +#endif + + bin = ht_hash( hashtable, key ); + + next = hashtable->table[ bin ]; + + while( next != NULL && next->key != NULL && strcmp( key, next->key ) > 0 ) { + last = next; + next = next->next; + } + + /* There's already a pair. Let's replace that string. */ + if( next != NULL && next->key != NULL && strcmp( key, next->key ) == 0 ) { + next->value = value; + + /* Nope, could't find it. Time to grow a pair. */ + } else { + newpair = ht_newpair( key, value ); + + /* We're at the start of the linked list in this bin. */ + if( next == hashtable->table[ bin ] ) { + newpair->next = next; + hashtable->table[ bin ] = newpair; + + /* We're at the end of the linked list in this bin. */ + } else if ( next == NULL ) { + last->next = newpair; + + /* We're in the middle of the list. */ + } else { + newpair->next = next; + last->next = newpair; + } + } +} + +/* **************************************************** */ + +/* Retrieve a key-value pair from a hash table. */ +u_int16_t ht_get( hashtable_t *hashtable, char *key ) { + int bin = 0; + entry_t *pair; + +#ifdef HASH_DEBUG + printf("*** %s() %s = %u ***\n", __FUNCTION__, key, pair->value); +#endif + + bin = ht_hash( hashtable, key ); + + /* Step through the bin, looking for our value. */ + pair = hashtable->table[ bin ]; + while( pair != NULL && pair->key != NULL && strcmp( key, pair->key ) > 0 ) { + pair = pair->next; + } + + /* Did we actually find anything? */ + if( pair == NULL || pair->key == NULL || strcmp( key, pair->key ) != 0 ) { + return 0; + } else { + return pair->value; + } +} + +/* **************************************************** */ + +void ht_free(hashtable_t *hashtable) { + int i; + + for(i=0; i<hashtable->size; i++) { + struct entry_s *t = hashtable->table[i]; + + while(t != NULL) { + struct entry_s *next = t->next; + + ndpi_free(t->key); + ndpi_free(t); + + t = next; + } + } + + ndpi_free(hashtable); +} + +/* **************************************************** */ + +#ifdef HASH_TEST + +int main( int argc, char **argv ) { + hashtable_t *hashtable = ht_create( 65536 ); + + ht_set( hashtable, "key1", 32 ); + ht_set( hashtable, "key2", 34 ); + ht_set( hashtable, "key3", 124 ); + ht_set( hashtable, "key4", 98 ); + + printf( "%u\n", ht_get( hashtable, "key1" ) ); + printf( "%u\n", ht_get( hashtable, "key2" ) ); + printf( "%u\n", ht_get( hashtable, "key3" ) ); + printf( "%u\n", ht_get( hashtable, "key4" ) ); + + return 0; +} + +#endif |