diff options
author | Toni <matzeton@googlemail.com> | 2022-06-17 19:50:31 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-06-17 19:50:31 +0200 |
commit | 9c8b2d63dafc526ff3f0872e8e88e67f72d875d1 (patch) | |
tree | b71901c710a90ecaa115213960b5ffb7ed636317 | |
parent | 20a29c393f5cff3864a75070b2988fe1be1c6d17 (diff) |
Replaced nDPI's internal hashmap with uthash. (#1602)
Signed-off-by: lns <matzeton@googlemail.com>
-rw-r--r-- | example/ndpiReader.c | 18 | ||||
-rw-r--r-- | src/include/ndpi_api.h.in | 50 | ||||
-rw-r--r-- | src/include/ndpi_typedefs.h | 15 | ||||
-rw-r--r-- | src/lib/ndpi_utils.c | 146 | ||||
-rw-r--r-- | src/lib/third_party/include/uthash.h | 938 |
5 files changed, 618 insertions, 549 deletions
diff --git a/example/ndpiReader.c b/example/ndpiReader.c index 4ec3a528f..9fa63c8bf 100644 --- a/example/ndpiReader.c +++ b/example/ndpiReader.c @@ -4553,20 +4553,24 @@ void rsiUnitTest() { /* *********************************************** */ void hashUnitTest() { - ndpi_str_hash *h = ndpi_hash_alloc(16384); - char* dict[] = { "hello", "world", NULL }; + ndpi_str_hash *h; + char * const dict[] = { "hello", "world", NULL }; int i; - assert(h); + assert(ndpi_hash_init(&h) == 0); + assert(h == NULL); for(i=0; dict[i] != NULL; i++) { - u_int8_t l = strlen(dict[i]), v; + u_int8_t l = strlen(dict[i]); + int * v; - assert(ndpi_hash_add_entry(h, dict[i], l, i) == 0); - assert(ndpi_hash_find_entry(h, dict[i], l, &v) == 0); + assert(ndpi_hash_add_entry(&h, dict[i], l, &i) == 0); + assert(ndpi_hash_find_entry(h, dict[i], l, (void **)&v) == 0); + assert(v == (void *)&i && *v == i); } - ndpi_hash_free(h); + ndpi_hash_free(&h, NULL); + assert(h == NULL); } /* *********************************************** */ diff --git a/src/include/ndpi_api.h.in b/src/include/ndpi_api.h.in index 36d592b8d..ed1016aea 100644 --- a/src/include/ndpi_api.h.in +++ b/src/include/ndpi_api.h.in @@ -1693,10 +1693,52 @@ extern "C" { /* ******************************* */ - ndpi_str_hash* ndpi_hash_alloc(u_int32_t max_num_entries); - void ndpi_hash_free(ndpi_str_hash *h); - int ndpi_hash_find_entry(ndpi_str_hash *h, char *key, u_int key_len, u_int8_t *value); - int ndpi_hash_add_entry(ndpi_str_hash *h, char *key, u_int8_t key_len, u_int8_t value); + /** + * Initialize the hashmap. + * + * @par h = pointer to the hash map [in, out] + * + * @return 0 on success, 1 otherwise + * + */ + int ndpi_hash_init(ndpi_str_hash **h); + + /** + * Free the hashmap. + * + * @par h = pointer to the hash map [in, out] + * @par cleanup_func = pointer to a optional callback function + * called for each element in the hashmap [in] + * + */ + void ndpi_hash_free(ndpi_str_hash **h, void (*cleanup_func)(ndpi_str_hash *h)); + + /** + * Search for an entry in the hashmap. + * + * @par h = pointer to the hash map [in] + * @par key = character string (no '\0' required) [in] + * @par key_len = length of the character string @key [in] + * @par value = pointer to a pointer to the value, which contains a + * previously added hash entry [in, out] + * + * @return 0 if an entry with that key was found, 1 otherwise + * + */ + int ndpi_hash_find_entry(ndpi_str_hash *h, char *key, u_int key_len, void **value); + + /** + * Add an entry to the hashmap. + * + * @par h = pointer to the hash map [in, out] + * @par key = character string (no '\0' required) [in] + * @par key_len = length of the character string @key [in] + * @par value = pointer to the value to add [in] + * + * @return 0 if the entry was added, 1 otherwise + * + */ + int ndpi_hash_add_entry(ndpi_str_hash **h, char *key, u_int8_t key_len, void *value); /* ******************************* */ diff --git a/src/include/ndpi_typedefs.h b/src/include/ndpi_typedefs.h index 2f47a28b5..309351abf 100644 --- a/src/include/ndpi_typedefs.h +++ b/src/include/ndpi_typedefs.h @@ -1608,19 +1608,12 @@ struct ndpi_bin { /* **************************************** */ -struct ndpi_str_hash_info { - char *key; /* Key */ - u_int8_t key_len; - u_int8_t value; /* Value */ - struct ndpi_str_hash_info *next; -}; - -typedef struct { - u_int32_t num_buckets, max_num_entries; - struct ndpi_str_hash_info **buckets; +typedef struct ndpi_str_hash { + unsigned int hash; + void *value; + u_int8_t private_data[0]; } ndpi_str_hash; - /* **************************************** */ #define HW_HISTORY_LEN 4 diff --git a/src/lib/ndpi_utils.c b/src/lib/ndpi_utils.c index f2cb9a4d5..e5339712b 100644 --- a/src/lib/ndpi_utils.c +++ b/src/lib/ndpi_utils.c @@ -51,6 +51,7 @@ #include "third_party/include/libinjection.h" #include "third_party/include/libinjection_sqli.h" #include "third_party/include/libinjection_xss.h" +#include "third_party/include/uthash.h" #include "third_party/include/rce_injection.h" #define NDPI_CONST_GENERIC_PROTOCOL_NAME "GenericProtocol" @@ -68,6 +69,20 @@ struct pcre_struct { }; #endif +/* + * Please keep this strcture in sync with + * `struct ndpi_str_hash` in src/include/ndpi_typedefs.h + */ +typedef struct ndpi_str_hash_private { + unsigned int hash; + void *value; + UT_hash_handle hh; +} ndpi_str_hash_private; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +_Static_assert(sizeof(struct ndpi_str_hash) == sizeof(struct ndpi_str_hash_private) - sizeof(UT_hash_handle), + "Please keep `struct ndpi_str_hash` and `struct ndpi_str_hash_private` syncd."); +#endif + /* ****************************************** */ /* implementation of the punycode check function */ @@ -2063,106 +2078,79 @@ u_int32_t ndpi_quick_16_byte_hash(u_int8_t *in_16_bytes_long) { /* ******************************************************************** */ -ndpi_str_hash* ndpi_hash_alloc(u_int32_t max_num_entries) { - ndpi_str_hash *h = (ndpi_str_hash*)ndpi_malloc(sizeof(ndpi_str_hash)); - - if(!h) return(NULL); - if(max_num_entries < 1024) max_num_entries = 1024; - if(max_num_entries > 10000000) max_num_entries = 10000000; - - h->max_num_entries = max_num_entries, h->num_buckets = max_num_entries/2; - h->buckets = (struct ndpi_str_hash_info**)ndpi_calloc(sizeof(struct ndpi_str_hash_info*), h->num_buckets); - - if(h->buckets == NULL) { - ndpi_free(h); - return(NULL); - } else - return(h); -} - -/* ******************************************************************** */ - -void ndpi_hash_free(ndpi_str_hash *h) { - u_int32_t i; - - for(i=0; i<h->num_buckets; i++) { - struct ndpi_str_hash_info *head = h->buckets[i]; - - while(head != NULL) { - struct ndpi_str_hash_info *next = head->next; - - ndpi_free(head->key); - ndpi_free(head); - head = next; - } +int ndpi_hash_init(ndpi_str_hash **h) +{ + if (h == NULL) + { + return 1; } - ndpi_free(h->buckets); - ndpi_free(h); + *h = NULL; + return 0; } /* ******************************************************************** */ -static u_int32_t _ndpi_hash_function(ndpi_str_hash *h, char *key, u_int8_t key_len) { - u_int32_t hv = 0; - u_int8_t i; - - for(i=0; i<key_len; i++) - hv += key[i]*(i+1); - - return(hv % h->num_buckets); -} - -/* ******************************************************************** */ +void ndpi_hash_free(ndpi_str_hash **h, void (*cleanup_func)(ndpi_str_hash *h)) +{ + struct ndpi_str_hash_private *h_priv; + struct ndpi_str_hash_private *current, *tmp; -static int _ndpi_hash_find_entry(ndpi_str_hash *h, u_int32_t hashval, char *key, u_int key_len, u_int8_t *value) { - struct ndpi_str_hash_info *head = h->buckets[hashval]; + if (h == NULL) + { + return; + } + h_priv = *(struct ndpi_str_hash_private **)h; - while(head != NULL) { - if((head->key_len == key_len) && (memcmp(head->key, key, key_len) == 0)) { - *value = head->value; - return(0); /* Found */ + HASH_ITER(hh, h_priv, current, tmp) { + HASH_DEL(h_priv, current); + if (cleanup_func != NULL) + { + cleanup_func((ndpi_str_hash *)current); } - - head = head-> next; + free(current); } - return(-1); /* Not found */ + *h = NULL; } /* ******************************************************************** */ -int ndpi_hash_find_entry(ndpi_str_hash *h, char *key, u_int key_len, u_int8_t *value) { - u_int32_t hv = _ndpi_hash_function(h, key, key_len); - - return(_ndpi_hash_find_entry(h, hv, key, key_len, value)); +int ndpi_hash_find_entry(ndpi_str_hash *h, char *key, u_int key_len, void **value) +{ + struct ndpi_str_hash_private *h_priv = (struct ndpi_str_hash_private *)h; + struct ndpi_str_hash_private *found; + unsigned int hash_value; + + HASH_VALUE(key, key_len, hash_value); + HASH_FIND_INT(h_priv, &hash_value, found); + if (found != NULL) + { + *value = found->value; + return 0; + } else { + return 1; + } } /* ******************************************************************** */ -int ndpi_hash_add_entry(ndpi_str_hash *h, char *key, u_int8_t key_len, u_int8_t value) { - u_int32_t hv = _ndpi_hash_function(h, key, key_len); - u_int8_t ret_value; - int rc = _ndpi_hash_find_entry(h, hv, key, key_len, &ret_value); - - if(rc == -1) { - /* Not found */ - struct ndpi_str_hash_info *e = (struct ndpi_str_hash_info*)ndpi_malloc(sizeof(struct ndpi_str_hash_info)); - - if(e == NULL) - return(-2); - - if((e->key = (char*)ndpi_malloc(key_len)) == NULL) - return(-3); +int ndpi_hash_add_entry(ndpi_str_hash **h, char *key, u_int8_t key_len, void *value) +{ + struct ndpi_str_hash_private **h_priv = (struct ndpi_str_hash_private **)h; + struct ndpi_str_hash_private *new = ndpi_calloc(1, sizeof(*new)); + unsigned int hash_value; - memcpy(e->key, key, key_len); - e->key_len = key_len, e->value = value; - e->next = h->buckets[hv]; - h->buckets[hv] = e; + if (new == NULL) + { + return 1; + } - return(0); - } else - return(0); + HASH_VALUE(key, key_len, hash_value); + new->hash = hash_value; + new->value = value; + HASH_ADD_INT(*h_priv, hash, new); + return 0; } /* ********************************************************************************* */ diff --git a/src/lib/third_party/include/uthash.h b/src/lib/third_party/include/uthash.h index c9b7812f3..49c69df03 100644 --- a/src/lib/third_party/include/uthash.h +++ b/src/lib/third_party/include/uthash.h @@ -1,5 +1,5 @@ /* -Copyright (c) 2003-2017, Troy D. Hanson http://troydhanson.github.com/uthash/ +Copyright (c) 2003-2022, Troy D. Hanson https://troydhanson.github.io/uthash/ All rights reserved. Redistribution and use in source and binary forms, with or without @@ -24,11 +24,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef UTHASH_H #define UTHASH_H -#define UTHASH_VERSION 2.0.2 +#define UTHASH_VERSION 2.3.0 -#include <string.h> /* memcmp,strlen */ +#include <string.h> /* memcmp, memset, strlen */ #include <stddef.h> /* ptrdiff_t */ -#include <stdlib.h> /* exit() */ +#include <stdlib.h> /* exit */ + +#if defined(HASH_DEFINE_OWN_STDINT) && HASH_DEFINE_OWN_STDINT +/* This codepath is provided for backward compatibility, but I plan to remove it. */ +#warning "HASH_DEFINE_OWN_STDINT is deprecated; please use HASH_NO_STDINT instead" +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#elif defined(HASH_NO_STDINT) && HASH_NO_STDINT +#else +#include <stdint.h> /* uint8_t, uint32_t */ +#endif /* These macros use decltype or the earlier __typeof GNU extension. As decltype is only available in newer compilers (VS2010 or gcc 4.3+ @@ -62,37 +72,25 @@ do { } while (0) #endif -/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ -#if defined(WIN32) -#if defined(_MSC_VER) && _MSC_VER >= 1600 -#include <stdint.h> -#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) -#include <stdint.h> -#else -typedef unsigned int uint32_t; -typedef unsigned char uint8_t; -#endif -#elif defined(__GNUC__) && !defined(__VXWORKS__) -#include <stdint.h> -#else -typedef unsigned int uint32_t; -typedef unsigned char uint8_t; -#endif - -#ifndef uthash_fatal -#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ -#endif #ifndef uthash_malloc #define uthash_malloc(sz) malloc(sz) /* malloc fcn */ #endif #ifndef uthash_free #define uthash_free(ptr,sz) free(ptr) /* free fcn */ #endif +#ifndef uthash_bzero +#define uthash_bzero(a,n) memset(a,'\0',n) +#endif #ifndef uthash_strlen #define uthash_strlen(s) strlen(s) #endif -#ifndef uthash_memcmp -#define uthash_memcmp(a,b,n) memcmp(a,b,n) + +#ifndef HASH_FUNCTION +#define HASH_FUNCTION(keyptr,keylen,hashv) HASH_JEN(keyptr, keylen, hashv) +#endif + +#ifndef HASH_KEYCMP +#define HASH_KEYCMP(a,b,n) memcmp(a,b,n) #endif #ifndef uthash_noexpand_fyi @@ -102,6 +100,32 @@ typedef unsigned char uint8_t; #define uthash_expand_fyi(tbl) /* can be defined to log expands */ #endif +#ifndef HASH_NONFATAL_OOM +#define HASH_NONFATAL_OOM 0 +#endif + +#if HASH_NONFATAL_OOM +/* malloc failures can be recovered from */ + +#ifndef uthash_nonfatal_oom +#define uthash_nonfatal_oom(obj) do {} while (0) /* non-fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) do { (oomed) = 1; } while (0) +#define IF_HASH_NONFATAL_OOM(x) x + +#else +/* malloc failures result in lost memory, hash tables are unusable */ + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal OOM error */ +#endif + +#define HASH_RECORD_OOM(oomed) uthash_fatal("out of memory") +#define IF_HASH_NONFATAL_OOM(x) + +#endif + /* initial number of buckets */ #define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ #define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ @@ -110,11 +134,21 @@ typedef unsigned char uint8_t; /* calculate the element whose hash handle address is hhp */ #define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) /* calculate the hash handle from element address elp */ -#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho))) +#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle*)(void*)(((char*)(elp)) + ((tbl)->hho))) + +#define HASH_ROLLBACK_BKT(hh, head, itemptrhh) \ +do { \ + struct UT_hash_handle *_hd_hh_item = (itemptrhh); \ + unsigned _hd_bkt; \ + HASH_TO_BKT(_hd_hh_item->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + (head)->hh.tbl->buckets[_hd_bkt].count++; \ + _hd_hh_item->hh_next = NULL; \ + _hd_hh_item->hh_prev = NULL; \ +} while (0) #define HASH_VALUE(keyptr,keylen,hashv) \ do { \ - HASH_FCN(keyptr, keylen, hashv); \ + HASH_FUNCTION(keyptr, keylen, hashv); \ } while (0) #define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ @@ -131,21 +165,27 @@ do { #define HASH_FIND(hh,head,keyptr,keylen,out) \ do { \ - unsigned _hf_hashv; \ - HASH_VALUE(keyptr, keylen, _hf_hashv); \ - HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ + } \ } while (0) #ifdef HASH_BLOOM #define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) #define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) -#define HASH_BLOOM_MAKE(tbl) \ +#define HASH_BLOOM_MAKE(tbl,oomed) \ do { \ (tbl)->bloom_nbits = HASH_BLOOM; \ (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ - if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ - memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ - (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ + if (!(tbl)->bloom_bv) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ + } \ } while (0) #define HASH_BLOOM_FREE(tbl) \ @@ -157,36 +197,49 @@ do { #define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) #define HASH_BLOOM_ADD(tbl,hashv) \ - HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + HASH_BLOOM_BITSET((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) #define HASH_BLOOM_TEST(tbl,hashv) \ - HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + HASH_BLOOM_BITTEST((tbl)->bloom_bv, ((hashv) & (uint32_t)((1UL << (tbl)->bloom_nbits) - 1U))) #else -#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_MAKE(tbl,oomed) #define HASH_BLOOM_FREE(tbl) #define HASH_BLOOM_ADD(tbl,hashv) #define HASH_BLOOM_TEST(tbl,hashv) (1) #define HASH_BLOOM_BYTELEN 0U #endif -#define HASH_MAKE_TABLE(hh,head) \ +#define HASH_MAKE_TABLE(hh,head,oomed) \ do { \ - (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ - sizeof(UT_hash_table)); \ - if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ - memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ - (head)->hh.tbl->tail = &((head)->hh); \ - (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ - (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ - (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ - (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ - HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ - if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ - memset((head)->hh.tbl->buckets, 0, \ - HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ - HASH_BLOOM_MAKE((head)->hh.tbl); \ - (head)->hh.tbl->signature = HASH_SIGNATURE; \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc(sizeof(UT_hash_table)); \ + if (!(head)->hh.tbl) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ + if (!(head)->hh.tbl->buckets) { \ + HASH_RECORD_OOM(oomed); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + } else { \ + uthash_bzero((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS * sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl, oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (oomed) { \ + uthash_free((head)->hh.tbl->buckets, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + } \ + ) \ + } \ + } \ } while (0) #define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ @@ -194,7 +247,7 @@ do { (replaced) = NULL; \ HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ if (replaced) { \ - HASH_DELETE(hh, head, replaced); \ + HASH_DELETE(hh, head, replaced); \ } \ HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ } while (0) @@ -204,7 +257,7 @@ do { (replaced) = NULL; \ HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ if (replaced) { \ - HASH_DELETE(hh, head, replaced); \ + HASH_DELETE(hh, head, replaced); \ } \ HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ } while (0) @@ -234,8 +287,9 @@ do { #define HASH_AKBI_INNER_LOOP(hh,head,add,cmpfcn) \ do { \ do { \ - if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) \ + if (cmpfcn(DECLTYPE(head)(_hs_iter), add) > 0) { \ break; \ + } \ } while ((_hs_iter = HH_FROM_ELMT((head)->hh.tbl, _hs_iter)->next)); \ } while (0) @@ -255,17 +309,58 @@ do { } while (0) #endif -#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ +#if HASH_NONFATAL_OOM + +#define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ +do { \ + if (!(oomed)) { \ + unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ + if (oomed) { \ + HASH_ROLLBACK_BKT(hh, head, &(add)->hh); \ + HASH_DELETE_HH(hh, head, &(add)->hh); \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } else { \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + } \ + } else { \ + (add)->hh.tbl = NULL; \ + uthash_nonfatal_oom(add); \ + } \ +} while (0) + +#else + +#define HASH_ADD_TO_TABLE(hh,head,keyptr,keylen_in,hashval,add,oomed) \ do { \ unsigned _ha_bkt; \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], hh, &(add)->hh, oomed); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ +} while (0) + +#endif + + +#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ +do { \ + IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ (add)->hh.hashv = (hashval); \ (add)->hh.key = (char*) (keyptr); \ (add)->hh.keylen = (unsigned) (keylen_in); \ if (!(head)) { \ (add)->hh.next = NULL; \ (add)->hh.prev = NULL; \ - (head) = (add); \ - HASH_MAKE_TABLE(hh, head); \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( } ) \ } else { \ void *_hs_iter = (head); \ (add)->hh.tbl = (head)->hh.tbl; \ @@ -282,12 +377,8 @@ do { HASH_APPEND_LIST(hh, head, add); \ } \ } \ - (head)->hh.tbl->num_items++; \ - HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ - HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ - HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ - HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ - HASH_FSCK(hh, head); \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE_INORDER"); \ } while (0) #define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ @@ -305,25 +396,23 @@ do { #define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ do { \ - unsigned _ha_bkt; \ + IF_HASH_NONFATAL_OOM( int _ha_oomed = 0; ) \ (add)->hh.hashv = (hashval); \ - (add)->hh.key = (char*) (keyptr); \ + (add)->hh.key = (const void*) (keyptr); \ (add)->hh.keylen = (unsigned) (keylen_in); \ if (!(head)) { \ (add)->hh.next = NULL; \ (add)->hh.prev = NULL; \ - (head) = (add); \ - HASH_MAKE_TABLE(hh, head); \ + HASH_MAKE_TABLE(hh, add, _ha_oomed); \ + IF_HASH_NONFATAL_OOM( if (!_ha_oomed) { ) \ + (head) = (add); \ + IF_HASH_NONFATAL_OOM( } ) \ } else { \ (add)->hh.tbl = (head)->hh.tbl; \ HASH_APPEND_LIST(hh, head, add); \ } \ - (head)->hh.tbl->num_items++; \ - HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ - HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ - HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ - HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ - HASH_FSCK(hh, head); \ + HASH_ADD_TO_TABLE(hh, head, keyptr, keylen_in, hashval, add, _ha_oomed); \ + HASH_FSCK(hh, head, "HASH_ADD_KEYPTR_BYHASHVALUE"); \ } while (0) #define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ @@ -357,48 +446,53 @@ do { * scratch pointer rather than through the repointed (users) symbol. */ #define HASH_DELETE(hh,head,delptr) \ + HASH_DELETE_HH(hh, head, &(delptr)->hh) + +#define HASH_DELETE_HH(hh,head,delptrhh) \ do { \ - struct UT_hash_handle *_hd_hh_del; \ - if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ - uthash_free((head)->hh.tbl->buckets, \ - (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ - HASH_BLOOM_FREE((head)->hh.tbl); \ - uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ - head = NULL; \ + struct UT_hash_handle *_hd_hh_del = (delptrhh); \ + if ((_hd_hh_del->prev == NULL) && (_hd_hh_del->next == NULL)) { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head) = NULL; \ + } else { \ + unsigned _hd_bkt; \ + if (_hd_hh_del == (head)->hh.tbl->tail) { \ + (head)->hh.tbl->tail = HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev); \ + } \ + if (_hd_hh_del->prev != NULL) { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->prev)->next = _hd_hh_del->next; \ } else { \ - unsigned _hd_bkt; \ - _hd_hh_del = &((delptr)->hh); \ - if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ - (head)->hh.tbl->tail = \ - (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ - (head)->hh.tbl->hho); \ - } \ - if ((delptr)->hh.prev != NULL) { \ - ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ - (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ - } else { \ - DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ - } \ - if (_hd_hh_del->next != NULL) { \ - ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ - (head)->hh.tbl->hho))->prev = \ - _hd_hh_del->prev; \ - } \ - HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ - HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ - (head)->hh.tbl->num_items--; \ + DECLTYPE_ASSIGN(head, _hd_hh_del->next); \ + } \ + if (_hd_hh_del->next != NULL) { \ + HH_FROM_ELMT((head)->hh.tbl, _hd_hh_del->next)->prev = _hd_hh_del->prev; \ } \ - HASH_FSCK(hh,head); \ + HASH_TO_BKT(_hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT((head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh, head, "HASH_DELETE_HH"); \ } while (0) - /* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ #define HASH_FIND_STR(head,findstr,out) \ - HASH_FIND(hh,head,findstr,(unsigned)uthash_strlen(findstr),out) +do { \ + unsigned _uthash_hfstr_keylen = (unsigned)uthash_strlen(findstr); \ + HASH_FIND(hh, head, findstr, _uthash_hfstr_keylen, out); \ +} while (0) #define HASH_ADD_STR(head,strfield,add) \ - HASH_ADD(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add) +do { \ + unsigned _uthash_hastr_keylen = (unsigned)uthash_strlen((add)->strfield); \ + HASH_ADD(hh, head, strfield[0], _uthash_hastr_keylen, add); \ +} while (0) #define HASH_REPLACE_STR(head,strfield,add,replaced) \ - HASH_REPLACE(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add,replaced) +do { \ + unsigned _uthash_hrstr_keylen = (unsigned)uthash_strlen((add)->strfield); \ + HASH_REPLACE(hh, head, strfield[0], _uthash_hrstr_keylen, add, replaced); \ +} while (0) #define HASH_FIND_INT(head,findint,out) \ HASH_FIND(hh,head,findint,sizeof(int),out) #define HASH_ADD_INT(head,intfield,add) \ @@ -418,60 +512,58 @@ do { * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. */ #ifdef HASH_DEBUG -#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) -#define HASH_FSCK(hh,head) \ +#include <stdio.h> /* fprintf, stderr */ +#define HASH_OOPS(...) do { fprintf(stderr, __VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head,where) \ do { \ - struct UT_hash_handle *_thh; \ - if (head) { \ - unsigned _bkt_i; \ - unsigned _count; \ - char *_prev; \ - _count = 0; \ - for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ - unsigned _bkt_count = 0; \ - _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ - _prev = NULL; \ - while (_thh) { \ - if (_prev != (char*)(_thh->hh_prev)) { \ - HASH_OOPS("invalid hh_prev %p, actual %p\n", \ - _thh->hh_prev, _prev ); \ - } \ - _bkt_count++; \ - _prev = (char*)(_thh); \ - _thh = _thh->hh_next; \ - } \ - _count += _bkt_count; \ - if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ - HASH_OOPS("invalid bucket count %u, actual %u\n", \ - (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ - } \ - } \ - if (_count != (head)->hh.tbl->num_items) { \ - HASH_OOPS("invalid hh item count %u, actual %u\n", \ - (head)->hh.tbl->num_items, _count ); \ - } \ - /* traverse hh in app order; check next/prev integrity, count */ \ - _count = 0; \ - _prev = NULL; \ - _thh = &(head)->hh; \ - while (_thh) { \ - _count++; \ - if (_prev !=(char*)(_thh->prev)) { \ - HASH_OOPS("invalid prev %p, actual %p\n", \ - _thh->prev, _prev ); \ - } \ - _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ - _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ - (head)->hh.tbl->hho) : NULL ); \ - } \ - if (_count != (head)->hh.tbl->num_items) { \ - HASH_OOPS("invalid app item count %u, actual %u\n", \ - (head)->hh.tbl->num_items, _count ); \ + struct UT_hash_handle *_thh; \ + if (head) { \ + unsigned _bkt_i; \ + unsigned _count = 0; \ + char *_prev; \ + for (_bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; ++_bkt_i) { \ + unsigned _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("%s: invalid hh_prev %p, actual %p\n", \ + (where), (void*)_thh->hh_prev, (void*)_prev); \ } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("%s: invalid bucket count %u, actual %u\n", \ + (where), (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("%s: invalid hh item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ + } \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev != (char*)_thh->prev) { \ + HASH_OOPS("%s: invalid prev %p, actual %p\n", \ + (where), (void*)_thh->prev, (void*)_prev); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = (_thh->next ? HH_FROM_ELMT((head)->hh.tbl, _thh->next) : NULL); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("%s: invalid app item count %u, actual %u\n", \ + (where), (head)->hh.tbl->num_items, _count); \ } \ + } \ } while (0) #else -#define HASH_FSCK(hh,head) +#define HASH_FSCK(hh,head,where) #endif /* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to @@ -480,53 +572,48 @@ do { #ifdef HASH_EMIT_KEYS #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ do { \ - unsigned _klen = fieldlen; \ - write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ - write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ } while (0) #else #define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) #endif -/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ -#ifdef HASH_FUNCTION -#define HASH_FCN HASH_FUNCTION -#else -#define HASH_FCN HASH_JEN -#endif - /* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ #define HASH_BER(key,keylen,hashv) \ do { \ - unsigned _hb_keylen=(unsigned)keylen; \ - const unsigned char *_hb_key=(const unsigned char*)(key); \ + unsigned _hb_keylen = (unsigned)keylen; \ + const unsigned char *_hb_key = (const unsigned char*)(key); \ (hashv) = 0; \ while (_hb_keylen-- != 0U) { \ - (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ } \ } while (0) /* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at - * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx + * (archive link: https://archive.is/Ivcan ) + */ #define HASH_SAX(key,keylen,hashv) \ do { \ unsigned _sx_i; \ - const unsigned char *_hs_key=(const unsigned char*)(key); \ + const unsigned char *_hs_key = (const unsigned char*)(key); \ hashv = 0; \ - for(_sx_i=0; _sx_i < keylen; _sx_i++) { \ - hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + for (_sx_i=0; _sx_i < keylen; _sx_i++) { \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ } \ } while (0) /* FNV-1a variation */ #define HASH_FNV(key,keylen,hashv) \ do { \ unsigned _fn_i; \ - const unsigned char *_hf_key=(const unsigned char*)(key); \ - hashv = 2166136261U; \ - for(_fn_i=0; _fn_i < keylen; _fn_i++) { \ - hashv = hashv ^ _hf_key[_fn_i]; \ - hashv = hashv * 16777619U; \ + const unsigned char *_hf_key = (const unsigned char*)(key); \ + (hashv) = 2166136261U; \ + for (_fn_i=0; _fn_i < keylen; _fn_i++) { \ + hashv = hashv ^ _hf_key[_fn_i]; \ + hashv = hashv * 16777619U; \ } \ } while (0) @@ -583,17 +670,18 @@ do { } \ hashv += (unsigned)(keylen); \ switch ( _hj_k ) { \ - case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ - case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ - case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ - case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ - case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ - case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ - case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ - case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ - case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ - case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ - case 1: _hj_i += _hj_key[0]; \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ + case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ + case 1: _hj_i += _hj_key[0]; /* FALLTHROUGH */ \ + default: ; \ } \ HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ } while (0) @@ -641,96 +729,18 @@ do { case 1: hashv += *_sfh_key; \ hashv ^= hashv << 10; \ hashv += hashv >> 1; \ + break; \ + default: ; \ } \ \ - /* Force "avalanching" of final 127 bits */ \ - hashv ^= hashv << 3; \ - hashv += hashv >> 5; \ - hashv ^= hashv << 4; \ - hashv += hashv >> 17; \ - hashv ^= hashv << 25; \ - hashv += hashv >> 6; \ -} while (0) - -#ifdef HASH_USING_NO_STRICT_ALIASING -/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. - * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. - * MurmurHash uses the faster approach only on CPU's where we know it's safe. - * - * Note the preprocessor built-in defines can be emitted using: - * - * gcc -m64 -dM -E - < /dev/null (on gcc) - * cc -## a.c (where a.c is a simple test file) (Sun Studio) - */ -#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) -#define MUR_GETBLOCK(p,i) p[i] -#else /* non intel */ -#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) -#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) -#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) -#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) -#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) -#if (defined(__BIG_ENDIAN__) || defined(SPARC) || defined(__ppc__) || defined(__ppc64__)) -#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) -#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) -#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) -#else /* assume little endian non-intel */ -#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) -#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) -#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) -#endif -#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ - (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ - (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \ - MUR_ONE_THREE(p)))) -#endif -#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) -#define MUR_FMIX(_h) \ -do { \ - _h ^= _h >> 16; \ - _h *= 0x85ebca6bu; \ - _h ^= _h >> 13; \ - _h *= 0xc2b2ae35u; \ - _h ^= _h >> 16; \ -} while (0) - -#define HASH_MUR(key,keylen,hashv) \ -do { \ - const uint8_t *_mur_data = (const uint8_t*)(key); \ - const int _mur_nblocks = (int)(keylen) / 4; \ - uint32_t _mur_h1 = 0xf88D5353u; \ - uint32_t _mur_c1 = 0xcc9e2d51u; \ - uint32_t _mur_c2 = 0x1b873593u; \ - uint32_t _mur_k1 = 0; \ - const uint8_t *_mur_tail; \ - const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ - int _mur_i; \ - for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \ - _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ - _mur_k1 *= _mur_c1; \ - _mur_k1 = MUR_ROTL32(_mur_k1,15); \ - _mur_k1 *= _mur_c2; \ - \ - _mur_h1 ^= _mur_k1; \ - _mur_h1 = MUR_ROTL32(_mur_h1,13); \ - _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ - } \ - _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ - _mur_k1=0; \ - switch((keylen) & 3U) { \ - case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ - case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ - case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ - _mur_k1 *= _mur_c1; \ - _mur_k1 = MUR_ROTL32(_mur_k1,15); \ - _mur_k1 *= _mur_c2; \ - _mur_h1 ^= _mur_k1; \ - } \ - _mur_h1 ^= (uint32_t)(keylen); \ - MUR_FMIX(_mur_h1); \ - hashv = _mur_h1; \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ } while (0) -#endif /* HASH_USING_NO_STRICT_ALIASING */ /* iterate over items in a known bucket to find desired item */ #define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ @@ -742,7 +752,7 @@ do { } \ while ((out) != NULL) { \ if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \ - if (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0) { \ + if (HASH_KEYCMP((out)->hh.key, keyptr, keylen_in) == 0) { \ break; \ } \ } \ @@ -755,31 +765,42 @@ do { } while (0) /* add an item to a bucket */ -#define HASH_ADD_TO_BKT(head,addhh) \ +#define HASH_ADD_TO_BKT(head,hh,addhh,oomed) \ do { \ - head.count++; \ - (addhh)->hh_next = head.hh_head; \ - (addhh)->hh_prev = NULL; \ - if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \ - (head).hh_head=addhh; \ - if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \ - && ((addhh)->tbl->noexpand != 1U)) { \ - HASH_EXPAND_BUCKETS((addhh)->tbl); \ - } \ + UT_hash_bucket *_ha_head = &(head); \ + _ha_head->count++; \ + (addhh)->hh_next = _ha_head->hh_head; \ + (addhh)->hh_prev = NULL; \ + if (_ha_head->hh_head != NULL) { \ + _ha_head->hh_head->hh_prev = (addhh); \ + } \ + _ha_head->hh_head = (addhh); \ + if ((_ha_head->count >= ((_ha_head->expand_mult + 1U) * HASH_BKT_CAPACITY_THRESH)) \ + && !(addhh)->tbl->noexpand) { \ + HASH_EXPAND_BUCKETS(addhh,(addhh)->tbl, oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (oomed) { \ + HASH_DEL_IN_BKT(head,addhh); \ + } \ + ) \ + } \ } while (0) /* remove an item from a given bucket */ -#define HASH_DEL_IN_BKT(hh,head,hh_del) \ - (head).count--; \ - if ((head).hh_head == hh_del) { \ - (head).hh_head = hh_del->hh_next; \ - } \ - if (hh_del->hh_prev) { \ - hh_del->hh_prev->hh_next = hh_del->hh_next; \ - } \ - if (hh_del->hh_next) { \ - hh_del->hh_next->hh_prev = hh_del->hh_prev; \ - } +#define HASH_DEL_IN_BKT(head,delhh) \ +do { \ + UT_hash_bucket *_hd_head = &(head); \ + _hd_head->count--; \ + if (_hd_head->hh_head == (delhh)) { \ + _hd_head->hh_head = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_prev) { \ + (delhh)->hh_prev->hh_next = (delhh)->hh_next; \ + } \ + if ((delhh)->hh_next) { \ + (delhh)->hh_next->hh_prev = (delhh)->hh_prev; \ + } \ +} while (0) /* Bucket expansion has the effect of doubling the number of buckets * and redistributing the items into the new buckets. Ideally the @@ -810,52 +831,56 @@ do { * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) * */ -#define HASH_EXPAND_BUCKETS(tbl) \ +#define HASH_EXPAND_BUCKETS(hh,tbl,oomed) \ do { \ - unsigned _he_bkt; \ - unsigned _he_bkt_i; \ - struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ - UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ - _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ - 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ - if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ - memset(_he_new_buckets, 0, \ - 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ - tbl->ideal_chain_maxlen = \ - (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \ - (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \ - tbl->nonideal_items = 0; \ - for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ - { \ - _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ - while (_he_thh != NULL) { \ - _he_hh_nxt = _he_thh->hh_next; \ - HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \ - _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ - if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ - tbl->nonideal_items++; \ - _he_newbkt->expand_mult = _he_newbkt->count / \ - tbl->ideal_chain_maxlen; \ - } \ - _he_thh->hh_prev = NULL; \ - _he_thh->hh_next = _he_newbkt->hh_head; \ - if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \ - _he_thh; } \ - _he_newbkt->hh_head = _he_thh; \ - _he_thh = _he_hh_nxt; \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ + if (!_he_new_buckets) { \ + HASH_RECORD_OOM(oomed); \ + } else { \ + uthash_bzero(_he_new_buckets, \ + sizeof(struct UT_hash_bucket) * (tbl)->num_buckets * 2U); \ + (tbl)->ideal_chain_maxlen = \ + ((tbl)->num_items >> ((tbl)->log2_num_buckets+1U)) + \ + ((((tbl)->num_items & (((tbl)->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \ + (tbl)->nonideal_items = 0; \ + for (_he_bkt_i = 0; _he_bkt_i < (tbl)->num_buckets; _he_bkt_i++) { \ + _he_thh = (tbl)->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh != NULL) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT(_he_thh->hashv, (tbl)->num_buckets * 2U, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[_he_bkt]); \ + if (++(_he_newbkt->count) > (tbl)->ideal_chain_maxlen) { \ + (tbl)->nonideal_items++; \ + if (_he_newbkt->count > _he_newbkt->expand_mult * (tbl)->ideal_chain_maxlen) { \ + _he_newbkt->expand_mult++; \ + } \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head != NULL) { \ + _he_newbkt->hh_head->hh_prev = _he_thh; \ } \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ } \ - uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ - tbl->num_buckets *= 2U; \ - tbl->log2_num_buckets++; \ - tbl->buckets = _he_new_buckets; \ - tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ - (tbl->ineff_expands+1U) : 0U; \ - if (tbl->ineff_expands > 1U) { \ - tbl->noexpand=1; \ - uthash_noexpand_fyi(tbl); \ + uthash_free((tbl)->buckets, (tbl)->num_buckets * sizeof(struct UT_hash_bucket)); \ + (tbl)->num_buckets *= 2U; \ + (tbl)->log2_num_buckets++; \ + (tbl)->buckets = _he_new_buckets; \ + (tbl)->ineff_expands = ((tbl)->nonideal_items > ((tbl)->num_items >> 1)) ? \ + ((tbl)->ineff_expands+1U) : 0U; \ + if ((tbl)->ineff_expands > 1U) { \ + (tbl)->noexpand = 1; \ + uthash_noexpand_fyi(tbl); \ } \ uthash_expand_fyi(tbl); \ + } \ } while (0) @@ -869,85 +894,82 @@ do { unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ if (head != NULL) { \ - _hs_insize = 1; \ - _hs_looping = 1; \ - _hs_list = &((head)->hh); \ - while (_hs_looping != 0U) { \ - _hs_p = _hs_list; \ - _hs_list = NULL; \ - _hs_tail = NULL; \ - _hs_nmerges = 0; \ - while (_hs_p != NULL) { \ - _hs_nmerges++; \ - _hs_q = _hs_p; \ - _hs_psize = 0; \ - for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ - _hs_psize++; \ - _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ - ((void*)((char*)(_hs_q->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - if (! (_hs_q) ) { break; } \ - } \ - _hs_qsize = _hs_insize; \ - while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\ - if (_hs_psize == 0U) { \ - _hs_e = _hs_q; \ - _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ - ((void*)((char*)(_hs_q->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - _hs_qsize--; \ - } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \ - _hs_e = _hs_p; \ - if (_hs_p != NULL){ \ - _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ - ((void*)((char*)(_hs_p->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - } \ - _hs_psize--; \ - } else if (( \ - cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ - DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ - ) <= 0) { \ - _hs_e = _hs_p; \ - if (_hs_p != NULL){ \ - _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ - ((void*)((char*)(_hs_p->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - } \ - _hs_psize--; \ - } else { \ - _hs_e = _hs_q; \ - _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ - ((void*)((char*)(_hs_q->next) + \ - (head)->hh.tbl->hho)) : NULL); \ - _hs_qsize--; \ - } \ - if ( _hs_tail != NULL ) { \ - _hs_tail->next = ((_hs_e != NULL) ? \ - ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ - } else { \ - _hs_list = _hs_e; \ - } \ - if (_hs_e != NULL) { \ - _hs_e->prev = ((_hs_tail != NULL) ? \ - ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ - } \ - _hs_tail = _hs_e; \ - } \ - _hs_p = _hs_q; \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping != 0U) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p != NULL) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for (_hs_i = 0; _hs_i < _hs_insize; ++_hs_i) { \ + _hs_psize++; \ + _hs_q = ((_hs_q->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + if (_hs_q == NULL) { \ + break; \ + } \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize != 0U) || ((_hs_qsize != 0U) && (_hs_q != NULL))) { \ + if (_hs_psize == 0U) { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + _hs_qsize--; \ + } else if ((_hs_qsize == 0U) || (_hs_q == NULL)) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) { \ + _hs_p = ((_hs_p->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ + } \ + _hs_psize--; \ + } else if ((cmpfcn( \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl, _hs_q)) \ + )) <= 0) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL) { \ + _hs_p = ((_hs_p->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_p->next) : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = ((_hs_q->next != NULL) ? \ + HH_FROM_ELMT((head)->hh.tbl, _hs_q->next) : NULL); \ + _hs_qsize--; \ } \ - if (_hs_tail != NULL){ \ - _hs_tail->next = NULL; \ + if ( _hs_tail != NULL ) { \ + _hs_tail->next = ((_hs_e != NULL) ? \ + ELMT_FROM_HH((head)->hh.tbl, _hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ } \ - if ( _hs_nmerges <= 1U ) { \ - _hs_looping=0; \ - (head)->hh.tbl->tail = _hs_tail; \ - DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + if (_hs_e != NULL) { \ + _hs_e->prev = ((_hs_tail != NULL) ? \ + ELMT_FROM_HH((head)->hh.tbl, _hs_tail) : NULL); \ } \ - _hs_insize *= 2U; \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail != NULL) { \ + _hs_tail->next = NULL; \ + } \ + if (_hs_nmerges <= 1U) { \ + _hs_looping = 0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head, ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ } \ - HASH_FSCK(hh,head); \ - } \ + _hs_insize *= 2U; \ + } \ + HASH_FSCK(hh, head, "HASH_SRT"); \ + } \ } while (0) /* This function selects items from one hash into another hash. @@ -958,54 +980,74 @@ do { #define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ do { \ unsigned _src_bkt, _dst_bkt; \ - void *_last_elt=NULL, *_elt; \ + void *_last_elt = NULL, *_elt; \ UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ - if (src != NULL) { \ - for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ - for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ - _src_hh != NULL; \ - _src_hh = _src_hh->hh_next) { \ - _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ - if (cond(_elt)) { \ - _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ - _dst_hh->key = _src_hh->key; \ - _dst_hh->keylen = _src_hh->keylen; \ - _dst_hh->hashv = _src_hh->hashv; \ - _dst_hh->prev = _last_elt; \ - _dst_hh->next = NULL; \ - if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \ - if (dst == NULL) { \ - DECLTYPE_ASSIGN(dst,_elt); \ - HASH_MAKE_TABLE(hh_dst,dst); \ - } else { \ - _dst_hh->tbl = (dst)->hh_dst.tbl; \ - } \ - HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ - HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ - (dst)->hh_dst.tbl->num_items++; \ - _last_elt = _elt; \ - _last_elt_hh = _dst_hh; \ + if ((src) != NULL) { \ + for (_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for (_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh != NULL; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + IF_HASH_NONFATAL_OOM( int _hs_oomed = 0; ) \ + _dst_hh = (UT_hash_handle*)(void*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh != NULL) { \ + _last_elt_hh->next = _elt; \ + } \ + if ((dst) == NULL) { \ + DECLTYPE_ASSIGN(dst, _elt); \ + HASH_MAKE_TABLE(hh_dst, dst, _hs_oomed); \ + IF_HASH_NONFATAL_OOM( \ + if (_hs_oomed) { \ + uthash_nonfatal_oom(_elt); \ + (dst) = NULL; \ + continue; \ + } \ + ) \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt], hh_dst, _dst_hh, _hs_oomed); \ + (dst)->hh_dst.tbl->num_items++; \ + IF_HASH_NONFATAL_OOM( \ + if (_hs_oomed) { \ + HASH_ROLLBACK_BKT(hh_dst, dst, _dst_hh); \ + HASH_DELETE_HH(hh_dst, dst, _dst_hh); \ + _dst_hh->tbl = NULL; \ + uthash_nonfatal_oom(_elt); \ + continue; \ + } \ + ) \ + HASH_BLOOM_ADD(_dst_hh->tbl, _dst_hh->hashv); \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ } \ } \ } \ - HASH_FSCK(hh_dst,dst); \ + HASH_FSCK(hh_dst, dst, "HASH_SELECT"); \ } while (0) #define HASH_CLEAR(hh,head) \ do { \ - if (head != NULL) { \ + if ((head) != NULL) { \ + HASH_BLOOM_FREE((head)->hh.tbl); \ uthash_free((head)->hh.tbl->buckets, \ (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ - HASH_BLOOM_FREE((head)->hh.tbl); \ uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ - (head)=NULL; \ + (head) = NULL; \ } \ } while (0) #define HASH_OVERHEAD(hh,head) \ - ((head != NULL) ? ( \ + (((head) != NULL) ? ( \ (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ sizeof(UT_hash_table) + \ @@ -1088,7 +1130,7 @@ typedef struct UT_hash_handle { void *next; /* next element in app order */ struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ struct UT_hash_handle *hh_next; /* next hh in bucket order */ - void *key; /* ptr to enclosing struct's key */ + const void *key; /* ptr to enclosing struct's key */ unsigned keylen; /* enclosing struct's key len */ unsigned hashv; /* result of hash-fcn(key) */ } UT_hash_handle; |