diff options
Diffstat (limited to 'dependencies/uthash/tests/keystat.c')
-rw-r--r-- | dependencies/uthash/tests/keystat.c | 255 |
1 files changed, 255 insertions, 0 deletions
diff --git a/dependencies/uthash/tests/keystat.c b/dependencies/uthash/tests/keystat.c new file mode 100644 index 000000000..cb64ca66f --- /dev/null +++ b/dependencies/uthash/tests/keystat.c @@ -0,0 +1,255 @@ +#include <sys/types.h> /* for 'open' */ +#include <sys/stat.h> /* for 'open' */ +#include <fcntl.h> /* for 'open' */ +#include <stdlib.h> /* for 'malloc' */ +#include <stdio.h> /* for 'printf' */ +#include <unistd.h> /* for 'read' */ +#include <errno.h> /* for 'sterror' */ +#include <sys/time.h> /* for 'gettimeofday' */ +#include "uthash.h" + +#undef uthash_noexpand_fyi +#define uthash_noexpand_fyi(t) die() +#define UNALIGNED_KEYS 0 + +static void die() +{ + fprintf(stderr,"expansion inhibited\n"); + exit(-1); +} + +/* Windows doesn't have gettimeofday. While Cygwin and some + * versions of MinGW supply one, it is very coarse. This substitute + * gives much more accurate elapsed times under Windows. */ +#if (( defined __CYGWIN__ ) || ( defined __MINGW32__ )) +#include <windows.h> +static void win_gettimeofday(struct timeval* p, void* tz /* IGNORED */) +{ + LARGE_INTEGER q; + static long long freq; + static long long cyg_timer; + QueryPerformanceFrequency(&q); + freq = q.QuadPart; + QueryPerformanceCounter(&q); + cyg_timer = q.QuadPart; + p->tv_sec = (long)(cyg_timer / freq); + p->tv_usec = (long)(((cyg_timer % freq) * 1000000) / freq); +} +#define gettimeofday win_gettimeofday +#define MODE (O_RDONLY|O_BINARY) +#else +#define MODE (O_RDONLY) +#endif + +#ifndef timersub +#define timersub(a, b, result) \ + do { \ + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ + if ((result)->tv_usec < 0) { \ + --(result)->tv_sec; \ + (result)->tv_usec += 1000000; \ + } \ + } while (0) +#endif + +typedef struct stat_key { + char *key; + unsigned len; + UT_hash_handle hh, hh2; +} stat_key; + +#define CHAIN_0 0 +#define CHAIN_5 1 +#define CHAIN_10 2 +#define CHAIN_20 3 +#define CHAIN_100 4 +#define CHAIN_MAX 5 +static void hash_chain_len_histogram(const UT_hash_table *tbl) +{ + unsigned i, bkt_hist[CHAIN_MAX+1]; + double pct = 100.0/(double)tbl->num_buckets; + memset(bkt_hist,0,sizeof(bkt_hist)); + for(i=0; i < tbl->num_buckets; i++) { + unsigned count = tbl->buckets[i].count; + if (count == 0U) { + bkt_hist[CHAIN_0]++; + } else if (count < 5U) { + bkt_hist[CHAIN_5]++; + } else if (count < 10U) { + bkt_hist[CHAIN_10]++; + } else if (count < 20U) { + bkt_hist[CHAIN_20]++; + } else if (count < 100U) { + bkt_hist[CHAIN_100]++; + } else { + bkt_hist[CHAIN_MAX]++; + } + } + fprintf(stderr, "Buckets with 0 items: %.1f%%\n", (double)bkt_hist[CHAIN_0 ]*pct); + fprintf(stderr, "Buckets with < 5 items: %.1f%%\n", (double)bkt_hist[CHAIN_5 ]*pct); + fprintf(stderr, "Buckets with < 10 items: %.1f%%\n", (double)bkt_hist[CHAIN_10]*pct); + fprintf(stderr, "Buckets with < 20 items: %.1f%%\n", (double)bkt_hist[CHAIN_20]*pct); + fprintf(stderr, "Buckets with < 100 items: %.1f%%\n", (double)bkt_hist[CHAIN_100]*pct); + fprintf(stderr, "Buckets with > 100 items: %.1f%%\n", (double)bkt_hist[CHAIN_MAX]*pct); +} + +int main(int argc, char *argv[]) +{ + int dups=0, rc, fd, done=0, err=0, want, i, padding=0, v=1, percent=100; + unsigned keylen, max_keylen=0, verbose=0; + const char *filename = "/dev/stdin"; + char *dst; + stat_key *keyt, *keytmp, *keys=NULL, *keys2=NULL; + struct timeval start_tm, end_tm, elapsed_tm, elapsed_tm2, elapsed_tm3; + + if ((argc >= 3) && (strcmp(argv[1],"-p") == 0)) { + percent = atoi(argv[2]); + v = 3; + } + if ((v < argc) && (strcmp(argv[v],"-v") == 0)) { + verbose=1; + v++; + } + if (v < argc) { + filename=argv[v]; + } + fd=open(filename,MODE); + + if ( fd == -1 ) { + fprintf(stderr,"open failed %s: %s\n", filename, strerror(errno)); + return -1; + } + + for(i=0; done==0; i++) { + + want = sizeof(int); + dst = (char*)&keylen; +readmore1: + rc = read(fd,dst,want); + if (rc != want) { + if (rc == 0) { + done=1; + } else if (rc == -1) { + fprintf(stderr,"read failed: %s\n", strerror(errno)); + err=1; + } else if (rc > 0) { + want -= rc; + dst += rc; + goto readmore1; + } + } + + if (done || err) { + break; + } + if (keylen > max_keylen) { + max_keylen=keylen; + } + + keyt = (stat_key*)malloc(sizeof(stat_key)); + if (keyt == NULL) { + fprintf(stderr,"out of memory\n"); + exit(-1); + } + + /* read key */ +#ifdef UNALIGNED_KEYS + padding = i%8; +#endif + keyt->key = (char*)malloc(padding+keylen); + if (keyt->key == NULL) { + fprintf(stderr,"out of memory\n"); + exit(-1); + } + keyt->key += padding; /* forcibly alter the alignment of key */ + keyt->len = keylen; + + want = keylen; + dst = keyt->key; +readmore2: + rc = read(fd,dst,want); + if (rc != want) { + if (rc == -1) { + fprintf(stderr,"read failed: %s\n", strerror(errno)); + err=1; + } else if (rc == 0) { + fprintf(stderr,"incomplete file\n"); + err=1; + } else if (rc >= 0) { + want -= rc; + dst += rc; + goto readmore2; + } + } + if (err != 0) { + break; + } + /* if percent was set to something less than 100%, skip some keys*/ + if (((rand()*1.0) / RAND_MAX) > ((percent*1.0)/100)) { + free(keyt->key-padding); + free(keyt); + continue; + } + + /* eliminate dups */ + HASH_FIND(hh,keys,keyt->key,keylen,keytmp); + if (keytmp != NULL) { + dups++; + free(keyt->key - padding); + free(keyt); + } else { + HASH_ADD_KEYPTR(hh,keys,keyt->key,keylen,keyt); + } + } + + if (verbose != 0) { + unsigned key_count = HASH_COUNT(keys); + fprintf(stderr,"max key length: %u\n", max_keylen); + fprintf(stderr,"number unique keys: %u\n", key_count); + fprintf(stderr,"keystats memory: %u\n", + (unsigned)((sizeof(stat_key)+max_keylen)*key_count)); + hash_chain_len_histogram(keys->hh.tbl); + } + + /* add all keys to a new hash, so we can measure add time w/o malloc */ + gettimeofday(&start_tm,NULL); + for(keyt = keys; keyt != NULL; keyt=(stat_key*)keyt->hh.next) { + HASH_ADD_KEYPTR(hh2,keys2,keyt->key,keyt->len,keyt); + } + gettimeofday(&end_tm,NULL); + timersub(&end_tm, &start_tm, &elapsed_tm); + + /* now look up all keys in the new hash, again measuring elapsed time */ + gettimeofday(&start_tm,NULL); + for(keyt = keys; keyt != NULL; keyt=(stat_key*)keyt->hh.next) { + HASH_FIND(hh2,keys2,keyt->key,keyt->len,keytmp); + if (keytmp == NULL) { + fprintf(stderr,"internal error, key not found\n"); + } + } + gettimeofday(&end_tm,NULL); + timersub(&end_tm, &start_tm, &elapsed_tm2); + + /* now delete all items in the new hash, measuring elapsed time */ + gettimeofday(&start_tm,NULL); + while (keys2 != NULL) { + keytmp = keys2; + HASH_DELETE(hh2,keys2,keytmp); + } + gettimeofday(&end_tm,NULL); + timersub(&end_tm, &start_tm, &elapsed_tm3); + + if (err == 0) { + printf("%.3f,%u,%u,%d,%s,%ld,%ld,%ld\n", + 1-(1.0*keys->hh.tbl->nonideal_items/keys->hh.tbl->num_items), + keys->hh.tbl->num_items, + keys->hh.tbl->num_buckets, + dups, + (keys->hh.tbl->noexpand != 0U) ? "nx" : "ok", + (elapsed_tm.tv_sec * 1000000) + elapsed_tm.tv_usec, + (elapsed_tm2.tv_sec * 1000000) + elapsed_tm2.tv_usec, + (elapsed_tm3.tv_sec * 1000000) + elapsed_tm3.tv_usec ); + } + return 0; +} |