aboutsummaryrefslogtreecommitdiff
path: root/flatcc/external/hash/ht_hash_function.h
blob: 1f65ee563e4a601c2692a63be6dd31099058e7ce (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#ifndef HT_HASH_FUNCTION_H
#define HT_HASH_FUNCTION_H

#include <stddef.h>

#ifdef _MSC_VER
/* `inline` only advisory anyway. */
#pragma warning(disable: 4710) /* function not inlined */
#endif

/* Avoid 0 special case in hash functions and allow for configuration with unguessable seed. */
#ifndef HT_HASH_SEED
#define HT_HASH_SEED UINT32_C(0x2f693b52)
#endif

#ifndef HT_HASH_32

#include "cmetrohash.h"

static inline size_t ht_default_hash_function(const void *key, size_t len)
{
    uint64_t out;

    cmetrohash64_1((const uint8_t *)key, len, HT_HASH_SEED, (uint8_t *)&out);
    return (unsigned int)out;
}

/* When using the pointer directly as a hash key. */
static inline size_t ht_ptr_hash_function(const void *key, size_t len)
{
    /* MurmurHash3 64-bit finalizer */
    uint64_t x;

    (void)len;

    x = ((uint64_t)(size_t)key) ^ (HT_HASH_SEED);

    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return (size_t)x;
}

#else

#include "PMurHash.h"

static inline size_t ht_default_hash_function(const void *key, size_t len)
{
    return (size_t)PMurHash32((HT_HASH_SEED), key, (int)len);
}

/* When using the pointer directly as a hash key. */
static inline size_t ht_ptr_hash_function(const void *key, size_t len)
{
    /* http://stackoverflow.com/a/12996028 */
    size_t x;

    x = (size_t)key ^ (HT_HASH_SEED);

    x = ((x >> 16) ^ x) * 0x45d9f3bUL;
    x = ((x >> 16) ^ x) * 0x45d9f3bUL;
    x = ((x >> 16) ^ x);
    return x;
}

#endif /* HT_HASH_32 */


/* This assumes the key points to a 32-bit aligned random value that is its own hash function. */
static inline size_t ht_uint32_identity_hash_function(const void *key, size_t len)
{
    (void)len;
    return (size_t)*(uint32_t *)key;
}

/* This assumes the key points to a 64-bit aligned random value that is its own hash function. */
static inline size_t ht_uint64_identity_hash_function(const void *key, size_t len)
{
    (void)len;
    return (size_t)*(uint64_t *)key;
}

/* This assumes the key points to a 32-bit aligned value. */
static inline size_t ht_uint32_hash_function(const void *key, size_t len)
{
    uint32_t x = *(uint32_t *)key + (uint32_t)(HT_HASH_SEED);

    (void)len;

    /* http://stackoverflow.com/a/12996028 */
    x = ((x >> 16) ^ x) * UINT32_C(0x45d9f3b);
    x = ((x >> 16) ^ x) * UINT32_C(0x45d9f3b);
    x = ((x >> 16) ^ x);
    return x;
}

/* This assumes the key points to a 64-bit aligned value. */
static inline size_t ht_uint64_hash_function(const void *key, size_t len)
{
    uint64_t x = *(uint64_t *)key + UINT64_C(0x9e3779b97f4a7c15) + (uint64_t)(HT_HASH_SEED);

    (void)len;

    x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9);
    x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb);
    return (size_t)(x ^ (x >> 31));
}

/*
 * Suited for set operations of low-valued integers where the stored
 * hash pointer is the key and the value.
 *
 * This function is especially useful for small hash tables (<1000)
 * where collisions are cheap due to caching but also works for integer
 * sets up to at least 1,000,000.
 *
 * NOTE: The multiplicative hash function by Knuth requires the modulo
 * to table size be done by shifting the upper bits down, since this is
 * where the quality bits reside. This yields significantly fewer
 * collisions which is important for e.g. chained hashing. However, our
 * interface does not provide the required information.
 *
 * When used in open hashing with load factors below 0.7 where the
 * stored pointer is also the key, collision checking is very cheap and
 * this pays off in a large range of table sizes where a more
 * complicated hash simply doesn't pay off.
 * 
 * When used with a pointer set where the pointer is also the key, it is
 * not likely to work as well because the pointer acts as a large
 * integer which works against the design of the hash function. Here a
 * better mix function is probably worthwhile - therefore we also have
 * ht_ptr_hash_function.
 */
static inline size_t ht_int_hash_function(const void *key, size_t len)
{
    (void)len;
    return ((size_t)key ^ (HT_HASH_SEED)) * 2654435761UL;
}

/* Bernsteins hash function, assumes string is zero terminated, len is ignored. */
static inline size_t ht_str_hash_function(const void *key, size_t len)
{
    const unsigned char *str = key;
    size_t hash = 5381 ^ (HT_HASH_SEED);
    size_t c;

    (void)len;

    while ((c = (size_t)*str++))
        hash = ((hash << 5) + hash) ^ c; /* (hash * 33) xor c */

    return hash;
}

/* Hashes at most len characters or until zero termination. */
static inline size_t ht_strn_hash_function(const void *key, size_t len)
{
    const unsigned char *str = key;
    size_t hash = 5381 ^ (HT_HASH_SEED);
    size_t c;

    while (--len && (c = (size_t)*str++))
        hash = ((hash << 5) + hash) ^ c; /* (hash * 33) xor c */

    return hash;
}

static inline uint32_t ht_fnv1a32_hash_function(const void *key, size_t len)
{
#ifndef FNV1A_NOMUL
    const uint32_t prime = UINT32_C(0x1000193);
#endif
    uint32_t hash = UINT32_C(0x811c9dc5);
    const uint8_t *p = key;

    while (len--) {
        hash ^= (uint64_t)*p++;
#ifndef FNV1A_NOMUL
        hash *= prime;
#else
        hash += (hash << 1) + (hash << 4) + (hash << 7) +
            (hash << 8) + (hash << 24);
#endif
    }
    return hash;
}

static inline uint64_t ht_fnv1a64_hash_function(const void *key, size_t len)
{
#ifndef FNV1A_NOMUL
    const uint64_t prime = UINT64_C(0x100000001b3);
#endif
    uint64_t hash = UINT64_C(0xcbf29ce484222325);
    const uint8_t *p = key;

    while (len--) {
        hash ^= (uint64_t)*p++;
#ifndef FNV1A_NOMUL
        hash *= prime;
#else
        hash += (hash << 1) + (hash << 4) + (hash << 5) +
		    (hash << 7) + (hash << 8) + (hash << 40);
#endif
    }
    return hash;
}

/* Hashes until string termination and ignores length argument. */
static inline uint32_t ht_fnv1a32_str_hash_function(const void *key, size_t len)
{
#ifndef FNV1A_NOMUL
    const uint32_t prime = UINT32_C(0x1000193);
#endif
    uint32_t hash = UINT32_C(0x811c9dc5);
    const uint8_t *p = key;

    (void)len;

    while (*p) {
        hash ^= (uint64_t)*p++;
#ifndef FNV1A_NOMUL
        hash *= prime;
#else
        hash += (hash << 1) + (hash << 4) + (hash << 7) +
            (hash << 8) + (hash << 24);
#endif
    }
    return hash;
}

/* Hashes until string termination and ignores length argument. */
static inline uint64_t ht_fnv1a64_str_hash_function(const void *key, size_t len)
{
#ifndef FNV1A_NOMUL
    const uint64_t prime = UINT64_C(0x100000001b3);
#endif
    uint64_t hash = UINT64_C(0xcbf29ce484222325);
    const uint8_t *p = key;

    (void)len;

    while (*p) {
        hash ^= (uint64_t)*p++;
#ifndef FNV1A_NOMUL
        hash *= prime;
#else
        hash += (hash << 1) + (hash << 4) + (hash << 5) +
		    (hash << 7) + (hash << 8) + (hash << 40);
#endif
    }
    return hash;
}


#endif /* HT_HASH_FUNCTION_H */