1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
|
/*
* The MIT License (MIT)
*
* Copyright (c) 2015 Mikkel F. Jørgensen, dvide.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef HASH_TABLE_H
#define HASH_TABLE_H
#include "ht_portable.h"
#include <stddef.h>
/*
* Define HT_PRIVATE to make all name wrapping interface functions static
* inline when including hash implementation directly in user code. This
* can increase performance significantly (3x) on small hash tables with
* fast hash functions because the compiler can better optimize static
* functions. Some compiler optimizations will get the same speed
* with external linkage (clang 4.2 -O4 but not -O3).
*
* Can also be used to simple hide the interface from global
* linkage to avoid name clashes.
*/
#ifndef HT_PRIVATE
#define HT_PRIV
#else
#define HT_PRIV static inline
#endif
/*
* Generic hash table type. This makes it possible to use hash tables
* in datastructures and header files that do not have access to
* the specific hash table implementation. Call to init is optional
* if the structure is zeroed.
*
* Offsets are only used with Robin Hood hashing to segment each chain.
*
* Keys and values are both stored in the same item pointer. There are
* downsides to this over a key / value represention, but since we also
* use less space we can afford lower the load factor and we can have a
* more complex key representations. The smaller bucket size also helps
* when ordering Robin Hood hash chains.
*/
typedef struct hash_table hash_table_t;
struct hash_table {
void *table;
char *offsets;
size_t count;
/* May be stored as a direct count, or log2. */
size_t buckets;
};
enum hash_table_insert_mode {
ht_replace = 0,
ht_keep = 1,
ht_unique = 2,
ht_multi = 3,
};
/*
* This macro defines the prototypes of the hash table that user code
* needs for linkage.
*
* See also "hash_table_def.h" which builds wrapper functions to a
* generic hash table implementation so each specialization gets its own
* set of named functions.
*
* The HT_ITEM is normally a pointer to and the hash table does not
* store any signficant information internally. Customizations map
* the item value to a key. Certain values can be reserved, for
* example 0 indicates missing value, and sometimes 1 is reserved for
* internal tombstones and 2 may be used to return allocation failure.
*/
#define DECLARE_HASH_TABLE(HT_NAME, HT_ITEM) \
\
typedef hash_table_t HT_NAME##_t; \
typedef HT_ITEM HT_NAME##_item_t; \
\
/* Prototype for user supplied callback when visiting all elements. */ \
typedef void HT_NAME##_visitor_f(void *context, HT_ITEM item); \
\
extern const HT_NAME##_item_t HT_NAME##_missing; \
extern const HT_NAME##_item_t HT_NAME##_nomem; \
extern const HT_NAME##_item_t HT_NAME##_deleted; \
\
static inline int HT_NAME##_is_valid(HT_ITEM item) \
{ \
return \
item != HT_NAME##_missing && \
item != HT_NAME##_nomem && \
item != HT_NAME##_deleted; \
} \
\
static inline int HT_NAME##_is_missing(HT_ITEM item) \
{ \
return item == HT_NAME##_missing; \
} \
\
static inline int HT_NAME##_is_nomem(HT_ITEM item) \
{ \
return item == HT_NAME##_nomem; \
} \
\
static inline int HT_NAME##_is_deleted(HT_ITEM item) \
{ \
return item == HT_NAME##_deleted; \
} \
\
/* \
* Allocates enough buckets to represent count elements without resizing. \
* The actual number of allocated buckets depends on the load factor \
* given as a macro argument in the implementation. The bucket number \
* rounds up to the nearest power of 2. \
* \
* `ht` should not be initialized beforehand, otherwise use resize. \
* Alternatively, it is also valid to zero initialize the table by \
* other means - this will postpone allocation until needed. \
* \
* The load factor (template argument) should be positive and at most \
* 100%, otherwise insertion and resize cannot succeed. The recommended \
* load factor is between 25% and 75%. \
* \
* Returns 0 on success, -1 on allocation failure or invalid load factor. \
*/ \
HT_PRIV int HT_NAME##_init(HT_NAME##_t *ht, size_t count); \
\
/* \
* Clears the allocated memory. Optionally takes a destructor \
* that will visit all items. \
* The table struct may be reused after being destroyed. \
* May also be called on a zero initialised hash table. \
* \
* Can be called in place of clear for more control. \
*/ \
HT_PRIV void HT_NAME##_destroy(HT_NAME##_t *ht, \
HT_NAME##_visitor_f *destructor, void *context); \
\
/* \
* Clears the allocated memory, but does manage memory or state of any \
* stored items. It is a simpler version of destroy. \
*/ \
HT_PRIV void HT_NAME##_clear(HT_NAME##_t *ht); \
\
/* \
* Resizes the hash table to hold at least `count` elements. \
* The actual number of allocated buckets is a strictly larger power of \
* two. If `count` is smaller than the current number of elements, \
* that number is used instead of count. Thus, resize(ht, 0) may be \
* used to reduce the table size after a spike. \
* The function is called automatically as elements are inserted, \
* but shrinking the table should be done manually. \
* \
* If resizing to same size, table is still reallocated but will then \
* clean up old tombstones from excessive deletion. \
* \
* Returns 0 on success, -1 on allocation failure. \
*/ \
HT_PRIV int HT_NAME##_resize(HT_NAME##_t *ht, size_t count); \
\
/* \
* Inserts an item pointer in one of the following modes: \
* \
* ht_keep: \
* If the key exists, the stored item is kept and returned, \
* otherwise it is inserted and null is returned. \
* \
* ht_replace: \
* If the key exists, the stored item is replaced and the old \
* item is returned, otherwise the item is inserted and null \
* is returned. \
* \
* ht_unique: \
* Inserts an item without checking if a key exists. Always return \
* null. This is faster when it is known that the key does not exists. \
* \
* ht_multi: \
* Same as ht_unique but with the intention that a duplicate key \
* might exist. This should not be abused because not all hash table \
* implementions work well with too many collissions. Robin Hood \
* hashing might reallocate aggressively to keep the chain length \
* down. Linear and Quadratic probing do handle this, albeit slow. \
* \
* The inserted item cannot have the value HT_MISSING and depending on \
* implementation also not HT_DELETED and HT_NOMEM, but the \
* definitions are type specific. \
*/ \
HT_PRIV HT_ITEM HT_NAME##_insert(HT_NAME##_t *ht, \
const void *key, size_t len, HT_ITEM item, int mode); \
\
/* Similar to insert, but derives key from item. */ \
HT_PRIV HT_ITEM HT_NAME##_insert_item(HT_NAME##_t *ht, \
HT_ITEM item, int mode); \
\
/* \
* Finds the first matching item if any, or returns null. \
* If there are duplicate keys, the first inserted is returned. \
*/ \
HT_PRIV HT_ITEM HT_NAME##_find(HT_NAME##_t *ht, \
const void *key, size_t len); \
\
/* \
* Removes first inserted item that match the given key, if any. \
* Returns the removed item if any, otherwise null. \
*/ \
HT_PRIV HT_ITEM HT_NAME##_remove(HT_NAME##_t *ht, \
const void *key, size_t len); \
\
/* \
* Finds an item that compares the same as the given item but it is \
* not necessarily the same item if it either isn't stored, or if \
* there are duplicates in the table. \
*/ \
HT_PRIV HT_ITEM HT_NAME##_find_item(HT_NAME##_t *ht, HT_ITEM item); \
\
/* \
* This removes the first item that matches the given item, not \
* necessarily the item itself, and the item need not be present \
* in the table. Even if the item is in fact removed, it may still \
* be present if stored multiple times through abuse use of the \
* insert_unique function. \
*/ \
HT_PRIV HT_ITEM HT_NAME##_remove_item(HT_NAME##_t *ht, HT_ITEM item); \
\
/* \
* Calls a function for every item in the hash table. This may be \
* used for destructing items, provided the table is not accessed \
* subsequently. In fact, the hash_table_clear function takes an \
* optional visitor that does exactly that. \
* \
* The function is linear of the allocated hash table size, so will be \
* inefficient if the hash table was resized much larger than the number \
* of stored items. In that case it is better to store links in the \
* items. For the default resizing, the function is reasonably fast \
* because for cache reasons it is very fast to exclude empty elements. \
*/ \
HT_PRIV void HT_NAME##_visit(HT_NAME##_t *ht, \
HT_NAME##_visitor_f *visitor, void *context); \
\
/* \
* Returns number of elements in the table. (Not necessarily the number of \
* unique keys. \
*/ \
static inline size_t HT_NAME##_count(HT_NAME##_t *ht) \
{ \
return ht->count; \
} \
#endif /* HASH_TABLE_H */
|